forked from phoenix/litellm-mirror
Merge pull request #2856 from lazyhope/anthropic-tools-use-2024-04-04
Support latest Anthropic Tools Use (2024-04-04)
This commit is contained in:
commit
a50edef1e6
5 changed files with 231 additions and 59 deletions
|
@ -2,18 +2,12 @@ import os, types
|
||||||
import json
|
import json
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
import requests, copy
|
import requests, copy
|
||||||
import time, uuid
|
import time
|
||||||
from typing import Callable, Optional, List
|
from typing import Callable, Optional, List
|
||||||
from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
|
from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
|
||||||
import litellm
|
import litellm
|
||||||
from .prompt_templates.factory import (
|
from .prompt_templates.factory import prompt_factory, custom_prompt
|
||||||
contains_tag,
|
|
||||||
prompt_factory,
|
|
||||||
custom_prompt,
|
|
||||||
construct_tool_use_system_prompt,
|
|
||||||
extract_between_tags,
|
|
||||||
parse_xml_params,
|
|
||||||
)
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
|
|
||||||
|
@ -118,7 +112,6 @@ def completion(
|
||||||
):
|
):
|
||||||
headers = validate_environment(api_key, headers)
|
headers = validate_environment(api_key, headers)
|
||||||
_is_function_call = False
|
_is_function_call = False
|
||||||
json_schemas: dict = {}
|
|
||||||
messages = copy.deepcopy(messages)
|
messages = copy.deepcopy(messages)
|
||||||
optional_params = copy.deepcopy(optional_params)
|
optional_params = copy.deepcopy(optional_params)
|
||||||
if model in custom_prompt_dict:
|
if model in custom_prompt_dict:
|
||||||
|
@ -162,17 +155,15 @@ def completion(
|
||||||
## Handle Tool Calling
|
## Handle Tool Calling
|
||||||
if "tools" in optional_params:
|
if "tools" in optional_params:
|
||||||
_is_function_call = True
|
_is_function_call = True
|
||||||
|
headers["anthropic-beta"] = "tools-2024-04-04"
|
||||||
|
|
||||||
|
anthropic_tools = []
|
||||||
for tool in optional_params["tools"]:
|
for tool in optional_params["tools"]:
|
||||||
json_schemas[tool["function"]["name"]] = tool["function"].get(
|
new_tool = tool["function"]
|
||||||
"parameters", None
|
new_tool["input_schema"] = new_tool.pop("parameters") # rename key
|
||||||
)
|
anthropic_tools.append(new_tool)
|
||||||
tool_calling_system_prompt = construct_tool_use_system_prompt(
|
|
||||||
tools=optional_params["tools"]
|
optional_params["tools"] = anthropic_tools
|
||||||
)
|
|
||||||
optional_params["system"] = (
|
|
||||||
optional_params.get("system", "\n") + tool_calling_system_prompt
|
|
||||||
) # add the anthropic tool calling prompt to the system prompt
|
|
||||||
optional_params.pop("tools")
|
|
||||||
|
|
||||||
stream = optional_params.pop("stream", None)
|
stream = optional_params.pop("stream", None)
|
||||||
|
|
||||||
|
@ -195,9 +186,9 @@ def completion(
|
||||||
print_verbose(f"_is_function_call: {_is_function_call}")
|
print_verbose(f"_is_function_call: {_is_function_call}")
|
||||||
## COMPLETION CALL
|
## COMPLETION CALL
|
||||||
if (
|
if (
|
||||||
stream is not None and stream == True and _is_function_call == False
|
stream and not _is_function_call
|
||||||
): # if function call - fake the streaming (need complete blocks for output parsing in openai format)
|
): # if function call - fake the streaming (need complete blocks for output parsing in openai format)
|
||||||
print_verbose(f"makes anthropic streaming POST request")
|
print_verbose("makes anthropic streaming POST request")
|
||||||
data["stream"] = stream
|
data["stream"] = stream
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
api_base,
|
api_base,
|
||||||
|
@ -245,46 +236,40 @@ def completion(
|
||||||
status_code=response.status_code,
|
status_code=response.status_code,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
text_content = completion_response["content"][0].get("text", None)
|
text_content = ""
|
||||||
## TOOL CALLING - OUTPUT PARSE
|
tool_calls = []
|
||||||
if text_content is not None and contains_tag("invoke", text_content):
|
for content in completion_response["content"]:
|
||||||
function_name = extract_between_tags("tool_name", text_content)[0]
|
if content["type"] == "text":
|
||||||
function_arguments_str = extract_between_tags("invoke", text_content)[
|
text_content += content["text"]
|
||||||
0
|
## TOOL CALLING
|
||||||
].strip()
|
elif content["type"] == "tool_use":
|
||||||
function_arguments_str = f"<invoke>{function_arguments_str}</invoke>"
|
tool_calls.append(
|
||||||
function_arguments = parse_xml_params(
|
|
||||||
function_arguments_str,
|
|
||||||
json_schema=json_schemas.get(
|
|
||||||
function_name, None
|
|
||||||
), # check if we have a json schema for this function name
|
|
||||||
)
|
|
||||||
_message = litellm.Message(
|
|
||||||
tool_calls=[
|
|
||||||
{
|
{
|
||||||
"id": f"call_{uuid.uuid4()}",
|
"id": content["id"],
|
||||||
"type": "function",
|
"type": "function",
|
||||||
"function": {
|
"function": {
|
||||||
"name": function_name,
|
"name": content["name"],
|
||||||
"arguments": json.dumps(function_arguments),
|
"arguments": json.dumps(content["input"]),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
],
|
)
|
||||||
content=None,
|
|
||||||
)
|
_message = litellm.Message(
|
||||||
model_response.choices[0].message = _message # type: ignore
|
tool_calls=tool_calls,
|
||||||
model_response._hidden_params["original_response"] = (
|
content=text_content or None,
|
||||||
text_content # allow user to access raw anthropic tool calling response
|
)
|
||||||
)
|
model_response.choices[0].message = _message # type: ignore
|
||||||
else:
|
model_response._hidden_params["original_response"] = completion_response[
|
||||||
model_response.choices[0].message.content = text_content # type: ignore
|
"content"
|
||||||
|
] # allow user to access raw anthropic tool calling response
|
||||||
|
|
||||||
model_response.choices[0].finish_reason = map_finish_reason(
|
model_response.choices[0].finish_reason = map_finish_reason(
|
||||||
completion_response["stop_reason"]
|
completion_response["stop_reason"]
|
||||||
)
|
)
|
||||||
|
|
||||||
print_verbose(f"_is_function_call: {_is_function_call}; stream: {stream}")
|
print_verbose(f"_is_function_call: {_is_function_call}; stream: {stream}")
|
||||||
if _is_function_call == True and stream is not None and stream == True:
|
if _is_function_call and stream:
|
||||||
print_verbose(f"INSIDE ANTHROPIC STREAMING TOOL CALLING CONDITION BLOCK")
|
print_verbose("INSIDE ANTHROPIC STREAMING TOOL CALLING CONDITION BLOCK")
|
||||||
# return an iterator
|
# return an iterator
|
||||||
streaming_model_response = ModelResponse(stream=True)
|
streaming_model_response = ModelResponse(stream=True)
|
||||||
streaming_model_response.choices[0].finish_reason = model_response.choices[
|
streaming_model_response.choices[0].finish_reason = model_response.choices[
|
||||||
|
@ -318,7 +303,7 @@ def completion(
|
||||||
model_response=streaming_model_response
|
model_response=streaming_model_response
|
||||||
)
|
)
|
||||||
print_verbose(
|
print_verbose(
|
||||||
f"Returns anthropic CustomStreamWrapper with 'cached_response' streaming object"
|
"Returns anthropic CustomStreamWrapper with 'cached_response' streaming object"
|
||||||
)
|
)
|
||||||
return CustomStreamWrapper(
|
return CustomStreamWrapper(
|
||||||
completion_stream=completion_stream,
|
completion_stream=completion_stream,
|
||||||
|
@ -337,7 +322,7 @@ def completion(
|
||||||
usage = Usage(
|
usage = Usage(
|
||||||
prompt_tokens=prompt_tokens,
|
prompt_tokens=prompt_tokens,
|
||||||
completion_tokens=completion_tokens,
|
completion_tokens=completion_tokens,
|
||||||
total_tokens=prompt_tokens + completion_tokens,
|
total_tokens=total_tokens,
|
||||||
)
|
)
|
||||||
model_response.usage = usage
|
model_response.usage = usage
|
||||||
return model_response
|
return model_response
|
||||||
|
|
|
@ -746,7 +746,7 @@ def completion(
|
||||||
]
|
]
|
||||||
# Format rest of message according to anthropic guidelines
|
# Format rest of message according to anthropic guidelines
|
||||||
messages = prompt_factory(
|
messages = prompt_factory(
|
||||||
model=model, messages=messages, custom_llm_provider="anthropic"
|
model=model, messages=messages, custom_llm_provider="anthropic_xml"
|
||||||
)
|
)
|
||||||
## LOAD CONFIG
|
## LOAD CONFIG
|
||||||
config = litellm.AmazonAnthropicClaude3Config.get_config()
|
config = litellm.AmazonAnthropicClaude3Config.get_config()
|
||||||
|
@ -1108,6 +1108,7 @@ def completion(
|
||||||
|
|
||||||
raise BedrockError(status_code=500, message=traceback.format_exc())
|
raise BedrockError(status_code=500, message=traceback.format_exc())
|
||||||
|
|
||||||
|
|
||||||
class ModelResponseIterator:
|
class ModelResponseIterator:
|
||||||
def __init__(self, model_response):
|
def __init__(self, model_response):
|
||||||
self.model_response = model_response
|
self.model_response = model_response
|
||||||
|
@ -1133,6 +1134,7 @@ class ModelResponseIterator:
|
||||||
self.is_done = True
|
self.is_done = True
|
||||||
return self.model_response
|
return self.model_response
|
||||||
|
|
||||||
|
|
||||||
def _embedding_func_single(
|
def _embedding_func_single(
|
||||||
model: str,
|
model: str,
|
||||||
input: str,
|
input: str,
|
||||||
|
|
|
@ -556,7 +556,9 @@ def convert_to_anthropic_image_obj(openai_image_url: str):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def convert_to_anthropic_tool_result(message: dict) -> str:
|
# The following XML functions will be deprecated once JSON schema support is available on Bedrock and Vertex
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
def convert_to_anthropic_tool_result_xml(message: dict) -> str:
|
||||||
"""
|
"""
|
||||||
OpenAI message with a tool result looks like:
|
OpenAI message with a tool result looks like:
|
||||||
{
|
{
|
||||||
|
@ -606,7 +608,7 @@ def convert_to_anthropic_tool_result(message: dict) -> str:
|
||||||
return anthropic_tool_result
|
return anthropic_tool_result
|
||||||
|
|
||||||
|
|
||||||
def convert_to_anthropic_tool_invoke(tool_calls: list) -> str:
|
def convert_to_anthropic_tool_invoke_xml(tool_calls: list) -> str:
|
||||||
invokes = ""
|
invokes = ""
|
||||||
for tool in tool_calls:
|
for tool in tool_calls:
|
||||||
if tool["type"] != "function":
|
if tool["type"] != "function":
|
||||||
|
@ -631,7 +633,7 @@ def convert_to_anthropic_tool_invoke(tool_calls: list) -> str:
|
||||||
return anthropic_tool_invoke
|
return anthropic_tool_invoke
|
||||||
|
|
||||||
|
|
||||||
def anthropic_messages_pt(messages: list):
|
def anthropic_messages_pt_xml(messages: list):
|
||||||
"""
|
"""
|
||||||
format messages for anthropic
|
format messages for anthropic
|
||||||
1. Anthropic supports roles like "user" and "assistant", (here litellm translates system-> assistant)
|
1. Anthropic supports roles like "user" and "assistant", (here litellm translates system-> assistant)
|
||||||
|
@ -720,6 +722,185 @@ def anthropic_messages_pt(messages: list):
|
||||||
return new_messages
|
return new_messages
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_anthropic_tool_result(message: dict) -> dict:
    """
    Translate an OpenAI ``role="tool"`` message into an Anthropic
    ``tool_result`` content block.

    OpenAI message with a tool result looks like:
    {
        "tool_call_id": "tool_1",
        "role": "tool",
        "name": "get_current_weather",
        "content": "function result goes here",
    },

    Anthropic tool_results look like:
    {
        "role": "user",
        "content": [
            {
                "type": "tool_result",
                "tool_use_id": "toolu_01A09q90qw90lq917835lq9",
                "content": "ConnectionError: the weather service API is not available (HTTP 500)",
                # "is_error": true
            }
        ]
    }

    Only the inner ``tool_result`` block is returned; wrapping it in a
    ``role="user"`` message is left to the caller. Keys missing from
    ``message`` are emitted as ``None``.
    """
    # The OpenAI message format does not say whether the call succeeded or
    # failed, so the successful-result shape is always used (no "is_error").
    return {
        "type": "tool_result",
        "tool_use_id": message.get("tool_call_id"),
        "content": message.get("content"),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_anthropic_tool_invoke(tool_calls: list) -> list:
    r"""
    Translate OpenAI assistant ``tool_calls`` into Anthropic ``tool_use``
    content blocks.

    OpenAI tool invokes:
    {
        "role": "assistant",
        "content": null,
        "tool_calls": [
            {
                "id": "call_abc123",
                "type": "function",
                "function": {
                    "name": "get_current_weather",
                    "arguments": "{\n\"location\": \"Boston, MA\"\n}"
                }
            }
        ]
    },

    Anthropic tool invokes:
    {
        "role": "assistant",
        "content": [
            {
                "type": "text",
                "text": "<thinking>To answer this question, I will: ...</thinking>"
            },
            {
                "type": "tool_use",
                "id": "toolu_01A09q90qw90lq917835lq9",
                "name": "get_weather",
                "input": {"location": "San Francisco, CA"}
            }
        ]
    }

    Args:
        tool_calls: OpenAI-format tool call dicts. Entries whose ``type`` is
            not ``"function"`` are skipped.

    Returns:
        A list of ``tool_use`` blocks, one per function call, in input order.

    Raises:
        json.JSONDecodeError: if a non-empty ``arguments`` string is not
            valid JSON.
    """
    anthropic_tool_invoke = []
    for tool in tool_calls:
        if tool["type"] != "function":
            continue

        raw_arguments = tool["function"]["arguments"]
        # A parameterless call may carry "" (or None) instead of "{}";
        # json.loads would raise on that, so map it to an empty input object.
        if raw_arguments is None or (
            isinstance(raw_arguments, str) and not raw_arguments.strip()
        ):
            tool_input = {}
        else:
            tool_input = json.loads(raw_arguments)

        anthropic_tool_invoke.append(
            {
                "type": "tool_use",
                "id": tool["id"],
                "name": tool["function"]["name"],
                "input": tool_input,
            }
        )

    return anthropic_tool_invoke
|
||||||
|
|
||||||
|
|
||||||
|
def anthropic_messages_pt(messages: list):
    """
    Format OpenAI-style chat messages for the Anthropic Messages API.

    Rules applied:
    1. Anthropic only has "user" and "assistant" turns; OpenAI "tool"
       messages are folded into the surrounding user turn as
       ``tool_result`` blocks.
    2. The first message always needs to be of role "user".
    3. Turns must alternate between "user" and "assistant", so consecutive
       messages of the same side are merged into one turn.
    4. Final assistant content cannot end with trailing whitespace
       (anthropic raises an error otherwise).
    5. System messages are a separate param to the Messages API and are not
       handled here; any role other than "user", "tool" or "assistant" is
       rejected.
    6. Only role and content are emitted (message.name is not supported).

    Args:
        messages: OpenAI-format message dicts, each with a "role" key.

    Returns:
        A new list of ``{"role": ..., "content": [blocks]}`` dicts.

    Raises:
        Exception: if a message has an unsupported role, or if the result
            does not start with a user turn and ``litellm.modify_params`` is
            falsy.
    """
    # add role=tool support to allow function call result/error submission
    user_message_types = {"user", "tool"}
    # reformat messages to ensure user/assistant are alternating, if there's either 2 consecutive 'user' messages or 2 consecutive 'assistant' message, merge them.
    new_messages = []
    msg_i = 0
    while msg_i < len(messages):
        turn_start = msg_i

        user_content = []
        ## MERGE CONSECUTIVE USER CONTENT ##
        while msg_i < len(messages) and messages[msg_i]["role"] in user_message_types:
            if isinstance(messages[msg_i]["content"], list):
                for m in messages[msg_i]["content"]:
                    if m.get("type", "") == "image_url":
                        user_content.append(
                            {
                                "type": "image",
                                "source": convert_to_anthropic_image_obj(
                                    m["image_url"]["url"]
                                ),
                            }
                        )
                    elif m.get("type", "") == "text":
                        user_content.append({"type": "text", "text": m["text"]})
            elif messages[msg_i]["role"] == "tool":
                # OpenAI's tool message content will always be a string
                user_content.append(convert_to_anthropic_tool_result(messages[msg_i]))
            else:
                user_content.append(
                    {"type": "text", "text": messages[msg_i]["content"]}
                )

            msg_i += 1

        if user_content:
            new_messages.append({"role": "user", "content": user_content})

        assistant_content = []
        ## MERGE CONSECUTIVE ASSISTANT CONTENT ##
        while msg_i < len(messages) and messages[msg_i]["role"] == "assistant":
            assistant_text = (
                messages[msg_i].get("content") or ""
            )  # either string or none
            if assistant_text:
                assistant_content.append({"type": "text", "text": assistant_text})

            if messages[msg_i].get(
                "tool_calls", []
            ):  # support assistant tool invoke conversion
                assistant_content.extend(
                    convert_to_anthropic_tool_invoke(messages[msg_i]["tool_calls"])
                )

            msg_i += 1

        if assistant_content:
            new_messages.append({"role": "assistant", "content": assistant_content})

        if msg_i == turn_start:
            # Neither inner loop consumed the current message, so its role is
            # unsupported. Fail loudly rather than looping forever on it.
            raise Exception(
                f"Invalid message passed to Anthropic: unsupported role "
                f"{messages[msg_i]['role']!r} at index {msg_i}. "
                "Expected one of 'user', 'tool' or 'assistant'."
            )

    # An empty result (no messages, or only empty content lists) is treated
    # like a missing first user turn instead of raising a bare IndexError.
    if not new_messages or new_messages[0]["role"] != "user":
        if litellm.modify_params:
            new_messages.insert(
                0, {"role": "user", "content": [{"type": "text", "text": "."}]}
            )
        else:
            raise Exception(
                "Invalid first message. Should always start with 'role'='user' for Anthropic. System prompt is sent separately for Anthropic. set 'litellm.modify_params = True' or 'litellm_settings:modify_params = True' on proxy, to insert a placeholder user message - '.' as the first message, "
            )

    if new_messages[-1]["role"] == "assistant":
        for content in new_messages[-1]["content"]:
            if isinstance(content, dict) and content["type"] == "text":
                content["text"] = content[
                    "text"
                ].rstrip()  # no trailing whitespace for final assistant message

    return new_messages
|
||||||
|
|
||||||
|
|
||||||
def extract_between_tags(tag: str, string: str, strip: bool = False) -> List[str]:
|
def extract_between_tags(tag: str, string: str, strip: bool = False) -> List[str]:
|
||||||
ext_list = re.findall(f"<{tag}>(.+?)</{tag}>", string, re.DOTALL)
|
ext_list = re.findall(f"<{tag}>(.+?)</{tag}>", string, re.DOTALL)
|
||||||
if strip:
|
if strip:
|
||||||
|
@ -1081,6 +1262,8 @@ def prompt_factory(
|
||||||
if model == "claude-instant-1" or model == "claude-2":
|
if model == "claude-instant-1" or model == "claude-2":
|
||||||
return anthropic_pt(messages=messages)
|
return anthropic_pt(messages=messages)
|
||||||
return anthropic_messages_pt(messages=messages)
|
return anthropic_messages_pt(messages=messages)
|
||||||
|
elif custom_llm_provider == "anthropic_xml":
|
||||||
|
return anthropic_messages_pt_xml(messages=messages)
|
||||||
elif custom_llm_provider == "together_ai":
|
elif custom_llm_provider == "together_ai":
|
||||||
prompt_format, chat_template = get_model_info(token=api_key, model=model)
|
prompt_format, chat_template = get_model_info(token=api_key, model=model)
|
||||||
return format_prompt_togetherai(
|
return format_prompt_togetherai(
|
||||||
|
|
|
@ -189,7 +189,7 @@ def completion(
|
||||||
# Format rest of message according to anthropic guidelines
|
# Format rest of message according to anthropic guidelines
|
||||||
try:
|
try:
|
||||||
messages = prompt_factory(
|
messages = prompt_factory(
|
||||||
model=model, messages=messages, custom_llm_provider="anthropic"
|
model=model, messages=messages, custom_llm_provider="anthropic_xml"
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise VertexAIError(status_code=400, message=str(e))
|
raise VertexAIError(status_code=400, message=str(e))
|
||||||
|
|
|
@ -207,6 +207,8 @@ def map_finish_reason(
|
||||||
return "stop"
|
return "stop"
|
||||||
elif finish_reason == "max_tokens": # anthropic
|
elif finish_reason == "max_tokens": # anthropic
|
||||||
return "length"
|
return "length"
|
||||||
|
elif finish_reason == "tool_use": # anthropic
|
||||||
|
return "tool_calls"
|
||||||
return finish_reason
|
return finish_reason
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue