forked from phoenix/litellm-mirror
Merge pull request #2856 from lazyhope/anthropic-tools-use-2024-04-04
Support latest Anthropic Tools Use (2024-04-04)
This commit is contained in:
commit
a50edef1e6
5 changed files with 231 additions and 59 deletions
|
@ -2,18 +2,12 @@ import os, types
|
|||
import json
|
||||
from enum import Enum
|
||||
import requests, copy
|
||||
import time, uuid
|
||||
import time
|
||||
from typing import Callable, Optional, List
|
||||
from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
|
||||
import litellm
|
||||
from .prompt_templates.factory import (
|
||||
contains_tag,
|
||||
prompt_factory,
|
||||
custom_prompt,
|
||||
construct_tool_use_system_prompt,
|
||||
extract_between_tags,
|
||||
parse_xml_params,
|
||||
)
|
||||
from .prompt_templates.factory import prompt_factory, custom_prompt
|
||||
|
||||
import httpx
|
||||
|
||||
|
||||
|
@ -118,7 +112,6 @@ def completion(
|
|||
):
|
||||
headers = validate_environment(api_key, headers)
|
||||
_is_function_call = False
|
||||
json_schemas: dict = {}
|
||||
messages = copy.deepcopy(messages)
|
||||
optional_params = copy.deepcopy(optional_params)
|
||||
if model in custom_prompt_dict:
|
||||
|
@ -162,17 +155,15 @@ def completion(
|
|||
## Handle Tool Calling
|
||||
if "tools" in optional_params:
|
||||
_is_function_call = True
|
||||
headers["anthropic-beta"] = "tools-2024-04-04"
|
||||
|
||||
anthropic_tools = []
|
||||
for tool in optional_params["tools"]:
|
||||
json_schemas[tool["function"]["name"]] = tool["function"].get(
|
||||
"parameters", None
|
||||
)
|
||||
tool_calling_system_prompt = construct_tool_use_system_prompt(
|
||||
tools=optional_params["tools"]
|
||||
)
|
||||
optional_params["system"] = (
|
||||
optional_params.get("system", "\n") + tool_calling_system_prompt
|
||||
) # add the anthropic tool calling prompt to the system prompt
|
||||
optional_params.pop("tools")
|
||||
new_tool = tool["function"]
|
||||
new_tool["input_schema"] = new_tool.pop("parameters") # rename key
|
||||
anthropic_tools.append(new_tool)
|
||||
|
||||
optional_params["tools"] = anthropic_tools
|
||||
|
||||
stream = optional_params.pop("stream", None)
|
||||
|
||||
|
@ -195,9 +186,9 @@ def completion(
|
|||
print_verbose(f"_is_function_call: {_is_function_call}")
|
||||
## COMPLETION CALL
|
||||
if (
|
||||
stream is not None and stream == True and _is_function_call == False
|
||||
stream and not _is_function_call
|
||||
): # if function call - fake the streaming (need complete blocks for output parsing in openai format)
|
||||
print_verbose(f"makes anthropic streaming POST request")
|
||||
print_verbose("makes anthropic streaming POST request")
|
||||
data["stream"] = stream
|
||||
response = requests.post(
|
||||
api_base,
|
||||
|
@ -245,46 +236,40 @@ def completion(
|
|||
status_code=response.status_code,
|
||||
)
|
||||
else:
|
||||
text_content = completion_response["content"][0].get("text", None)
|
||||
## TOOL CALLING - OUTPUT PARSE
|
||||
if text_content is not None and contains_tag("invoke", text_content):
|
||||
function_name = extract_between_tags("tool_name", text_content)[0]
|
||||
function_arguments_str = extract_between_tags("invoke", text_content)[
|
||||
0
|
||||
].strip()
|
||||
function_arguments_str = f"<invoke>{function_arguments_str}</invoke>"
|
||||
function_arguments = parse_xml_params(
|
||||
function_arguments_str,
|
||||
json_schema=json_schemas.get(
|
||||
function_name, None
|
||||
), # check if we have a json schema for this function name
|
||||
)
|
||||
_message = litellm.Message(
|
||||
tool_calls=[
|
||||
text_content = ""
|
||||
tool_calls = []
|
||||
for content in completion_response["content"]:
|
||||
if content["type"] == "text":
|
||||
text_content += content["text"]
|
||||
## TOOL CALLING
|
||||
elif content["type"] == "tool_use":
|
||||
tool_calls.append(
|
||||
{
|
||||
"id": f"call_{uuid.uuid4()}",
|
||||
"id": content["id"],
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": function_name,
|
||||
"arguments": json.dumps(function_arguments),
|
||||
"name": content["name"],
|
||||
"arguments": json.dumps(content["input"]),
|
||||
},
|
||||
}
|
||||
],
|
||||
content=None,
|
||||
)
|
||||
model_response.choices[0].message = _message # type: ignore
|
||||
model_response._hidden_params["original_response"] = (
|
||||
text_content # allow user to access raw anthropic tool calling response
|
||||
)
|
||||
else:
|
||||
model_response.choices[0].message.content = text_content # type: ignore
|
||||
)
|
||||
|
||||
_message = litellm.Message(
|
||||
tool_calls=tool_calls,
|
||||
content=text_content or None,
|
||||
)
|
||||
model_response.choices[0].message = _message # type: ignore
|
||||
model_response._hidden_params["original_response"] = completion_response[
|
||||
"content"
|
||||
] # allow user to access raw anthropic tool calling response
|
||||
|
||||
model_response.choices[0].finish_reason = map_finish_reason(
|
||||
completion_response["stop_reason"]
|
||||
)
|
||||
|
||||
print_verbose(f"_is_function_call: {_is_function_call}; stream: {stream}")
|
||||
if _is_function_call == True and stream is not None and stream == True:
|
||||
print_verbose(f"INSIDE ANTHROPIC STREAMING TOOL CALLING CONDITION BLOCK")
|
||||
if _is_function_call and stream:
|
||||
print_verbose("INSIDE ANTHROPIC STREAMING TOOL CALLING CONDITION BLOCK")
|
||||
# return an iterator
|
||||
streaming_model_response = ModelResponse(stream=True)
|
||||
streaming_model_response.choices[0].finish_reason = model_response.choices[
|
||||
|
@ -318,7 +303,7 @@ def completion(
|
|||
model_response=streaming_model_response
|
||||
)
|
||||
print_verbose(
|
||||
f"Returns anthropic CustomStreamWrapper with 'cached_response' streaming object"
|
||||
"Returns anthropic CustomStreamWrapper with 'cached_response' streaming object"
|
||||
)
|
||||
return CustomStreamWrapper(
|
||||
completion_stream=completion_stream,
|
||||
|
@ -337,7 +322,7 @@ def completion(
|
|||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
total_tokens=total_tokens,
|
||||
)
|
||||
model_response.usage = usage
|
||||
return model_response
|
||||
|
|
|
@ -746,7 +746,7 @@ def completion(
|
|||
]
|
||||
# Format rest of message according to anthropic guidelines
|
||||
messages = prompt_factory(
|
||||
model=model, messages=messages, custom_llm_provider="anthropic"
|
||||
model=model, messages=messages, custom_llm_provider="anthropic_xml"
|
||||
)
|
||||
## LOAD CONFIG
|
||||
config = litellm.AmazonAnthropicClaude3Config.get_config()
|
||||
|
@ -1108,6 +1108,7 @@ def completion(
|
|||
|
||||
raise BedrockError(status_code=500, message=traceback.format_exc())
|
||||
|
||||
|
||||
class ModelResponseIterator:
|
||||
def __init__(self, model_response):
|
||||
self.model_response = model_response
|
||||
|
@ -1133,6 +1134,7 @@ class ModelResponseIterator:
|
|||
self.is_done = True
|
||||
return self.model_response
|
||||
|
||||
|
||||
def _embedding_func_single(
|
||||
model: str,
|
||||
input: str,
|
||||
|
|
|
@ -556,7 +556,9 @@ def convert_to_anthropic_image_obj(openai_image_url: str):
|
|||
)
|
||||
|
||||
|
||||
def convert_to_anthropic_tool_result(message: dict) -> str:
|
||||
# The following XML functions will be deprecated once JSON schema support is available on Bedrock and Vertex
|
||||
# ------------------------------------------------------------------------------
|
||||
def convert_to_anthropic_tool_result_xml(message: dict) -> str:
|
||||
"""
|
||||
OpenAI message with a tool result looks like:
|
||||
{
|
||||
|
@ -606,7 +608,7 @@ def convert_to_anthropic_tool_result(message: dict) -> str:
|
|||
return anthropic_tool_result
|
||||
|
||||
|
||||
def convert_to_anthropic_tool_invoke(tool_calls: list) -> str:
|
||||
def convert_to_anthropic_tool_invoke_xml(tool_calls: list) -> str:
|
||||
invokes = ""
|
||||
for tool in tool_calls:
|
||||
if tool["type"] != "function":
|
||||
|
@ -631,7 +633,7 @@ def convert_to_anthropic_tool_invoke(tool_calls: list) -> str:
|
|||
return anthropic_tool_invoke
|
||||
|
||||
|
||||
def anthropic_messages_pt(messages: list):
|
||||
def anthropic_messages_pt_xml(messages: list):
|
||||
"""
|
||||
format messages for anthropic
|
||||
1. Anthropic supports roles like "user" and "assistant", (here litellm translates system-> assistant)
|
||||
|
@ -720,6 +722,185 @@ def anthropic_messages_pt(messages: list):
|
|||
return new_messages
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
|
||||
def convert_to_anthropic_tool_result(message: dict) -> dict:
    """
    Convert an OpenAI tool-result message into an Anthropic ``tool_result`` block.

    OpenAI message with a tool result looks like:
    {
        "tool_call_id": "tool_1",
        "role": "tool",
        "name": "get_current_weather",
        "content": "function result goes here",
    }

    Anthropic tool_results look like:
    {
        "role": "user",
        "content": [
            {
                "type": "tool_result",
                "tool_use_id": "toolu_01A09q90qw90lq917835lq9",
                "content": "ConnectionError: the weather service API is not available (HTTP 500)",
                # "is_error": true
            }
        ]
    }

    Only the inner ``tool_result`` content block is built here; wrapping it in
    a ``{"role": "user", "content": [...]}`` message is left to the caller.

    Args:
        message: OpenAI-format message dict; ``tool_call_id`` and ``content``
            are read with ``dict.get`` so missing keys become ``None``.

    Returns:
        A dict with keys ``type``, ``tool_use_id`` and ``content``.
    """
    # We can't determine from the OpenAI message format whether it's a
    # successful or an error call result, so default to the successful result
    # template (no "is_error" key is emitted).
    return {
        "type": "tool_result",
        "tool_use_id": message.get("tool_call_id"),
        "content": message.get("content"),
    }
|
||||
|
||||
|
||||
def convert_to_anthropic_tool_invoke(tool_calls: list) -> list:
    """
    Convert OpenAI ``tool_calls`` into Anthropic ``tool_use`` content blocks.

    OpenAI tool invokes:
    {
        "role": "assistant",
        "content": null,
        "tool_calls": [
            {
                "id": "call_abc123",
                "type": "function",
                "function": {
                    "name": "get_current_weather",
                    "arguments": '{"location": "Boston, MA"}'
                }
            }
        ]
    }

    Anthropic tool invokes:
    {
        "role": "assistant",
        "content": [
            {
                "type": "text",
                "text": "<thinking>To answer this question, I will: ...</thinking>"
            },
            {
                "type": "tool_use",
                "id": "toolu_01A09q90qw90lq917835lq9",
                "name": "get_weather",
                "input": {"location": "San Francisco, CA"}
            }
        ]
    }

    Args:
        tool_calls: List of OpenAI tool-call dicts. Entries whose ``type`` is
            not ``"function"`` are skipped.

    Returns:
        A list of ``tool_use`` dicts, one per function call, in input order.

    Raises:
        json.JSONDecodeError: If a non-empty ``arguments`` string is not valid
            JSON.
    """
    anthropic_tool_invoke = []
    for tool in tool_calls:
        if tool["type"] != "function":
            continue
        function_call = tool["function"]
        raw_arguments = function_call["arguments"]
        anthropic_tool_invoke.append(
            {
                "type": "tool_use",
                "id": tool["id"],
                "name": function_call["name"],
                # A call without arguments may carry "" (or None) instead of
                # "{}"; json.loads would raise on that, so map it to an empty
                # input object.
                "input": json.loads(raw_arguments) if raw_arguments else {},
            }
        )

    return anthropic_tool_invoke
|
||||
|
||||
|
||||
def anthropic_messages_pt(messages: list):
    """
    Format OpenAI-style chat messages for the Anthropic Messages API.

    1. Anthropic supports the roles "user" and "assistant"; OpenAI "tool"
       messages are folded into user turns as ``tool_result`` blocks.
    2. The first message always needs to be of role "user".
    3. Turns must alternate between "user" and "assistant", so consecutive
       user/tool messages are merged into one user turn and consecutive
       assistant messages into one assistant turn.
    4. Final assistant content cannot end with trailing whitespace (anthropic
       raises an error otherwise).
    5. System messages are a separate param to the Messages API and must not
       be passed here.
    6. Ensure we only accept role, content. (message.name is not supported)

    Args:
        messages: List of OpenAI-format message dicts.

    Returns:
        A list of ``{"role": ..., "content": [content blocks]}`` dicts.

    Raises:
        ValueError: If a message has a role other than "user", "tool" or
            "assistant".
        Exception: If the result does not start with a user turn and
            ``litellm.modify_params`` is falsy.
    """
    # add role=tool support to allow function call result/error submission
    user_message_types = {"user", "tool"}
    # reformat messages to ensure user/assistant are alternating, if there's
    # either 2 consecutive 'user' messages or 2 consecutive 'assistant'
    # messages, merge them.
    new_messages = []
    msg_i = 0
    while msg_i < len(messages):
        init_msg_i = msg_i

        user_content = []
        ## MERGE CONSECUTIVE USER CONTENT ##
        while msg_i < len(messages) and messages[msg_i]["role"] in user_message_types:
            if isinstance(messages[msg_i]["content"], list):
                for m in messages[msg_i]["content"]:
                    if m.get("type", "") == "image_url":
                        user_content.append(
                            {
                                "type": "image",
                                "source": convert_to_anthropic_image_obj(
                                    m["image_url"]["url"]
                                ),
                            }
                        )
                    elif m.get("type", "") == "text":
                        user_content.append({"type": "text", "text": m["text"]})
            elif messages[msg_i]["role"] == "tool":
                # OpenAI's tool message content will always be a string
                user_content.append(convert_to_anthropic_tool_result(messages[msg_i]))
            else:
                user_content.append(
                    {"type": "text", "text": messages[msg_i]["content"]}
                )

            msg_i += 1

        if user_content:
            new_messages.append({"role": "user", "content": user_content})

        assistant_content = []
        ## MERGE CONSECUTIVE ASSISTANT CONTENT ##
        while msg_i < len(messages) and messages[msg_i]["role"] == "assistant":
            assistant_text = (
                messages[msg_i].get("content") or ""
            )  # either string or none
            if assistant_text:
                assistant_content.append({"type": "text", "text": assistant_text})

            if messages[msg_i].get(
                "tool_calls", []
            ):  # support assistant tool invoke conversion
                assistant_content.extend(
                    convert_to_anthropic_tool_invoke(messages[msg_i]["tool_calls"])
                )

            msg_i += 1

        if assistant_content:
            new_messages.append({"role": "assistant", "content": assistant_content})

        # Neither inner loop consumed the current message, so its role is not
        # one we can translate. Without this guard the outer loop would never
        # advance and would spin forever.
        if msg_i == init_msg_i:
            raise ValueError(
                f"Invalid message role for Anthropic: {messages[msg_i]['role']!r}. "
                "Only 'user', 'tool' and 'assistant' messages are supported here."
            )

    # `not new_messages` covers empty input, which would otherwise fail with
    # an IndexError on new_messages[0].
    if not new_messages or new_messages[0]["role"] != "user":
        if litellm.modify_params:
            new_messages.insert(
                0, {"role": "user", "content": [{"type": "text", "text": "."}]}
            )
        else:
            raise Exception(
                "Invalid first message. Should always start with 'role'='user' for Anthropic. System prompt is sent separately for Anthropic. set 'litellm.modify_params = True' or 'litellm_settings:modify_params = True' on proxy, to insert a placeholder user message - '.' as the first message, "
            )

    if new_messages[-1]["role"] == "assistant":
        for content in new_messages[-1]["content"]:
            if isinstance(content, dict) and content["type"] == "text":
                content["text"] = content[
                    "text"
                ].rstrip()  # no trailing whitespace for final assistant message

    return new_messages
|
||||
|
||||
|
||||
def extract_between_tags(tag: str, string: str, strip: bool = False) -> List[str]:
|
||||
ext_list = re.findall(f"<{tag}>(.+?)</{tag}>", string, re.DOTALL)
|
||||
if strip:
|
||||
|
@ -1081,6 +1262,8 @@ def prompt_factory(
|
|||
if model == "claude-instant-1" or model == "claude-2":
|
||||
return anthropic_pt(messages=messages)
|
||||
return anthropic_messages_pt(messages=messages)
|
||||
elif custom_llm_provider == "anthropic_xml":
|
||||
return anthropic_messages_pt_xml(messages=messages)
|
||||
elif custom_llm_provider == "together_ai":
|
||||
prompt_format, chat_template = get_model_info(token=api_key, model=model)
|
||||
return format_prompt_togetherai(
|
||||
|
|
|
@ -189,7 +189,7 @@ def completion(
|
|||
# Format rest of message according to anthropic guidelines
|
||||
try:
|
||||
messages = prompt_factory(
|
||||
model=model, messages=messages, custom_llm_provider="anthropic"
|
||||
model=model, messages=messages, custom_llm_provider="anthropic_xml"
|
||||
)
|
||||
except Exception as e:
|
||||
raise VertexAIError(status_code=400, message=str(e))
|
||||
|
|
|
@ -207,6 +207,8 @@ def map_finish_reason(
|
|||
return "stop"
|
||||
elif finish_reason == "max_tokens": # anthropic
|
||||
return "length"
|
||||
elif finish_reason == "tool_use": # anthropic
|
||||
return "tool_calls"
|
||||
return finish_reason
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue