Merge pull request #2856 from lazyhope/anthropic-tools-use-2024-04-04

Support latest Anthropic Tools Use (2024-04-04)
2024-04-05 14:31:26 -07:00 · 2024-04-05 14:31:26 -07:00 · a50edef1e6
commit a50edef1e6
parent 9d7aaa91e0 f16d0c06fd
5 changed files with 231 additions and 59 deletions
--- a/litellm/llms/anthropic.py
+++ b/litellm/llms/anthropic.py
@ -2,18 +2,12 @@ import os, types
 import json
 from enum import Enum
 import requests, copy
-import time, uuid
+import time
 from typing import Callable, Optional, List
 from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
 import litellm
-from .prompt_templates.factory import (
+from .prompt_templates.factory import prompt_factory, custom_prompt
-    contains_tag,
+
    prompt_factory,
    custom_prompt,
    construct_tool_use_system_prompt,
    extract_between_tags,
    parse_xml_params,
 )
 import httpx
@ -118,7 +112,6 @@ def completion(
 ):
    headers = validate_environment(api_key, headers)
    _is_function_call = False
    json_schemas: dict = {}
    messages = copy.deepcopy(messages)
    optional_params = copy.deepcopy(optional_params)
    if model in custom_prompt_dict:
@ -162,17 +155,15 @@ def completion(
    ## Handle Tool Calling
    if "tools" in optional_params:
        _is_function_call = True
        headers["anthropic-beta"] = "tools-2024-04-04"
        anthropic_tools = []
        for tool in optional_params["tools"]:
-            json_schemas[tool["function"]["name"]] = tool["function"].get(
+            new_tool = tool["function"]
-                "parameters", None
+            new_tool["input_schema"] = new_tool.pop("parameters")  # rename key
-            )
+            anthropic_tools.append(new_tool)
-        tool_calling_system_prompt = construct_tool_use_system_prompt(
+
-            tools=optional_params["tools"]
+        optional_params["tools"] = anthropic_tools
        )
        optional_params["system"] = (
            optional_params.get("system", "\n") + tool_calling_system_prompt
        )  # add the anthropic tool calling prompt to the system prompt
        optional_params.pop("tools")
    stream = optional_params.pop("stream", None)
@ -195,9 +186,9 @@ def completion(
    print_verbose(f"_is_function_call: {_is_function_call}")
    ## COMPLETION CALL
    if (
-        stream is not None and stream == True and _is_function_call == False
+        stream and not _is_function_call
    ):  # if function call - fake the streaming (need complete blocks for output parsing in openai format)
-        print_verbose(f"makes anthropic streaming POST request")
+        print_verbose("makes anthropic streaming POST request")
        data["stream"] = stream
        response = requests.post(
            api_base,
@ -245,46 +236,40 @@ def completion(
                status_code=response.status_code,
            )
        else:
-            text_content = completion_response["content"][0].get("text", None)
+            text_content = ""
-            ## TOOL CALLING - OUTPUT PARSE
+            tool_calls = []
-            if text_content is not None and contains_tag("invoke", text_content):
+            for content in completion_response["content"]:
-                function_name = extract_between_tags("tool_name", text_content)[0]
+                if content["type"] == "text":
-                function_arguments_str = extract_between_tags("invoke", text_content)[
+                    text_content += content["text"]
-                    0
+                ## TOOL CALLING
-                ].strip()
+                elif content["type"] == "tool_use":
-                function_arguments_str = f"<invoke>{function_arguments_str}</invoke>"
+                    tool_calls.append(
                function_arguments = parse_xml_params(
                    function_arguments_str,
                    json_schema=json_schemas.get(
                        function_name, None
                    ),  # check if we have a json schema for this function name
                )
                _message = litellm.Message(
                    tool_calls=[
                        {
-                            "id": f"call_{uuid.uuid4()}",
+                            "id": content["id"],
                            "type": "function",
                            "function": {
-                                "name": function_name,
+                                "name": content["name"],
-                                "arguments": json.dumps(function_arguments),
+                                "arguments": json.dumps(content["input"]),
                            },
                        }
-                    ],
+                    )
-                    content=None,
+
-                )
+            _message = litellm.Message(
-                model_response.choices[0].message = _message  # type: ignore
+                tool_calls=tool_calls,
-                model_response._hidden_params["original_response"] = (
+                content=text_content or None,
-                    text_content  # allow user to access raw anthropic tool calling response
+            )
-                )
+            model_response.choices[0].message = _message  # type: ignore
-            else:
+            model_response._hidden_params["original_response"] = completion_response[
-                model_response.choices[0].message.content = text_content  # type: ignore
+                "content"
            ]  # allow user to access raw anthropic tool calling response
            model_response.choices[0].finish_reason = map_finish_reason(
                completion_response["stop_reason"]
            )
        print_verbose(f"_is_function_call: {_is_function_call}; stream: {stream}")
-        if _is_function_call == True and stream is not None and stream == True:
+        if _is_function_call and stream:
-            print_verbose(f"INSIDE ANTHROPIC STREAMING TOOL CALLING CONDITION BLOCK")
+            print_verbose("INSIDE ANTHROPIC STREAMING TOOL CALLING CONDITION BLOCK")
            # return an iterator
            streaming_model_response = ModelResponse(stream=True)
            streaming_model_response.choices[0].finish_reason = model_response.choices[
@ -318,7 +303,7 @@ def completion(
                    model_response=streaming_model_response
                )
                print_verbose(
-                    f"Returns anthropic CustomStreamWrapper with 'cached_response' streaming object"
+                    "Returns anthropic CustomStreamWrapper with 'cached_response' streaming object"
                )
                return CustomStreamWrapper(
                    completion_stream=completion_stream,
@ -337,7 +322,7 @@ def completion(
        usage = Usage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
-            total_tokens=prompt_tokens + completion_tokens,
+            total_tokens=total_tokens,
        )
        model_response.usage = usage
        return model_response
--- a/litellm/llms/bedrock.py
+++ b/litellm/llms/bedrock.py
@ -746,7 +746,7 @@ def completion(
                    ]
                # Format rest of message according to anthropic guidelines
                messages = prompt_factory(
-                    model=model, messages=messages, custom_llm_provider="anthropic"
+                    model=model, messages=messages, custom_llm_provider="anthropic_xml"
                )
                ## LOAD CONFIG
                config = litellm.AmazonAnthropicClaude3Config.get_config()
@ -1108,6 +1108,7 @@ def completion(
            raise BedrockError(status_code=500, message=traceback.format_exc())
 class ModelResponseIterator:
    def __init__(self, model_response):
        self.model_response = model_response
@ -1133,6 +1134,7 @@ class ModelResponseIterator:
        self.is_done = True
        return self.model_response
 def _embedding_func_single(
    model: str,
    input: str,
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@ -556,7 +556,9 @@ def convert_to_anthropic_image_obj(openai_image_url: str):
        )
-def convert_to_anthropic_tool_result(message: dict) -> str:
+# The following XML functions will be deprecated once JSON schema support is available on Bedrock and Vertex
 # ------------------------------------------------------------------------------
 def convert_to_anthropic_tool_result_xml(message: dict) -> str:
    """
    OpenAI message with a tool result looks like:
    {
@ -606,7 +608,7 @@ def convert_to_anthropic_tool_result(message: dict) -> str:
    return anthropic_tool_result
-def convert_to_anthropic_tool_invoke(tool_calls: list) -> str:
+def convert_to_anthropic_tool_invoke_xml(tool_calls: list) -> str:
    invokes = ""
    for tool in tool_calls:
        if tool["type"] != "function":
@ -631,7 +633,7 @@ def convert_to_anthropic_tool_invoke(tool_calls: list) -> str:
    return anthropic_tool_invoke
-def anthropic_messages_pt(messages: list):
+def anthropic_messages_pt_xml(messages: list):
    """
    format messages for anthropic
    1. Anthropic supports roles like "user" and "assistant", (here litellm translates system-> assistant)
@ -720,6 +722,185 @@ def anthropic_messages_pt(messages: list):
    return new_messages
 # ------------------------------------------------------------------------------
 def convert_to_anthropic_tool_result(message: dict) -> dict:
    """
    Build an Anthropic `tool_result` content block from an OpenAI tool message.

    OpenAI message with a tool result looks like:
    {
        "tool_call_id": "tool_1",
        "role": "tool",
        "name": "get_current_weather",
        "content": "function result goes here",
    },

    Anthropic tool_results look like:
    {
        "role": "user",
        "content": [
            {
                "type": "tool_result",
                "tool_use_id": "toolu_01A09q90qw90lq917835lq9",
                "content": "ConnectionError: the weather service API is not available (HTTP 500)",
                # "is_error": true
            }
        ]
    }

    Only the inner content block is returned; the surrounding user message is
    not built here. Missing "tool_call_id" / "content" keys map to None.
    """
    # The OpenAI format does not say whether the call succeeded or failed, so
    # the plain (successful) template is always used and "is_error" is never set.
    return {
        "type": "tool_result",
        "tool_use_id": message.get("tool_call_id"),
        "content": message.get("content"),
    }
 def convert_to_anthropic_tool_invoke(tool_calls: list) -> list:
    """
    Convert OpenAI assistant `tool_calls` into Anthropic `tool_use` content blocks.

    OpenAI tool invokes:
    {
      "role": "assistant",
      "content": null,
      "tool_calls": [
        {
          "id": "call_abc123",
          "type": "function",
          "function": {
            "name": "get_current_weather",
            "arguments": "{\n\"location\": \"Boston, MA\"\n}"
          }
        }
      ]
    },

    Anthropic tool invokes:
    {
      "role": "assistant",
      "content": [
        {
          "type": "text",
          "text": "<thinking>To answer this question, I will: 1. Use the get_weather tool to get the current weather in San Francisco. 2. Use the get_time tool to get the current time in the America/Los_Angeles timezone, which covers San Francisco, CA.</thinking>"
        },
        {
          "type": "tool_use",
          "id": "toolu_01A09q90qw90lq917835lq9",
          "name": "get_weather",
          "input": {"location": "San Francisco, CA"}
        }
      ]
    }

    Args:
        tool_calls: the `tool_calls` list of an OpenAI assistant message.

    Returns:
        One `tool_use` dict per entry whose "type" is "function", in input
        order. Entries of any other type are dropped.

    Raises:
        KeyError: if an entry lacks "type", "id", "function" or the function "name".
        json.JSONDecodeError: if non-empty "arguments" is not valid JSON.
    """
    anthropic_tool_invoke = []
    for tool in tool_calls:
        if tool["type"] != "function":
            continue
        function = tool["function"]
        raw_arguments = function.get("arguments")
        # A call to a function that takes no parameters can carry empty (or
        # missing) arguments; json.loads would raise on that, so send an empty
        # input object instead.
        if raw_arguments is None or (
            isinstance(raw_arguments, str) and not raw_arguments.strip()
        ):
            tool_input = {}
        else:
            tool_input = json.loads(raw_arguments)
        anthropic_tool_invoke.append(
            {
                "type": "tool_use",
                "id": tool["id"],
                "name": function["name"],
                "input": tool_input,
            }
        )
    return anthropic_tool_invoke
 def anthropic_messages_pt(messages: list):
    """
    Format OpenAI-style chat messages for the Anthropic Messages API.

    What this function does:
    1. Emits only "user" and "assistant" turns; "tool" messages are folded into
       the user turn as `tool_result` blocks.
    2. Ensures the first turn is "user": inserts a "." placeholder user turn when
       `litellm.modify_params` is set, otherwise raises.
    3. Merges consecutive user/tool messages into one user turn and consecutive
       assistant messages into one assistant turn, so turns alternate.
    4. Strips trailing whitespace from text blocks of a final assistant turn.
    5. Keeps only role and content blocks (other keys such as `name` are dropped).

    Args:
        messages: list of message dicts with a "role" key and a "content" key
            ("content" may be absent or None on assistant messages).

    Returns:
        A list of {"role": ..., "content": [content blocks]} dicts.

    Raises:
        Exception: if a message has a role other than user/tool/assistant, or if
            the result does not start with a user turn (including an empty
            result) and `litellm.modify_params` is falsy.
    """
    # add role=tool support to allow function call result/error submission
    user_message_types = {"user", "tool"}
    # reformat messages to ensure user/assistant are alternating, if there's either 2 consecutive 'user' messages or 2 consecutive 'assistant' message, merge them.
    new_messages = []
    msg_i = 0
    while msg_i < len(messages):
        turn_start_i = msg_i
        user_content = []
        ## MERGE CONSECUTIVE USER CONTENT ##
        while msg_i < len(messages) and messages[msg_i]["role"] in user_message_types:
            if isinstance(messages[msg_i]["content"], list):
                for m in messages[msg_i]["content"]:
                    if m.get("type", "") == "image_url":
                        user_content.append(
                            {
                                "type": "image",
                                "source": convert_to_anthropic_image_obj(
                                    m["image_url"]["url"]
                                ),
                            }
                        )
                    elif m.get("type", "") == "text":
                        user_content.append({"type": "text", "text": m["text"]})
            elif messages[msg_i]["role"] == "tool":
                # OpenAI's tool message content will always be a string
                user_content.append(convert_to_anthropic_tool_result(messages[msg_i]))
            else:
                user_content.append(
                    {"type": "text", "text": messages[msg_i]["content"]}
                )
            msg_i += 1
        if user_content:
            new_messages.append({"role": "user", "content": user_content})
        assistant_content = []
        ## MERGE CONSECUTIVE ASSISTANT CONTENT ##
        while msg_i < len(messages) and messages[msg_i]["role"] == "assistant":
            assistant_text = (
                messages[msg_i].get("content") or ""
            )  # either string or none
            if assistant_text:
                assistant_content.append({"type": "text", "text": assistant_text})
            if messages[msg_i].get(
                "tool_calls", []
            ):  # support assistant tool invoke conversion
                assistant_content.extend(
                    convert_to_anthropic_tool_invoke(messages[msg_i]["tool_calls"])
                )
            msg_i += 1
        if assistant_content:
            new_messages.append({"role": "assistant", "content": assistant_content})
        if msg_i == turn_start_i:
            # Neither merge loop consumed this message, so its role is not one
            # we handle. Without this guard the outer loop would never advance.
            unsupported_role = messages[msg_i]["role"]
            raise Exception(
                f"Invalid message role '{unsupported_role}' at index {msg_i}. "
                "Anthropic messages only support 'user', 'tool' and 'assistant' roles here."
            )
    # `not new_messages` covers an empty input (or one that produced no content
    # blocks); it is handled with the same policy as a non-user first turn
    # instead of failing with IndexError.
    if not new_messages or new_messages[0]["role"] != "user":
        if litellm.modify_params:
            new_messages.insert(
                0, {"role": "user", "content": [{"type": "text", "text": "."}]}
            )
        else:
            raise Exception(
                "Invalid first message. Should always start with 'role'='user' for Anthropic. System prompt is sent separately for Anthropic. set 'litellm.modify_params = True' or 'litellm_settings:modify_params = True' on proxy, to insert a placeholder user message - '.' as the first message, "
            )
    if new_messages[-1]["role"] == "assistant":
        for content in new_messages[-1]["content"]:
            if isinstance(content, dict) and content["type"] == "text":
                content["text"] = content[
                    "text"
                ].rstrip()  # no trailing whitespace for final assistant message
    return new_messages
 def extract_between_tags(tag: str, string: str, strip: bool = False) -> List[str]:
    ext_list = re.findall(f"<{tag}>(.+?)</{tag}>", string, re.DOTALL)
    if strip:
@ -1081,6 +1262,8 @@ def prompt_factory(
        if model == "claude-instant-1" or model == "claude-2":
            return anthropic_pt(messages=messages)
        return anthropic_messages_pt(messages=messages)
    elif custom_llm_provider == "anthropic_xml":
        return anthropic_messages_pt_xml(messages=messages)
    elif custom_llm_provider == "together_ai":
        prompt_format, chat_template = get_model_info(token=api_key, model=model)
        return format_prompt_togetherai(
--- a/litellm/llms/vertex_ai_anthropic.py
+++ b/litellm/llms/vertex_ai_anthropic.py
@ -189,7 +189,7 @@ def completion(
        # Format rest of message according to anthropic guidelines
        try:
            messages = prompt_factory(
-                model=model, messages=messages, custom_llm_provider="anthropic"
+                model=model, messages=messages, custom_llm_provider="anthropic_xml"
            )
        except Exception as e:
            raise VertexAIError(status_code=400, message=str(e))
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -207,6 +207,8 @@ def map_finish_reason(
        return "stop"
    elif finish_reason == "max_tokens":  # anthropic
        return "length"
    elif finish_reason == "tool_use":  # anthropic
        return "tool_calls"
    return finish_reason