(refactor) anthropic - move _process_response in transformation.py (#6834)

* move _process_response into transformation.py * fix AnthropicConfig test
2024-11-20 17:24:19 -08:00 · 2024-11-20 17:24:19 -08:00 · 434b1d3d86
commit 434b1d3d86
parent b11bc0374e
3 changed files with 188 additions and 178 deletions
--- a/litellm/llms/anthropic/chat/handler.py
+++ b/litellm/llms/anthropic/chat/handler.py
@ -45,9 +45,7 @@ from litellm.types.llms.openai import (
    ChatCompletionUsageBlock,
 )
 from litellm.types.utils import GenericStreamingChunk
-from litellm.types.utils import Message as LitellmMessage
+from litellm.utils import CustomStreamWrapper, ModelResponse
 from litellm.types.utils import PromptTokensDetailsWrapper
 from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
 from ...base import BaseLLM
 from ..common_utils import AnthropicError, process_anthropic_headers
@ -201,163 +199,6 @@ class AnthropicChatCompletion(BaseLLM):
    def __init__(self) -> None:
        """Initialise the handler by delegating to the base-class initialiser."""
        super().__init__()
    def _process_response(
        self,
        model: str,
        response: Union[requests.Response, httpx.Response],
        model_response: ModelResponse,
        stream: bool,
        logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,  # type: ignore
        optional_params: dict,
        api_key: str,
        data: Union[dict, str],
        messages: List,
        print_verbose,
        encoding,
        json_mode: bool,
    ) -> ModelResponse:
        """
        Fill `model_response` from a raw (non-streaming) Anthropic HTTP response.

        Args:
            model: Model name written to `model_response.model`.
            response: HTTP response whose JSON body is parsed.
            model_response: Object that is mutated in place and returned.
            stream: Not used by this method.
            logging_obj: Its `post_call` hook is invoked with the raw response text.
            optional_params: Not used by this method.
            api_key: Forwarded to the logging hook.
            data: Request payload, forwarded to the logging hook.
            messages: Input messages, forwarded to the logging hook.
            print_verbose: Callable used to print the raw response text.
            encoding: Not used by this method.
            json_mode: When true and exactly one tool call is returned, the tool
                call is converted into a plain content message.

        Returns:
            The same `model_response`, with message, finish reason, usage,
            creation time, model and hidden params set.

        Raises:
            AnthropicError: If the body is not valid JSON or contains an
                "error" key.
        """
        _hidden_params: Dict = {}
        _hidden_params["additional_headers"] = process_anthropic_headers(
            dict(response.headers)
        )

        ## LOGGING
        logging_obj.post_call(
            input=messages,
            api_key=api_key,
            original_response=response.text,
            additional_args={"complete_input_dict": data},
        )
        print_verbose(f"raw model_response: {response.text}")

        ## RESPONSE OBJECT
        try:
            completion_response = response.json()
        except Exception as e:
            raise AnthropicError(
                message="Unable to get json response - {}, Original Response: {}".format(
                    str(e), response.text
                ),
                status_code=response.status_code,
                headers=getattr(response, "headers", None),
            ) from e

        if "error" in completion_response:
            raise AnthropicError(
                message=str(completion_response["error"]),
                status_code=response.status_code,
                headers=getattr(response, "headers", None),
            )

        text_content = ""
        tool_calls: List[ChatCompletionToolCallChunk] = []
        for idx, content in enumerate(completion_response["content"]):
            if content["type"] == "text":
                text_content += content["text"]
            ## TOOL CALLING
            elif content["type"] == "tool_use":
                tool_calls.append(
                    ChatCompletionToolCallChunk(
                        id=content["id"],
                        type="function",
                        function=ChatCompletionToolCallFunctionChunk(
                            name=content["name"],
                            arguments=json.dumps(content["input"]),
                        ),
                        index=idx,
                    )
                )

        _message = litellm.Message(
            tool_calls=tool_calls,
            content=text_content or None,
        )

        ## HANDLE JSON MODE - anthropic returns single function call
        if json_mode and len(tool_calls) == 1:
            # The converter returns None when the tool call has no arguments,
            # in which case the tool-call message is kept as is.
            _converted_message = self._convert_tool_response_to_message(
                tool_calls=tool_calls,
            )
            if _converted_message is not None:
                completion_response["stop_reason"] = "stop"
                _message = _converted_message

        model_response.choices[0].message = _message  # type: ignore
        # Stored on the dict that is assigned to the response at the end of this
        # method; writing it to the old `model_response._hidden_params` would be
        # lost when that dict is replaced below.
        _hidden_params["original_response"] = completion_response[
            "content"
        ]  # allow user to access raw anthropic tool calling response
        model_response.choices[0].finish_reason = map_finish_reason(
            completion_response["stop_reason"]
        )

        ## CALCULATING USAGE
        _usage = completion_response["usage"]
        completion_tokens = _usage["output_tokens"]
        # A missing or null cache counter is treated as 0.
        cache_creation_input_tokens: int = (
            _usage.get("cache_creation_input_tokens") or 0
        )
        cache_read_input_tokens: int = _usage.get("cache_read_input_tokens") or 0
        # Reported prompt tokens include both cache-creation and cache-read tokens.
        prompt_tokens = (
            _usage["input_tokens"]
            + cache_creation_input_tokens
            + cache_read_input_tokens
        )

        model_response.created = int(time.time())
        model_response.model = model

        usage = Usage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
            prompt_tokens_details=PromptTokensDetailsWrapper(
                cached_tokens=cache_read_input_tokens
            ),
            cache_creation_input_tokens=cache_creation_input_tokens,
            cache_read_input_tokens=cache_read_input_tokens,
        )
        setattr(model_response, "usage", usage)  # type: ignore

        model_response._hidden_params = _hidden_params
        return model_response
    @staticmethod
    def _convert_tool_response_to_message(
        tool_calls: List[ChatCompletionToolCallChunk],
    ) -> Optional[LitellmMessage]:
        """
        In JSON mode, Anthropic API returns JSON schema as a tool call, we need to convert it to a message to follow the OpenAI format
        """
        ## HANDLE JSON MODE - anthropic returns single function call
        json_mode_content_str: Optional[str] = tool_calls[0]["function"].get(
            "arguments"
        )
        try:
            if json_mode_content_str is not None:
                args = json.loads(json_mode_content_str)
                if (
                    isinstance(args, dict)
                    and (values := args.get("values")) is not None
                ):
                    _message = litellm.Message(content=json.dumps(values))
                    return _message
                else:
                    # a lot of the times the `values` key is not present in the tool response
                    # relevant issue: https://github.com/BerriAI/litellm/issues/6741
                    _message = litellm.Message(content=json.dumps(args))
                    return _message
        except json.JSONDecodeError:
            # json decode error does occur, return the original tool response str
            return litellm.Message(content=json_mode_content_str)
        return None
    async def acompletion_stream_function(
        self,
        model: str,
@ -454,7 +295,7 @@ class AnthropicChatCompletion(BaseLLM):
                headers=error_headers,
            )
-        return self._process_response(
+        return AnthropicConfig._process_response(
            model=model,
            response=response,
            model_response=model_response,
@ -630,7 +471,7 @@ class AnthropicChatCompletion(BaseLLM):
                        headers=error_headers,
                    )
-        return self._process_response(
+        return AnthropicConfig._process_response(
            model=model,
            response=response,
            model_response=model_response,
@ -855,7 +696,7 @@ class ModelResponseIterator:
            tool_use: The ChatCompletionToolCallChunk to use in the chunk response
        """
        if self.json_mode is True and tool_use is not None:
-            message = AnthropicChatCompletion._convert_tool_response_to_message(
+            message = AnthropicConfig._convert_tool_response_to_message(
                tool_calls=[tool_use]
            )
            if message is not None:
--- a/litellm/llms/anthropic/chat/transformation.py
+++ b/litellm/llms/anthropic/chat/transformation.py
@ -1,7 +1,14 @@
 import json
 import time
 import types
-from typing import List, Literal, Optional, Tuple, Union
+from re import A
 from typing import Dict, List, Literal, Optional, Tuple, Union
 import httpx
 import requests
 import litellm
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.llms.prompt_templates.factory import anthropic_messages_pt
 from litellm.types.llms.anthropic import (
    AllAnthropicToolsValues,
@ -18,12 +25,23 @@ from litellm.types.llms.openai import (
    AllMessageValues,
    ChatCompletionCachedContent,
    ChatCompletionSystemMessage,
    ChatCompletionToolCallChunk,
    ChatCompletionToolCallFunctionChunk,
    ChatCompletionToolParam,
    ChatCompletionToolParamFunctionChunk,
    ChatCompletionUsageBlock,
 )
 from litellm.types.utils import Message as LitellmMessage
 from litellm.types.utils import PromptTokensDetailsWrapper
 from litellm.utils import (
    CustomStreamWrapper,
    ModelResponse,
    Usage,
    add_dummy_tool,
    has_tool_call_blocks,
 )
 from litellm.utils import add_dummy_tool, has_tool_call_blocks
-from ..common_utils import AnthropicError
+from ..common_utils import AnthropicError, process_anthropic_headers
 class AnthropicConfig:
@ -534,3 +552,162 @@ class AnthropicConfig:
        if not is_vertex_request:
            data["model"] = model
        return data
    @staticmethod
    def _process_response(
        model: str,
        response: Union[requests.Response, httpx.Response],
        model_response: ModelResponse,
        stream: bool,
        logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,  # type: ignore
        optional_params: dict,
        api_key: str,
        data: Union[dict, str],
        messages: List,
        print_verbose,
        encoding,
        json_mode: bool,
    ) -> ModelResponse:
        """
        Fill `model_response` from a raw (non-streaming) Anthropic HTTP response.

        Args:
            model: Model name written to `model_response.model`.
            response: HTTP response whose JSON body is parsed.
            model_response: Object that is mutated in place and returned.
            stream: Not used by this method.
            logging_obj: Its `post_call` hook is invoked with the raw response text.
            optional_params: Not used by this method.
            api_key: Forwarded to the logging hook.
            data: Request payload, forwarded to the logging hook.
            messages: Input messages, forwarded to the logging hook.
            print_verbose: Callable used to print the raw response text.
            encoding: Not used by this method.
            json_mode: When true and exactly one tool call is returned, the tool
                call is converted into a plain content message.

        Returns:
            The same `model_response`, with message, finish reason, usage,
            creation time, model and hidden params set.

        Raises:
            AnthropicError: If the body is not valid JSON or contains an
                "error" key.
        """
        _hidden_params: Dict = {}
        _hidden_params["additional_headers"] = process_anthropic_headers(
            dict(response.headers)
        )

        ## LOGGING
        logging_obj.post_call(
            input=messages,
            api_key=api_key,
            original_response=response.text,
            additional_args={"complete_input_dict": data},
        )
        print_verbose(f"raw model_response: {response.text}")

        ## RESPONSE OBJECT
        try:
            completion_response = response.json()
        except Exception as e:
            raise AnthropicError(
                message="Unable to get json response - {}, Original Response: {}".format(
                    str(e), response.text
                ),
                status_code=response.status_code,
                headers=getattr(response, "headers", None),
            ) from e

        if "error" in completion_response:
            raise AnthropicError(
                message=str(completion_response["error"]),
                status_code=response.status_code,
                headers=getattr(response, "headers", None),
            )

        text_content = ""
        tool_calls: List[ChatCompletionToolCallChunk] = []
        for idx, content in enumerate(completion_response["content"]):
            if content["type"] == "text":
                text_content += content["text"]
            ## TOOL CALLING
            elif content["type"] == "tool_use":
                tool_calls.append(
                    ChatCompletionToolCallChunk(
                        id=content["id"],
                        type="function",
                        function=ChatCompletionToolCallFunctionChunk(
                            name=content["name"],
                            arguments=json.dumps(content["input"]),
                        ),
                        index=idx,
                    )
                )

        _message = litellm.Message(
            tool_calls=tool_calls,
            content=text_content or None,
        )

        ## HANDLE JSON MODE - anthropic returns single function call
        if json_mode and len(tool_calls) == 1:
            # The converter returns None when the tool call has no arguments,
            # in which case the tool-call message is kept as is.
            _converted_message = AnthropicConfig._convert_tool_response_to_message(
                tool_calls=tool_calls,
            )
            if _converted_message is not None:
                completion_response["stop_reason"] = "stop"
                _message = _converted_message

        model_response.choices[0].message = _message  # type: ignore
        # Stored on the dict that is assigned to the response at the end of this
        # method; writing it to the old `model_response._hidden_params` would be
        # lost when that dict is replaced below.
        _hidden_params["original_response"] = completion_response[
            "content"
        ]  # allow user to access raw anthropic tool calling response
        model_response.choices[0].finish_reason = map_finish_reason(
            completion_response["stop_reason"]
        )

        ## CALCULATING USAGE
        _usage = completion_response["usage"]
        completion_tokens = _usage["output_tokens"]
        # A missing or null cache counter is treated as 0.
        cache_creation_input_tokens: int = (
            _usage.get("cache_creation_input_tokens") or 0
        )
        cache_read_input_tokens: int = _usage.get("cache_read_input_tokens") or 0
        # Reported prompt tokens include both cache-creation and cache-read tokens.
        prompt_tokens = (
            _usage["input_tokens"]
            + cache_creation_input_tokens
            + cache_read_input_tokens
        )

        model_response.created = int(time.time())
        model_response.model = model

        usage = Usage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
            prompt_tokens_details=PromptTokensDetailsWrapper(
                cached_tokens=cache_read_input_tokens
            ),
            cache_creation_input_tokens=cache_creation_input_tokens,
            cache_read_input_tokens=cache_read_input_tokens,
        )
        setattr(model_response, "usage", usage)  # type: ignore

        model_response._hidden_params = _hidden_params
        return model_response
    @staticmethod
    def _convert_tool_response_to_message(
        tool_calls: List[ChatCompletionToolCallChunk],
    ) -> Optional[LitellmMessage]:
        """
        In JSON mode, Anthropic API returns JSON schema as a tool call, we need to convert it to a message to follow the OpenAI format
        """
        ## HANDLE JSON MODE - anthropic returns single function call
        json_mode_content_str: Optional[str] = tool_calls[0]["function"].get(
            "arguments"
        )
        try:
            if json_mode_content_str is not None:
                args = json.loads(json_mode_content_str)
                if (
                    isinstance(args, dict)
                    and (values := args.get("values")) is not None
                ):
                    _message = litellm.Message(content=json.dumps(values))
                    return _message
                else:
                    # a lot of the times the `values` key is not present in the tool response
                    # relevant issue: https://github.com/BerriAI/litellm/issues/6741
                    _message = litellm.Message(content=json.dumps(args))
                    return _message
        except json.JSONDecodeError:
            # json decode error does occur, return the original tool response str
            return litellm.Message(content=json_mode_content_str)
        return None
--- a/tests/llm_translation/test_anthropic_completion.py
+++ b/tests/llm_translation/test_anthropic_completion.py
@ -712,9 +712,7 @@ def test_convert_tool_response_to_message_with_values():
        )
    ]
-    message = AnthropicChatCompletion._convert_tool_response_to_message(
+    message = AnthropicConfig._convert_tool_response_to_message(tool_calls=tool_calls)
        tool_calls=tool_calls
    )
    assert message is not None
    assert message.content == '{"name": "John", "age": 30}'
@ -739,9 +737,7 @@ def test_convert_tool_response_to_message_without_values():
        )
    ]
-    message = AnthropicChatCompletion._convert_tool_response_to_message(
+    message = AnthropicConfig._convert_tool_response_to_message(tool_calls=tool_calls)
        tool_calls=tool_calls
    )
    assert message is not None
    assert message.content == '{"name": "John", "age": 30}'
@ -760,9 +756,7 @@ def test_convert_tool_response_to_message_invalid_json():
        )
    ]
-    message = AnthropicChatCompletion._convert_tool_response_to_message(
+    message = AnthropicConfig._convert_tool_response_to_message(tool_calls=tool_calls)
        tool_calls=tool_calls
    )
    assert message is not None
    assert message.content == "invalid json"
@ -779,8 +773,6 @@ def test_convert_tool_response_to_message_no_arguments():
        )
    ]
-    message = AnthropicChatCompletion._convert_tool_response_to_message(
+    message = AnthropicConfig._convert_tool_response_to_message(tool_calls=tool_calls)
        tool_calls=tool_calls
    )
    assert message is None