Merge pull request #4635 from BerriAI/litellm_anthropic_adapter

Anthropic `/v1/messages` endpoint support
This commit is contained in:
Krish Dholakia 2024-07-10 22:41:53 -07:00 committed by GitHub
commit dacce3d78b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 1163 additions and 161 deletions

View file

@ -882,3 +882,8 @@ from .batches.main import *
from .files.main import *
from .scheduler import *
from .cost_calculator import response_cost_calculator, cost_per_token
### ADAPTERS ###
from .types.adapter import AdapterItem
adapters: List[AdapterItem] = []

View file

@ -0,0 +1,50 @@
# What is this?
## Translates OpenAI call to Anthropic `/v1/messages` format
import json
import os
import traceback
import uuid
from typing import Literal, Optional
import dotenv
import httpx
from pydantic import BaseModel
import litellm
from litellm import ChatCompletionRequest, verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.types.llms.anthropic import AnthropicMessagesRequest, AnthropicResponse
class AnthropicAdapter(CustomLogger):
def __init__(self) -> None:
super().__init__()
def translate_completion_input_params(
self, kwargs
) -> Optional[ChatCompletionRequest]:
"""
- translate params, where needed
- pass rest, as is
"""
request_body = AnthropicMessagesRequest(**kwargs) # type: ignore
translated_body = litellm.AnthropicConfig().translate_anthropic_to_openai(
anthropic_message_request=request_body
)
return translated_body
def translate_completion_output_params(
self, response: litellm.ModelResponse
) -> Optional[AnthropicResponse]:
return litellm.AnthropicConfig().translate_openai_response_to_anthropic(
response=response
)
def translate_completion_output_params_streaming(self) -> Optional[BaseModel]:
return super().translate_completion_output_params_streaming()
anthropic_adapter = AnthropicAdapter()
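For a quick sense of how the adapter is wired up, here is a minimal usage sketch (it mirrors the unit test added later in this commit; the model name and mock_response are placeholders, so no real provider call is made):

import litellm
from litellm import adapter_completion
from litellm.adapters.anthropic_adapter import anthropic_adapter

# register the adapter so adapter_completion() can look it up by id
litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]

# Anthropic-style kwargs in, AnthropicResponse out; the call itself routes through litellm.completion()
response = adapter_completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
    adapter_id="anthropic",
    mock_response="This is a fake call",  # mocked response, no API key needed
)
print(response)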

View file

@ -5,9 +5,12 @@ import traceback
from typing import Literal, Optional, Union
import dotenv
from pydantic import BaseModel
from litellm.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.llms.openai import ChatCompletionRequest
from litellm.types.utils import ModelResponse
class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callback#callback-class
@ -55,6 +58,30 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
def pre_call_check(self, deployment: dict) -> Optional[dict]:
pass
#### ADAPTERS #### Allow calling 100+ LLMs in custom format - https://github.com/BerriAI/litellm/pulls
def translate_completion_input_params(
self, kwargs
) -> Optional[ChatCompletionRequest]:
"""
Translates the input params, from the provider's native format to the litellm.completion() format.
"""
pass
def translate_completion_output_params(
self, response: ModelResponse
) -> Optional[BaseModel]:
"""
Translates the output params, from the OpenAI format to the custom format.
"""
pass
def translate_completion_output_params_streaming(self) -> Optional[BaseModel]:
"""
Translates the streaming chunk, from the OpenAI format to the custom format.
"""
pass
#### CALL HOOKS - proxy only ####
"""
Control / modify incoming and outgoing data before calling the model
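As a sketch of how these hooks compose into a new adapter (the class name and the passthrough behavior are hypothetical; only the method signatures come from the hunk above):

from typing import Optional

from pydantic import BaseModel

from litellm.integrations.custom_logger import CustomLogger
from litellm.types.llms.openai import ChatCompletionRequest
from litellm.types.utils import ModelResponse


class MyFormatAdapter(CustomLogger):
    def translate_completion_input_params(self, kwargs) -> Optional[ChatCompletionRequest]:
        # map the provider-native payload onto litellm.completion() kwargs
        return ChatCompletionRequest(model=kwargs["model"], messages=kwargs["messages"])

    def translate_completion_output_params(self, response: ModelResponse) -> Optional[BaseModel]:
        # map the OpenAI-format response back into the custom format;
        # ModelResponse is itself a pydantic model, so returning it unchanged is valid
        return response

    def translate_completion_output_params_streaming(self) -> Optional[BaseModel]:
        # streaming translation is left out of this sketch
        return None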

View file

@ -20,19 +20,43 @@ from litellm.llms.custom_httpx.http_handler import (
_get_httpx_client,
)
from litellm.types.llms.anthropic import (
AnthopicMessagesAssistantMessageParam,
AnthropicFinishReason,
AnthropicMessagesRequest,
AnthropicMessagesTool,
AnthropicMessagesToolChoice,
AnthropicMessagesUserMessageParam,
AnthropicResponse,
AnthropicResponseContentBlockText,
AnthropicResponseContentBlockToolUse,
AnthropicResponseUsageBlock,
ContentBlockDelta,
ContentBlockStart,
MessageBlockDelta,
MessageStartBlock,
)
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionAssistantMessage,
ChatCompletionAssistantToolCall,
ChatCompletionImageObject,
ChatCompletionImageUrlObject,
ChatCompletionRequest,
ChatCompletionResponseMessage,
ChatCompletionSystemMessage,
ChatCompletionTextObject,
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
ChatCompletionToolChoiceFunctionParam,
ChatCompletionToolChoiceObjectParam,
ChatCompletionToolChoiceValues,
ChatCompletionToolMessage,
ChatCompletionToolParam,
ChatCompletionToolParamFunctionChunk,
ChatCompletionUsageBlock,
ChatCompletionUserMessage,
)
from litellm.types.utils import GenericStreamingChunk
from litellm.types.utils import Choices, GenericStreamingChunk
from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
from .base import BaseLLM
@ -168,6 +192,287 @@ class AnthropicConfig:
optional_params["top_p"] = value
return optional_params
### FOR [BETA] `/v1/messages` endpoint support
def translatable_anthropic_params(self) -> List:
"""
Which Anthropic params we need to translate to the OpenAI format.
"""
return ["messages", "metadata", "system", "tool_choice", "tools"]
def translate_anthropic_messages_to_openai(
self,
messages: List[
Union[
AnthropicMessagesUserMessageParam,
AnthopicMessagesAssistantMessageParam,
]
],
) -> List:
new_messages: List[AllMessageValues] = []
for m in messages:
user_message: Optional[ChatCompletionUserMessage] = None
tool_message_list: List[ChatCompletionToolMessage] = []
## USER MESSAGE ##
if m["role"] == "user":
## translate user message
if isinstance(m["content"], str):
user_message = ChatCompletionUserMessage(
role="user", content=m["content"]
)
elif isinstance(m["content"], list):
new_user_content_list: List[
Union[ChatCompletionTextObject, ChatCompletionImageObject]
] = []
for content in m["content"]:
if content["type"] == "text":
text_obj = ChatCompletionTextObject(
type="text", text=content["text"]
)
new_user_content_list.append(text_obj)
elif content["type"] == "image":
image_url = ChatCompletionImageUrlObject(
url=f"data:{content['type']};base64,{content['source']}"
)
image_obj = ChatCompletionImageObject(
type="image_url", image_url=image_url
)
new_user_content_list.append(image_obj)
elif content["type"] == "tool_result":
if "content" not in content:
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content="",
)
tool_message_list.append(tool_result)
elif isinstance(content["content"], str):
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=content["content"],
)
tool_message_list.append(tool_result)
elif isinstance(content["content"], list):
for c in content["content"]:
if c["type"] == "text":
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=c["text"],
)
tool_message_list.append(tool_result)
elif c["type"] == "image":
image_str = (
f"data:{c['type']};base64,{c['source']}"
)
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=image_str,
)
tool_message_list.append(tool_result)
if user_message is not None:
new_messages.append(user_message)
if len(tool_message_list) > 0:
new_messages.extend(tool_message_list)
## ASSISTANT MESSAGE ##
assistant_message_str: Optional[str] = None
tool_calls: List[ChatCompletionAssistantToolCall] = []
if m["role"] == "assistant":
if isinstance(m["content"], str):
assistant_message_str = m["content"]
elif isinstance(m["content"], list):
for content in m["content"]:
if content["type"] == "text":
if assistant_message_str is None:
assistant_message_str = content["text"]
else:
assistant_message_str += content["text"]
elif content["type"] == "tool_use":
function_chunk = ChatCompletionToolCallFunctionChunk(
name=content["name"],
arguments=json.dumps(content["input"]),
)
tool_calls.append(
ChatCompletionAssistantToolCall(
id=content["id"],
type="function",
function=function_chunk,
)
)
if assistant_message_str is not None or len(tool_calls) > 0:
assistant_message = ChatCompletionAssistantMessage(
role="assistant",
content=assistant_message_str,
)
if len(tool_calls) > 0:
assistant_message["tool_calls"] = tool_calls
new_messages.append(assistant_message)
return new_messages
def translate_anthropic_tool_choice_to_openai(
self, tool_choice: AnthropicMessagesToolChoice
) -> ChatCompletionToolChoiceValues:
if tool_choice["type"] == "any":
return "required"
elif tool_choice["type"] == "auto":
return "auto"
elif tool_choice["type"] == "tool":
tc_function_param = ChatCompletionToolChoiceFunctionParam(
name=tool_choice.get("name", "")
)
return ChatCompletionToolChoiceObjectParam(
type="function", function=tc_function_param
)
else:
raise ValueError(
"Incompatible tool choice param submitted - {}".format(tool_choice)
)
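For reference, the mapping this method performs (assuming `import litellm`; the tool name is a placeholder):

cfg = litellm.AnthropicConfig()
cfg.translate_anthropic_tool_choice_to_openai(tool_choice={"type": "any"})   # -> "required"
cfg.translate_anthropic_tool_choice_to_openai(tool_choice={"type": "auto"})  # -> "auto"
cfg.translate_anthropic_tool_choice_to_openai(
    tool_choice={"type": "tool", "name": "get_weather"}
)  # -> {"type": "function", "function": {"name": "get_weather"}}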
def translate_anthropic_tools_to_openai(
self, tools: List[AnthropicMessagesTool]
) -> List[ChatCompletionToolParam]:
new_tools: List[ChatCompletionToolParam] = []
for tool in tools:
function_chunk = ChatCompletionToolParamFunctionChunk(
name=tool["name"],
parameters=tool["input_schema"],
)
if "description" in tool:
function_chunk["description"] = tool["description"]
new_tools.append(
ChatCompletionToolParam(type="function", function=function_chunk)
)
return new_tools
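And the analogous mapping for tool definitions (tool name and schema are placeholders):

anthropic_tool = {
    "name": "get_weather",
    "description": "Get the current weather for a city",
    "input_schema": {"type": "object", "properties": {"city": {"type": "string"}}},
}
openai_tools = litellm.AnthropicConfig().translate_anthropic_tools_to_openai(tools=[anthropic_tool])
# -> [{"type": "function", "function": {"name": "get_weather",
#       "parameters": {"type": "object", ...}, "description": "Get the current weather for a city"}}]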
def translate_anthropic_to_openai(
self, anthropic_message_request: AnthropicMessagesRequest
) -> ChatCompletionRequest:
"""
This is used by the beta Anthropic Adapter for translating Anthropic `/v1/messages` requests to the OpenAI format.
"""
new_messages: List[AllMessageValues] = []
## CONVERT ANTHROPIC MESSAGES TO OPENAI
new_messages = self.translate_anthropic_messages_to_openai(
messages=anthropic_message_request["messages"]
)
## ADD SYSTEM MESSAGE TO MESSAGES
if "system" in anthropic_message_request:
new_messages.insert(
0,
ChatCompletionSystemMessage(
role="system", content=anthropic_message_request["system"]
),
)
new_kwargs: ChatCompletionRequest = {
"model": anthropic_message_request["model"],
"messages": new_messages,
}
## CONVERT METADATA (user_id)
if "metadata" in anthropic_message_request:
if "user_id" in anthropic_message_request["metadata"]:
new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"]
## CONVERT TOOL CHOICE
if "tool_choice" in anthropic_message_request:
new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai(
tool_choice=anthropic_message_request["tool_choice"]
)
## CONVERT TOOLS
if "tools" in anthropic_message_request:
new_kwargs["tools"] = self.translate_anthropic_tools_to_openai(
tools=anthropic_message_request["tools"]
)
translatable_params = self.translatable_anthropic_params()
for k, v in anthropic_message_request.items():
if k not in translatable_params: # pass remaining params as is
new_kwargs[k] = v # type: ignore
return new_kwargs
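Putting these together, a request in Anthropic `/v1/messages` shape translates roughly as follows (illustrative values; params outside translatable_anthropic_params(), such as max_tokens, are passed through unchanged):

anthropic_request = {
    "model": "claude-3-5-sonnet-20240620",
    "max_tokens": 256,
    "system": "You are a helpful assistant.",
    "messages": [{"role": "user", "content": "Hey, how's it going?"}],
    "metadata": {"user_id": "user-123"},
}
openai_request = litellm.AnthropicConfig().translate_anthropic_to_openai(
    anthropic_message_request=anthropic_request  # type: ignore
)
# openai_request == {
#     "model": "claude-3-5-sonnet-20240620",
#     "messages": [
#         {"role": "system", "content": "You are a helpful assistant."},
#         {"role": "user", "content": "Hey, how's it going?"},
#     ],
#     "user": "user-123",
#     "max_tokens": 256,
# }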
def _translate_openai_content_to_anthropic(
self, choices: List[Choices]
) -> List[
Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
]:
new_content: List[
Union[
AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse
]
] = []
for choice in choices:
if (
choice.message.tool_calls is not None
and len(choice.message.tool_calls) > 0
):
for tool_call in choice.message.tool_calls:
new_content.append(
AnthropicResponseContentBlockToolUse(
type="tool_use",
id=tool_call.id,
name=tool_call.function.name or "",
input=json.loads(tool_call.function.arguments),
)
)
elif choice.message.content is not None:
new_content.append(
AnthropicResponseContentBlockText(
type="text", text=choice.message.content
)
)
return new_content
def _translate_openai_finish_reason_to_anthropic(
self, openai_finish_reason: str
) -> AnthropicFinishReason:
if openai_finish_reason == "stop":
return "end_turn"
elif openai_finish_reason == "length":
return "max_tokens"
elif openai_finish_reason == "tool_calls":
return "tool_use"
return "end_turn"
def translate_openai_response_to_anthropic(
self, response: litellm.ModelResponse
) -> AnthropicResponse:
## translate content block
anthropic_content = self._translate_openai_content_to_anthropic(choices=response.choices) # type: ignore
## extract finish reason
anthropic_finish_reason = self._translate_openai_finish_reason_to_anthropic(
openai_finish_reason=response.choices[0].finish_reason # type: ignore
)
# extract usage
usage: litellm.Usage = getattr(response, "usage")
anthropic_usage = AnthropicResponseUsageBlock(
input_tokens=usage.prompt_tokens, output_tokens=usage.completion_tokens
)
translated_obj = AnthropicResponse(
id=response.id,
type="message",
role="assistant",
model=response.model or "unknown-model",
stop_sequence=None,
usage=anthropic_usage,
content=anthropic_content,
stop_reason=anthropic_finish_reason,
)
return translated_obj
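In the other direction, an OpenAI-format response maps back onto the Anthropic response model; a quick sketch using a mocked completion:

openai_response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    mock_response="Hello!",  # mocked, no provider call
)
anthropic_response = litellm.AnthropicConfig().translate_openai_response_to_anthropic(
    response=openai_response
)
# anthropic_response.content     -> [AnthropicResponseContentBlockText(type="text", text="Hello!")]
# anthropic_response.stop_reason -> "end_turn"   (mapped from finish_reason="stop")
# anthropic_response.usage       -> AnthropicResponseUsageBlock(input_tokens=..., output_tokens=...)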
# makes headers for API call
def validate_environment(api_key, user_headers):
@ -231,121 +536,6 @@ class AnthropicChatCompletion(BaseLLM):
def __init__(self) -> None:
super().__init__()
# def process_streaming_response(
# self,
# model: str,
# response: Union[requests.Response, httpx.Response],
# model_response: ModelResponse,
# stream: bool,
# logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
# optional_params: dict,
# api_key: str,
# data: Union[dict, str],
# messages: List,
# print_verbose,
# encoding,
# ) -> CustomStreamWrapper:
# """
# Return stream object for tool-calling + streaming
# """
# ## LOGGING
# logging_obj.post_call(
# input=messages,
# api_key=api_key,
# original_response=response.text,
# additional_args={"complete_input_dict": data},
# )
# print_verbose(f"raw model_response: {response.text}")
# ## RESPONSE OBJECT
# try:
# completion_response = response.json()
# except:
# raise AnthropicError(
# message=response.text, status_code=response.status_code
# )
# text_content = ""
# tool_calls = []
# for content in completion_response["content"]:
# if content["type"] == "text":
# text_content += content["text"]
# ## TOOL CALLING
# elif content["type"] == "tool_use":
# tool_calls.append(
# {
# "id": content["id"],
# "type": "function",
# "function": {
# "name": content["name"],
# "arguments": json.dumps(content["input"]),
# },
# }
# )
# if "error" in completion_response:
# raise AnthropicError(
# message=str(completion_response["error"]),
# status_code=response.status_code,
# )
# _message = litellm.Message(
# tool_calls=tool_calls,
# content=text_content or None,
# )
# model_response.choices[0].message = _message # type: ignore
# model_response._hidden_params["original_response"] = completion_response[
# "content"
# ] # allow user to access raw anthropic tool calling response
# model_response.choices[0].finish_reason = map_finish_reason(
# completion_response["stop_reason"]
# )
# print_verbose("INSIDE ANTHROPIC STREAMING TOOL CALLING CONDITION BLOCK")
# # return an iterator
# streaming_model_response = ModelResponse(stream=True)
# streaming_model_response.choices[0].finish_reason = model_response.choices[ # type: ignore
# 0
# ].finish_reason
# # streaming_model_response.choices = [litellm.utils.StreamingChoices()]
# streaming_choice = litellm.utils.StreamingChoices()
# streaming_choice.index = model_response.choices[0].index
# _tool_calls = []
# print_verbose(
# f"type of model_response.choices[0]: {type(model_response.choices[0])}"
# )
# print_verbose(f"type of streaming_choice: {type(streaming_choice)}")
# if isinstance(model_response.choices[0], litellm.Choices):
# if getattr(
# model_response.choices[0].message, "tool_calls", None
# ) is not None and isinstance(
# model_response.choices[0].message.tool_calls, list
# ):
# for tool_call in model_response.choices[0].message.tool_calls:
# _tool_call = {**tool_call.dict(), "index": 0}
# _tool_calls.append(_tool_call)
# delta_obj = litellm.utils.Delta(
# content=getattr(model_response.choices[0].message, "content", None),
# role=model_response.choices[0].message.role,
# tool_calls=_tool_calls,
# )
# streaming_choice.delta = delta_obj
# streaming_model_response.choices = [streaming_choice]
# completion_stream = ModelResponseIterator(
# model_response=streaming_model_response
# )
# print_verbose(
# "Returns anthropic CustomStreamWrapper with 'cached_response' streaming object"
# )
# return CustomStreamWrapper(
# completion_stream=completion_stream,
# model=model,
# custom_llm_provider="cached_response",
# logging_obj=logging_obj,
# )
# else:
# raise AnthropicError(
# status_code=422,
# message="Unprocessable response object - {}".format(response.text),
# )
def process_response(
self,
model: str,

View file

@ -38,6 +38,7 @@ import dotenv
import httpx
import openai
import tiktoken
from pydantic import BaseModel
from typing_extensions import overload
import litellm
@ -48,6 +49,7 @@ from litellm import ( # type: ignore
get_litellm_params,
get_optional_params,
)
from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.utils import (
CustomStreamWrapper,
@ -3943,6 +3945,63 @@ def text_completion(
return text_completion_response
###### Adapter Completion ################
async def aadapter_completion(*, adapter_id: str, **kwargs) -> Optional[BaseModel]:
"""
Implemented to handle async calls for adapter_completion()
"""
try:
translation_obj: Optional[CustomLogger] = None
for item in litellm.adapters:
if item["id"] == adapter_id:
translation_obj = item["adapter"]
if translation_obj is None:
raise ValueError(
"No matching adapter given. Received 'adapter_id'={}, litellm.adapters={}".format(
adapter_id, litellm.adapters
)
)
new_kwargs = translation_obj.translate_completion_input_params(kwargs=kwargs)
response: ModelResponse = await acompletion(**new_kwargs) # type: ignore
translated_response = translation_obj.translate_completion_output_params(
response=response
)
return translated_response
except Exception as e:
raise e
def adapter_completion(*, adapter_id: str, **kwargs) -> Optional[BaseModel]:
translation_obj: Optional[CustomLogger] = None
for item in litellm.adapters:
if item["id"] == adapter_id:
translation_obj = item["adapter"]
if translation_obj is None:
raise ValueError(
"No matching adapter given. Received 'adapter_id'={}, litellm.adapters={}".format(
adapter_id, litellm.adapters
)
)
new_kwargs = translation_obj.translate_completion_input_params(kwargs=kwargs)
response: ModelResponse = completion(**new_kwargs) # type: ignore
translated_response = translation_obj.translate_completion_output_params(
response=response
)
return translated_response
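The async variant is used the same way; a minimal sketch (adapter registration as in the adapter module above, values are placeholders):

import asyncio

import litellm
from litellm.adapters.anthropic_adapter import anthropic_adapter

litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]

async def main():
    response = await litellm.aadapter_completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
        adapter_id="anthropic",
        mock_response="This is a fake call",  # mocked, no API key needed
    )
    print(response)

asyncio.run(main())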
##### Moderation #######################

View file

@ -2,18 +2,9 @@ model_list:
- model_name: "*"
litellm_params:
model: "openai/*"
- model_name: gemini-1.5-flash
- model_name: claude-3-5-sonnet-20240620
litellm_params:
model: gemini/gemini-1.5-flash
- model_name: whisper
litellm_params:
model: azure/azure-whisper
api_version: 2024-02-15-preview
api_base: os.environ/AZURE_EUROPE_API_BASE
api_key: os.environ/AZURE_EUROPE_API_KEY
model_info:
mode: audio_transcription
model: gpt-3.5-turbo
general_settings:

View file

@ -71,6 +71,11 @@ azure_api_key_header = APIKeyHeader(
auto_error=False,
description="Some older versions of the openai Python package will send an API-Key header with just the API key ",
)
anthropic_api_key_header = APIKeyHeader(
name="x-api-key",
auto_error=False,
description="If anthropic client used.",
)
def _get_bearer_token(
@ -87,6 +92,9 @@ async def user_api_key_auth(
request: Request,
api_key: str = fastapi.Security(api_key_header),
azure_api_key_header: str = fastapi.Security(azure_api_key_header),
anthropic_api_key_header: Optional[str] = fastapi.Security(
anthropic_api_key_header
),
) -> UserAPIKeyAuth:
from litellm.proxy.proxy_server import (
@ -114,6 +122,9 @@ async def user_api_key_auth(
elif isinstance(azure_api_key_header, str):
api_key = azure_api_key_header
elif isinstance(anthropic_api_key_header, str):
api_key = anthropic_api_key_header
parent_otel_span: Optional[Span] = None
if open_telemetry_logger is not None:
parent_otel_span = open_telemetry_logger.tracer.start_span(

View file

@ -215,6 +215,12 @@ from litellm.router import (
from litellm.router import ModelInfo as RouterModelInfo
from litellm.router import updateDeployment
from litellm.scheduler import DefaultPriorities, FlowItem, Scheduler
from litellm.types.llms.anthropic import (
AnthropicMessagesRequest,
AnthropicResponse,
AnthropicResponseContentBlockText,
AnthropicResponseUsageBlock,
)
from litellm.types.llms.openai import HttpxBinaryResponseContent
from litellm.types.router import RouterGeneralSettings
@ -5041,6 +5047,198 @@ async def moderations(
)
#### ANTHROPIC ENDPOINTS ####
@router.post(
"/v1/messages",
tags=["[beta] Anthropic `/v1/messages`"],
dependencies=[Depends(user_api_key_auth)],
response_model=AnthropicResponse,
)
async def anthropic_response(
anthropic_data: AnthropicMessagesRequest,
fastapi_response: Response,
request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
from litellm import adapter_completion
from litellm.adapters.anthropic_adapter import anthropic_adapter
litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]
global user_temperature, user_request_timeout, user_max_tokens, user_api_base
data: dict = {**anthropic_data, "adapter_id": "anthropic"}
try:
data["model"] = (
general_settings.get("completion_model", None) # server default
or user_model # model name passed via cli args
or data["model"] # default passed in http request
)
if user_model:
data["model"] = user_model
data = await add_litellm_data_to_request(
data=data, # type: ignore
request=request,
general_settings=general_settings,
user_api_key_dict=user_api_key_dict,
version=version,
proxy_config=proxy_config,
)
# override with user settings, these are params passed via cli
if user_temperature:
data["temperature"] = user_temperature
if user_request_timeout:
data["request_timeout"] = user_request_timeout
if user_max_tokens:
data["max_tokens"] = user_max_tokens
if user_api_base:
data["api_base"] = user_api_base
### MODEL ALIAS MAPPING ###
# check if model name in model alias map
# get the actual model name
if data["model"] in litellm.model_alias_map:
data["model"] = litellm.model_alias_map[data["model"]]
### CALL HOOKS ### - modify incoming data before calling the model
data = await proxy_logging_obj.pre_call_hook( # type: ignore
user_api_key_dict=user_api_key_dict, data=data, call_type="text_completion"
)
### ROUTE THE REQUESTs ###
router_model_names = llm_router.model_names if llm_router is not None else []
# skip router if user passed their key
if "api_key" in data:
llm_response = asyncio.create_task(litellm.aadapter_completion(**data))
elif (
llm_router is not None and data["model"] in router_model_names
): # model in router model list
llm_response = asyncio.create_task(llm_router.aadapter_completion(**data))
elif (
llm_router is not None
and llm_router.model_group_alias is not None
and data["model"] in llm_router.model_group_alias
): # model set in model_group_alias
llm_response = asyncio.create_task(llm_router.aadapter_completion(**data))
elif (
llm_router is not None and data["model"] in llm_router.deployment_names
): # model in router deployments, calling a specific deployment on the router
llm_response = asyncio.create_task(
llm_router.aadapter_completion(**data, specific_deployment=True)
)
elif (
llm_router is not None and data["model"] in llm_router.get_model_ids()
): # model in router model list
llm_response = asyncio.create_task(llm_router.aadapter_completion(**data))
elif (
llm_router is not None
and data["model"] not in router_model_names
and llm_router.default_deployment is not None
): # model in router deployments, calling a specific deployment on the router
llm_response = asyncio.create_task(llm_router.aadapter_completion(**data))
elif user_model is not None: # `litellm --model <your-model-name>`
llm_response = asyncio.create_task(litellm.aadapter_completion(**data))
else:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={
"error": "completion: Invalid model name passed in model="
+ data.get("model", "")
},
)
# Await the llm_response task
response = await llm_response
hidden_params = getattr(response, "_hidden_params", {}) or {}
model_id = hidden_params.get("model_id", None) or ""
cache_key = hidden_params.get("cache_key", None) or ""
api_base = hidden_params.get("api_base", None) or ""
response_cost = hidden_params.get("response_cost", None) or ""
### ALERTING ###
asyncio.create_task(
proxy_logging_obj.update_request_status(
litellm_call_id=data.get("litellm_call_id", ""), status="success"
)
)
verbose_proxy_logger.debug("final response: %s", response)
fastapi_response.headers.update(
get_custom_headers(
user_api_key_dict=user_api_key_dict,
model_id=model_id,
cache_key=cache_key,
api_base=api_base,
version=version,
response_cost=response_cost,
)
)
verbose_proxy_logger.info("\nResponse from Litellm:\n{}".format(response))
return response
except RejectedRequestError as e:
_data = e.request_data
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict,
original_exception=e,
request_data=_data,
)
if _data.get("stream", None) is not None and _data["stream"] == True:
_chat_response = litellm.ModelResponse()
_usage = litellm.Usage(
prompt_tokens=0,
completion_tokens=0,
total_tokens=0,
)
_chat_response.usage = _usage # type: ignore
_chat_response.choices[0].message.content = e.message # type: ignore
_iterator = litellm.utils.ModelResponseIterator(
model_response=_chat_response, convert_to_delta=True
)
_streaming_response = litellm.TextCompletionStreamWrapper(
completion_stream=_iterator,
model=_data.get("model", ""),
)
selected_data_generator = select_data_generator(
response=_streaming_response,
user_api_key_dict=user_api_key_dict,
request_data=data,
)
return StreamingResponse(
selected_data_generator,
media_type="text/event-stream",
headers={},
)
else:
_response = litellm.TextCompletionResponse()
_response.choices[0].text = e.message
return _response
except Exception as e:
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.completion(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
type=getattr(e, "type", "None"),
param=getattr(e, "param", "None"),
code=getattr(e, "status_code", 500),
)
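With the proxy running, the new endpoint can be exercised directly; a sketch using httpx (the base URL and key are placeholders for a locally running proxy):

import httpx

resp = httpx.post(
    "http://0.0.0.0:4000/v1/messages",  # local proxy, placeholder URL
    headers={"x-api-key": "sk-1234"},   # accepted via the new anthropic_api_key_header
    json={
        "model": "claude-3-5-sonnet-20240620",
        "max_tokens": 1024,
        "messages": [{"role": "user", "content": "Hey, how's it going?"}],
    },
)
print(resp.json())  # AnthropicResponse-shaped JSON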
#### DEV UTILS ####
# @router.get(

View file

@ -1765,6 +1765,125 @@ class Router:
self.fail_calls[model] += 1
raise e
async def aadapter_completion(
self,
adapter_id: str,
model: str,
is_retry: Optional[bool] = False,
is_fallback: Optional[bool] = False,
is_async: Optional[bool] = False,
**kwargs,
):
try:
kwargs["model"] = model
kwargs["adapter_id"] = adapter_id
kwargs["original_function"] = self._aadapter_completion
kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)
timeout = kwargs.get("request_timeout", self.timeout)
kwargs.setdefault("metadata", {}).update({"model_group": model})
response = await self.async_function_with_fallbacks(**kwargs)
return response
except Exception as e:
asyncio.create_task(
send_llm_exception_alert(
litellm_router_instance=self,
request_kwargs=kwargs,
error_traceback_str=traceback.format_exc(),
original_exception=e,
)
)
raise e
async def _aadapter_completion(self, adapter_id: str, model: str, **kwargs):
try:
verbose_router_logger.debug(
f"Inside _aadapter_completion()- model: {model}; kwargs: {kwargs}"
)
deployment = await self.async_get_available_deployment(
model=model,
messages=[{"role": "user", "content": "default text"}],
specific_deployment=kwargs.pop("specific_deployment", None),
)
kwargs.setdefault("metadata", {}).update(
{
"deployment": deployment["litellm_params"]["model"],
"model_info": deployment.get("model_info", {}),
"api_base": deployment.get("litellm_params", {}).get("api_base"),
}
)
kwargs["model_info"] = deployment.get("model_info", {})
data = deployment["litellm_params"].copy()
model_name = data["model"]
for k, v in self.default_litellm_params.items():
if (
k not in kwargs
): # prioritize model-specific params > default router params
kwargs[k] = v
elif k == "metadata":
kwargs[k].update(v)
potential_model_client = self._get_client(
deployment=deployment, kwargs=kwargs, client_type="async"
)
# check if provided keys == client keys #
dynamic_api_key = kwargs.get("api_key", None)
if (
dynamic_api_key is not None
and potential_model_client is not None
and dynamic_api_key != potential_model_client.api_key
):
model_client = None
else:
model_client = potential_model_client
self.total_calls[model_name] += 1
response = litellm.aadapter_completion(
**{
**data,
"adapter_id": adapter_id,
"caching": self.cache_responses,
"client": model_client,
"timeout": self.timeout,
**kwargs,
}
)
rpm_semaphore = self._get_client(
deployment=deployment,
kwargs=kwargs,
client_type="max_parallel_requests",
)
if rpm_semaphore is not None and isinstance(
rpm_semaphore, asyncio.Semaphore
):
async with rpm_semaphore:
"""
- Check rpm limits before making the call
- If allowed, increment the rpm limit (allows global value to be updated, concurrency-safe)
"""
await self.async_routing_strategy_pre_call_checks(
deployment=deployment
)
response = await response # type: ignore
else:
await self.async_routing_strategy_pre_call_checks(deployment=deployment)
response = await response # type: ignore
self.success_calls[model_name] += 1
verbose_router_logger.info(
f"litellm.aadapter_completion(model={model_name})\033[32m 200 OK\033[0m"
)
return response
except Exception as e:
verbose_router_logger.info(
f"litellm.aadapter_completion(model={model})\033[31m Exception {str(e)}\033[0m"
)
if model is not None:
self.fail_calls[model] += 1
raise e
def embedding(
self,
model: str,

View file

@ -0,0 +1,103 @@
# What is this?
## Unit tests for Anthropic Adapter
import asyncio
import os
import sys
import traceback
from dotenv import load_dotenv
load_dotenv()
import io
import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from unittest.mock import MagicMock, patch
import pytest
import litellm
from litellm import AnthropicConfig, Router, adapter_completion
from litellm.adapters.anthropic_adapter import anthropic_adapter
from litellm.types.llms.anthropic import AnthropicResponse
def test_anthropic_completion_messages_translation():
messages = [{"role": "user", "content": "Hey, how's it going?"}]
translated_messages = AnthropicConfig().translate_anthropic_messages_to_openai(messages=messages) # type: ignore
assert translated_messages == [{"role": "user", "content": "Hey, how's it going?"}]
def test_anthropic_completion_input_translation():
data = {
"model": "gpt-3.5-turbo",
"messages": [{"role": "user", "content": "Hey, how's it going?"}],
}
translated_input = anthropic_adapter.translate_completion_input_params(kwargs=data)
assert translated_input is not None
assert translated_input["model"] == "gpt-3.5-turbo"
assert translated_input["messages"] == [
{"role": "user", "content": "Hey, how's it going?"}
]
def test_anthropic_completion_e2e():
litellm.set_verbose = True
litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]
messages = [{"role": "user", "content": "Hey, how's it going?"}]
response = adapter_completion(
model="gpt-3.5-turbo",
messages=messages,
adapter_id="anthropic",
mock_response="This is a fake call",
)
print("Response: {}".format(response))
assert response is not None
assert isinstance(response, AnthropicResponse)
@pytest.mark.asyncio
async def test_anthropic_router_completion_e2e():
litellm.set_verbose = True
litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]
router = Router(
model_list=[
{
"model_name": "claude-3-5-sonnet-20240620",
"litellm_params": {
"model": "gpt-3.5-turbo",
"mock_response": "hi this is macintosh.",
},
}
]
)
messages = [{"role": "user", "content": "Hey, how's it going?"}]
response = await router.aadapter_completion(
model="claude-3-5-sonnet-20240620",
messages=messages,
adapter_id="anthropic",
mock_response="This is a fake call",
)
print("Response: {}".format(response))
assert response is not None
assert isinstance(response, AnthropicResponse)
assert response.model == "gpt-3.5-turbo"

litellm/types/adapter.py (new file)
View file

@ -0,0 +1,10 @@
from typing import List
from typing_extensions import Dict, Required, TypedDict, override
from litellm.integrations.custom_logger import CustomLogger
class AdapterItem(TypedDict):
id: str
adapter: CustomLogger

View file

@ -9,25 +9,27 @@ class AnthropicMessagesToolChoice(TypedDict, total=False):
name: str
class AnthopicMessagesAssistantMessageTextContentParam(TypedDict, total=False):
type: Required[Literal["text"]]
class AnthropicMessagesTool(TypedDict, total=False):
name: Required[str]
description: str
input_schema: Required[dict]
class AnthropicMessagesTextParam(TypedDict):
type: Literal["text"]
text: str
class AnthopicMessagesAssistantMessageToolCallParam(TypedDict, total=False):
type: Required[Literal["tool_use"]]
class AnthropicMessagesToolUseParam(TypedDict):
type: Literal["tool_use"]
id: str
name: str
input: dict
AnthropicMessagesAssistantMessageValues = Union[
AnthopicMessagesAssistantMessageTextContentParam,
AnthopicMessagesAssistantMessageToolCallParam,
AnthropicMessagesTextParam,
AnthropicMessagesToolUseParam,
]
@ -46,6 +48,72 @@ class AnthopicMessagesAssistantMessageParam(TypedDict, total=False):
"""
class AnthropicImageParamSource(TypedDict):
type: Literal["base64"]
media_type: str
data: str
class AnthropicMessagesImageParam(TypedDict):
type: Literal["image"]
source: AnthropicImageParamSource
class AnthropicMessagesToolResultContent(TypedDict):
type: Literal["text"]
text: str
class AnthropicMessagesToolResultParam(TypedDict, total=False):
type: Required[Literal["tool_result"]]
tool_use_id: Required[str]
is_error: bool
content: Union[
str,
Iterable[
Union[AnthropicMessagesToolResultContent, AnthropicMessagesImageParam]
],
]
AnthropicMessagesUserMessageValues = Union[
AnthropicMessagesTextParam,
AnthropicMessagesImageParam,
AnthropicMessagesToolResultParam,
]
class AnthropicMessagesUserMessageParam(TypedDict, total=False):
role: Required[Literal["user"]]
content: Required[Union[str, Iterable[AnthropicMessagesUserMessageValues]]]
class AnthropicMetadata(TypedDict, total=False):
user_id: str
class AnthropicMessagesRequest(TypedDict, total=False):
model: Required[str]
messages: Required[
List[
Union[
AnthropicMessagesUserMessageParam,
AnthopicMessagesAssistantMessageParam,
]
]
]
max_tokens: Required[int]
metadata: AnthropicMetadata
stop_sequences: List[str]
stream: bool
system: str
temperature: float
tool_choice: AnthropicMessagesToolChoice
tools: List[AnthropicMessagesTool]
top_k: int
top_p: float
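A request satisfying this TypedDict (illustrative values; only model, messages, and max_tokens are required):

example_request: AnthropicMessagesRequest = {
    "model": "claude-3-5-sonnet-20240620",
    "max_tokens": 256,
    "system": "You are a helpful assistant.",
    "messages": [
        {"role": "user", "content": [{"type": "text", "text": "What's the weather in SF?"}]}
    ],
    "tool_choice": {"type": "auto"},
    "tools": [
        {
            "name": "get_weather",
            "description": "Get the current weather for a city",
            "input_schema": {"type": "object", "properties": {"city": {"type": "string"}}},
        }
    ],
}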
class ContentTextBlockDelta(TypedDict):
"""
'delta': {'type': 'text_delta', 'text': 'Hello'}
@ -155,3 +223,51 @@ class MessageStartBlock(TypedDict):
type: Literal["message_start"]
message: MessageChunk
class AnthropicResponseContentBlockText(BaseModel):
type: Literal["text"]
text: str
class AnthropicResponseContentBlockToolUse(BaseModel):
type: Literal["tool_use"]
id: str
name: str
input: dict
class AnthropicResponseUsageBlock(BaseModel):
input_tokens: int
output_tokens: int
AnthropicFinishReason = Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]
class AnthropicResponse(BaseModel):
id: str
"""Unique object identifier."""
type: Literal["message"]
"""For Messages, this is always "message"."""
role: Literal["assistant"]
"""Conversational role of the generated message. This will always be "assistant"."""
content: List[
Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
]
"""Content generated by the model."""
model: str
"""The model that handled the request."""
stop_reason: Optional[AnthropicFinishReason]
"""The reason that we stopped."""
stop_sequence: Optional[str]
"""Which custom stop sequence was generated, if any."""
usage: AnthropicResponseUsageBlock
"""Billing and rate-limit usage."""

View file

@ -305,7 +305,13 @@ class ChatCompletionToolCallFunctionChunk(TypedDict, total=False):
arguments: str
class ChatCompletionToolCallChunk(TypedDict):
class ChatCompletionAssistantToolCall(TypedDict):
id: Optional[str]
type: Literal["function"]
function: ChatCompletionToolCallFunctionChunk
class ChatCompletionToolCallChunk(TypedDict): # result of /chat/completions call
id: Optional[str]
type: Literal["function"]
function: ChatCompletionToolCallFunctionChunk
@ -319,6 +325,107 @@ class ChatCompletionDeltaToolCallChunk(TypedDict, total=False):
index: int
class ChatCompletionTextObject(TypedDict):
type: Literal["text"]
text: str
class ChatCompletionImageUrlObject(TypedDict, total=False):
url: Required[str]
detail: str
class ChatCompletionImageObject(TypedDict):
type: Literal["image_url"]
image_url: ChatCompletionImageUrlObject
class ChatCompletionUserMessage(TypedDict):
role: Literal["user"]
content: Union[
str, Iterable[Union[ChatCompletionTextObject, ChatCompletionImageObject]]
]
class ChatCompletionAssistantMessage(TypedDict, total=False):
role: Required[Literal["assistant"]]
content: Optional[str]
name: str
tool_calls: List[ChatCompletionAssistantToolCall]
class ChatCompletionToolMessage(TypedDict):
role: Literal["tool"]
content: str
tool_call_id: str
class ChatCompletionSystemMessage(TypedDict, total=False):
role: Required[Literal["system"]]
content: Required[str]
name: str
AllMessageValues = Union[
ChatCompletionUserMessage,
ChatCompletionAssistantMessage,
ChatCompletionToolMessage,
ChatCompletionSystemMessage,
]
class ChatCompletionToolChoiceFunctionParam(TypedDict):
name: str
class ChatCompletionToolChoiceObjectParam(TypedDict):
type: Literal["function"]
function: ChatCompletionToolChoiceFunctionParam
ChatCompletionToolChoiceStringValues = Literal["none", "auto", "required"]
ChatCompletionToolChoiceValues = Union[
ChatCompletionToolChoiceStringValues, ChatCompletionToolChoiceObjectParam
]
class ChatCompletionToolParamFunctionChunk(TypedDict, total=False):
name: Required[str]
description: str
parameters: dict
class ChatCompletionToolParam(TypedDict):
type: Literal["function"]
function: ChatCompletionToolParamFunctionChunk
class ChatCompletionRequest(TypedDict, total=False):
model: Required[str]
messages: Required[List[AllMessageValues]]
frequency_penalty: float
logit_bias: dict
logprobs: bool
top_logprobs: int
max_tokens: int
n: int
presence_penalty: float
response_format: dict
seed: int
service_tier: str
stop: Union[str, List[str]]
stream_options: dict
temperature: float
top_p: float
tools: List[ChatCompletionToolParam]
tool_choice: ChatCompletionToolChoiceValues
parallel_tool_calls: bool
function_call: Union[str, dict]
functions: List
user: str
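For comparison, the OpenAI-format request that the Anthropic adapter translates into (same illustrative tool as above):

example_openai_request: ChatCompletionRequest = {
    "model": "gpt-3.5-turbo",
    "max_tokens": 256,
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What's the weather in SF?"},
    ],
    "tool_choice": "auto",
    "tools": [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a city",
                "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
            },
        }
    ],
}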
class ChatCompletionDeltaChunk(TypedDict, total=False):
content: Optional[str]
tool_calls: List[ChatCompletionDeltaToolCallChunk]

View file

@ -166,7 +166,9 @@ class FunctionCall(OpenAIObject):
class Function(OpenAIObject):
arguments: str
name: Optional[str] = None
name: Optional[
str
] # can be None - openai e.g.: ChoiceDeltaToolCallFunction(arguments='{"', name=None), type=None)
def __init__(
self,
@ -280,29 +282,43 @@ class ChatCompletionMessageToolCall(OpenAIObject):
setattr(self, key, value)
"""
Reference:
ChatCompletionMessage(content='This is a test', role='assistant', function_call=None, tool_calls=None))
"""
class Message(OpenAIObject):
content: Optional[str]
role: Literal["assistant"]
tool_calls: Optional[List[ChatCompletionMessageToolCall]]
function_call: Optional[FunctionCall]
def __init__(
self,
content: Optional[str] = "default",
role="assistant",
logprobs=None,
content: Optional[str] = None,
role: Literal["assistant"] = "assistant",
function_call=None,
tool_calls=None,
**params,
):
super(Message, self).__init__(**params)
self.content = content
self.role = role
if function_call is not None:
self.function_call = FunctionCall(**function_call)
if tool_calls is not None:
self.tool_calls = []
for tool_call in tool_calls:
self.tool_calls.append(ChatCompletionMessageToolCall(**tool_call))
if logprobs is not None:
self._logprobs = ChoiceLogprobs(**logprobs)
init_values = {
"content": content,
"role": role,
"function_call": (
FunctionCall(**function_call) if function_call is not None else None
),
"tool_calls": (
[ChatCompletionMessageToolCall(**tool_call) for tool_call in tool_calls]
if tool_calls is not None
else None
),
}
super(Message, self).__init__(
**init_values,
**params,
)
def get(self, key, default=None):
# Custom .get() method to access attributes with a default value if the attribute doesn't exist

View file

@ -8126,7 +8126,7 @@ class CustomStreamWrapper:
if chunk.startswith(self.complete_response):
# Remove last_sent_chunk only if it appears at the start of the new chunk
chunk = chunk[len(self.complete_response):]
chunk = chunk[len(self.complete_response) :]
self.complete_response += chunk
return chunk
@ -9483,8 +9483,8 @@ class CustomStreamWrapper:
model_response.choices[0].delta = Delta(**_json_delta)
except Exception as e:
verbose_logger.error(
"litellm.CustomStreamWrapper.chunk_creator(): Exception occured - {}".format(
str(e)
"litellm.CustomStreamWrapper.chunk_creator(): Exception occured - {}\n{}".format(
str(e), traceback.format_exc()
)
)
verbose_logger.debug(traceback.format_exc())
@ -10124,7 +10124,7 @@ def mock_completion_streaming_obj(
model_response, mock_response, model, n: Optional[int] = None
):
for i in range(0, len(mock_response), 3):
completion_obj = Delta(role="assistant", content=mock_response[i: i + 3])
completion_obj = Delta(role="assistant", content=mock_response[i : i + 3])
if n is None:
model_response.choices[0].delta = completion_obj
else:
@ -10133,7 +10133,7 @@ def mock_completion_streaming_obj(
_streaming_choice = litellm.utils.StreamingChoices(
index=j,
delta=litellm.utils.Delta(
role="assistant", content=mock_response[i: i + 3]
role="assistant", content=mock_response[i : i + 3]
),
)
_all_choices.append(_streaming_choice)
@ -10145,7 +10145,7 @@ async def async_mock_completion_streaming_obj(
model_response, mock_response, model, n: Optional[int] = None
):
for i in range(0, len(mock_response), 3):
completion_obj = Delta(role="assistant", content=mock_response[i: i + 3])
completion_obj = Delta(role="assistant", content=mock_response[i : i + 3])
if n is None:
model_response.choices[0].delta = completion_obj
else:
@ -10154,7 +10154,7 @@ async def async_mock_completion_streaming_obj(
_streaming_choice = litellm.utils.StreamingChoices(
index=j,
delta=litellm.utils.Delta(
role="assistant", content=mock_response[i: i + 3]
role="assistant", content=mock_response[i : i + 3]
),
)
_all_choices.append(_streaming_choice)