LiteLLM Minor Fixes & Improvements (09/27/2024) (#5938)

* fix(langfuse.py): prevent double logging requester metadata

Fixes https://github.com/BerriAI/litellm/issues/5935

* build(model_prices_and_context_window.json): add mistral pixtral cost tracking

Closes https://github.com/BerriAI/litellm/issues/5837
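
A quick way to sanity-check the new entry locally, assuming the pricing key follows the existing mistral/ naming in model_prices_and_context_window.json (the exact key is an assumption, not confirmed by this diff):

# Hedged sketch: look up the new pixtral pricing entry without making a live call.
# "mistral/pixtral-12b-2409" is an assumed key based on Mistral's published model id.
import litellm

entry = litellm.model_cost.get("mistral/pixtral-12b-2409")
if entry is not None:
    print(entry["input_cost_per_token"], entry["output_cost_per_token"])
else:
    print("pricing entry not found under this key")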

* handle streaming for Azure AI Studio errors

* [Perf Proxy] parallel request limiter - use one cache update call (#5932)

* fix parallel request limiter - use one cache update call
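
A minimal sketch of the batched-write pattern this refers to, using the async_set_cache_pipeline call mentioned below (the cache class, key format, and exact signature are assumptions and may differ by litellm version):

# Sketch only: batch several limiter counters into one pipelined cache write instead
# of one set_cache call per key.
import asyncio

from litellm.caching import DualCache  # assumed import path

async def update_limits(user_id: str, tpm: int, rpm: int) -> None:
    cache = DualCache()
    cache_list = [
        (f"{user_id}::tpm", tpm),  # hypothetical key format, for illustration only
        (f"{user_id}::rpm", rpm),
    ]
    await cache.async_set_cache_pipeline(cache_list, ttl=60)

asyncio.run(update_limits("user-123", tpm=1000, rpm=10))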

* ci/cd run again

* run ci/cd again

* use docker username password

* fix config.yml

* fix config

* fix config

* fix config.yml

* ci/cd run again

* use correct typing for batch set cache

* fix async_set_cache_pipeline

* fix: only check user id tpm/rpm limits when limits are set

* fix test_openai_azure_embedding_with_oidc_and_cf

* fix(groq/chat/transformation.py): Fixes https://github.com/BerriAI/litellm/issues/5839

* feat(anthropic/chat.py): return 'retry-after' headers from anthropic

Fixes https://github.com/BerriAI/litellm/issues/4387
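
A hedged sketch of where these headers surface, based on the handler change further down that copies response headers into _hidden_params["additional_headers"] with an llm_provider- prefix (internal layout, not a stable public API):

# Sketch only: _hidden_params is internal, and retry-after is typically only sent by
# Anthropic on 429/529 responses, so this may print None on a normal completion.
import litellm

response = litellm.completion(
    model="anthropic/claude-3-haiku-20240307",
    messages=[{"role": "user", "content": "ping"}],
)
provider_headers = getattr(response, "_hidden_params", {}).get("additional_headers", {})
print(provider_headers.get("llm_provider-retry-after"))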

* feat: raise validation error if message has tool calls without passing `tools` param for anthropic/bedrock

Closes https://github.com/BerriAI/litellm/issues/5747
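
A hedged sketch of the new guard (implemented in map_openai_params further down): assistant tool_calls without a tools= param now fail fast during parameter mapping, before any request is sent:

# Sketch only: the guard raises litellm.UnsupportedParamsError (a BadRequestError
# subclass) when messages contain tool-call blocks but no `tools=` param is passed.
import litellm

messages = [
    {"role": "user", "content": "What's the weather in SF?"},
    {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {
                "id": "call_1",
                "type": "function",
                "function": {"name": "get_weather", "arguments": '{"city": "SF"}'},
            }
        ],
    },
    {"role": "tool", "tool_call_id": "call_1", "content": "65F and sunny"},
]

try:
    litellm.completion(model="anthropic/claude-3-haiku-20240307", messages=messages)
except litellm.BadRequestError as e:
    print(e)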

* [Feature] #5940: add max_workers parameter for batch_completion (#5947)
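
A hedged usage sketch: max_workers is the new knob and bounds the thread pool used for the parallel calls; the other arguments follow the existing batch_completion API (requires a valid OPENAI_API_KEY):

# Sketch only: two prompts fanned out with at most 4 worker threads.
import litellm

responses = litellm.batch_completion(
    model="gpt-3.5-turbo",
    messages=[
        [{"role": "user", "content": "Summarize: the sky is blue."}],
        [{"role": "user", "content": "Summarize: grass is green."}],
    ],
    max_workers=4,
)
print(len(responses))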

* handle streaming for Azure AI Studio errors

* bump: version 1.48.2 → 1.48.3

* docs(data_security.md): add legal/compliance FAQs

Make it easier for companies to use litellm

* docs: resolve imports

* [Feature] #5940: add max_workers parameter for the batch_completion method

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Krrish Dholakia <krrishdholakia@gmail.com>
Co-authored-by: josearangos <josearangos@Joses-MacBook-Pro.local>

* fix(converse_transformation.py): fix default message value

* fix(utils.py): fix get_model_info to handle finetuned models

Fixes an issue with standard logging payloads where model_map_value was null for fine-tuned OpenAI models
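
A hedged sketch of the intended behavior: a fine-tuned OpenAI model id resolves to its base model's entry instead of returning an empty model_map_value (the model id below is illustrative):

# Sketch only: "ft:<base-model>:<org>::<id>" is OpenAI's fine-tuned naming; the lookup
# should now fall back to the base model's pricing/metadata.
import litellm

info = litellm.get_model_info(model="ft:gpt-3.5-turbo-0125:my-org::abc123")
print(info["litellm_provider"], info["input_cost_per_token"])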

* fix(litellm_pre_call_utils.py): add debug statement for data sent after updating with team/key callbacks

* fix: fix linting errors

* fix(anthropic/chat/handler.py): fix cache creation input tokens

* fix(exception_mapping_utils.py): fix missing imports

* fix(anthropic/chat/handler.py): fix usage block translation

* test: fix test

* test: fix tests

* style(types/utils.py): trigger new build

* test: fix test

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Jose Alberto Arango Sanchez <jose.arangos@udea.edu.co>
Co-authored-by: josearangos <josearangos@Joses-MacBook-Pro.local>
Krish Dholakia 2024-09-27 22:52:57 -07:00 committed by GitHub
parent 754981a78f
commit 0b30e212da
35 changed files with 3657 additions and 2820 deletions


@ -89,6 +89,7 @@ retry = True
### AUTH ###
api_key: Optional[str] = None
openai_key: Optional[str] = None
groq_key: Optional[str] = None
databricks_key: Optional[str] = None
azure_key: Optional[str] = None
anthropic_key: Optional[str] = None
@ -892,7 +893,11 @@ ALL_LITELLM_RESPONSE_TYPES = [
from .types.utils import ImageObject
from .llms.custom_llm import CustomLLM
from .llms.huggingface_restapi import HuggingfaceConfig
from .llms.anthropic.chat import AnthropicConfig
from .llms.anthropic.chat.handler import AnthropicConfig
from .llms.anthropic.experimental_pass_through.transformation import (
AnthropicExperimentalPassThroughConfig,
)
from .llms.groq.stt.transformation import GroqSTTConfig
from .llms.anthropic.completion import AnthropicTextConfig
from .llms.databricks.chat import DatabricksConfig, DatabricksEmbeddingConfig
from .llms.predibase import PredibaseConfig
@ -962,8 +967,8 @@ from .llms.OpenAI.openai import (
OpenAITextCompletionConfig,
MistralEmbeddingConfig,
DeepInfraConfig,
GroqConfig,
)
from .llms.groq.chat.transformation import GroqChatConfig
from .llms.azure_ai.chat.transformation import AzureAIStudioConfig
from .llms.mistral.mistral_chat_transformation import MistralConfig
from .llms.OpenAI.chat.o1_transformation import (


@ -34,7 +34,7 @@ class AnthropicAdapter(CustomLogger):
"""
request_body = AnthropicMessagesRequest(**kwargs) # type: ignore
translated_body = litellm.AnthropicConfig().translate_anthropic_to_openai(
translated_body = litellm.AnthropicExperimentalPassThroughConfig().translate_anthropic_to_openai(
anthropic_message_request=request_body
)
@ -44,7 +44,7 @@ class AnthropicAdapter(CustomLogger):
self, response: litellm.ModelResponse
) -> Optional[AnthropicResponse]:
return litellm.AnthropicConfig().translate_openai_response_to_anthropic(
return litellm.AnthropicExperimentalPassThroughConfig().translate_openai_response_to_anthropic(
response=response
)
@ -99,7 +99,7 @@ class AnthropicStreamWrapper(AdapterCompletionStreamWrapper):
if chunk == "None" or chunk is None:
raise Exception
processed_chunk = litellm.AnthropicConfig().translate_streaming_openai_response_to_anthropic(
processed_chunk = litellm.AnthropicExperimentalPassThroughConfig().translate_streaming_openai_response_to_anthropic(
response=chunk
)
if (
@ -163,7 +163,7 @@ class AnthropicStreamWrapper(AdapterCompletionStreamWrapper):
async for chunk in self.completion_stream:
if chunk == "None" or chunk is None:
raise Exception
processed_chunk = litellm.AnthropicConfig().translate_streaming_openai_response_to_anthropic(
processed_chunk = litellm.AnthropicExperimentalPassThroughConfig().translate_streaming_openai_response_to_anthropic(
response=chunk
)
if (


@ -601,7 +601,7 @@ class LangFuseLogger:
"input": input if not mask_input else "redacted-by-litellm",
"output": output if not mask_output else "redacted-by-litellm",
"usage": usage,
"metadata": clean_metadata,
"metadata": log_requester_metadata(clean_metadata),
"level": level,
"version": clean_metadata.pop("version", None),
}
@ -768,3 +768,15 @@ def log_provider_specific_information_as_span(
name="vertex_ai_grounding_metadata",
input=vertex_ai_grounding_metadata,
)
def log_requester_metadata(clean_metadata: dict):
returned_metadata = {}
requester_metadata = clean_metadata.get("requester_metadata") or {}
for k, v in clean_metadata.items():
if k not in requester_metadata:
returned_metadata[k] = v
returned_metadata.update({"requester_metadata": requester_metadata})
return returned_metadata
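
Worked example of the helper above: top-level keys that also appear under requester_metadata are dropped, so requester metadata is only logged once.

# Behavior of log_requester_metadata as defined above (values illustrative).
clean_metadata = {
    "user_api_key_alias": "my-key",
    "trace_id": "abc-123",
    "requester_metadata": {"trace_id": "abc-123"},
}
print(log_requester_metadata(clean_metadata))
# -> {"user_api_key_alias": "my-key", "requester_metadata": {"trace_id": "abc-123"}}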

File diff suppressed because it is too large.


@ -1015,9 +1015,8 @@ class Logging:
!= langFuseLogger.public_key
)
or (
self.langfuse_public_key is not None
and self.langfuse_public_key
!= langFuseLogger.public_key
self.langfuse_secret is not None
and self.langfuse_secret != langFuseLogger.secret_key
)
or (
self.langfuse_host is not None
@ -1045,7 +1044,6 @@ class Logging:
service_name="langfuse",
logging_obj=temp_langfuse_logger,
)
if temp_langfuse_logger is not None:
_response = temp_langfuse_logger.log_event(
kwargs=kwargs,


@ -220,104 +220,6 @@ class DeepInfraConfig:
return optional_params
class GroqConfig:
"""
Reference: https://deepinfra.com/docs/advanced/openai_api
The class `DeepInfra` provides configuration for the DeepInfra's Chat Completions API interface. Below are the parameters:
"""
frequency_penalty: Optional[int] = None
function_call: Optional[Union[str, dict]] = None
functions: Optional[list] = None
logit_bias: Optional[dict] = None
max_tokens: Optional[int] = None
n: Optional[int] = None
presence_penalty: Optional[int] = None
stop: Optional[Union[str, list]] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
response_format: Optional[dict] = None
tools: Optional[list] = None
tool_choice: Optional[Union[str, dict]] = None
def __init__(
self,
frequency_penalty: Optional[int] = None,
function_call: Optional[Union[str, dict]] = None,
functions: Optional[list] = None,
logit_bias: Optional[dict] = None,
max_tokens: Optional[int] = None,
n: Optional[int] = None,
presence_penalty: Optional[int] = None,
stop: Optional[Union[str, list]] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
response_format: Optional[dict] = None,
tools: Optional[list] = None,
tool_choice: Optional[Union[str, dict]] = None,
) -> None:
locals_ = locals().copy()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params_stt(self):
return [
"prompt",
"response_format",
"temperature",
"language",
]
def get_supported_openai_response_formats_stt(self) -> List[str]:
return ["json", "verbose_json", "text"]
def map_openai_params_stt(
self,
non_default_params: dict,
optional_params: dict,
model: str,
drop_params: bool,
) -> dict:
response_formats = self.get_supported_openai_response_formats_stt()
for param, value in non_default_params.items():
if param == "response_format":
if value in response_formats:
optional_params[param] = value
else:
if litellm.drop_params is True or drop_params is True:
pass
else:
raise litellm.utils.UnsupportedParamsError(
message="Groq doesn't support response_format={}. To drop unsupported openai params from the call, set `litellm.drop_params = True`".format(
value
),
status_code=400,
)
else:
optional_params[param] = value
return optional_params
class OpenAIConfig:
"""
Reference: https://platform.openai.com/docs/api-reference/chat/create


@ -0,0 +1 @@
from .handler import AnthropicChatCompletion, ModelResponseIterator


@ -71,12 +71,19 @@ from litellm.types.llms.openai import (
ChatCompletionToolParamFunctionChunk,
ChatCompletionUsageBlock,
ChatCompletionUserMessage,
OpenAIMessageContent,
)
from litellm.types.utils import Choices, GenericStreamingChunk
from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
from ..base import BaseLLM
from ..prompt_templates.factory import custom_prompt, prompt_factory
from ...base import BaseLLM
from ...prompt_templates.factory import (
anthropic_messages_pt,
custom_prompt,
prompt_factory,
)
from ..common_utils import AnthropicError
from .transformation import AnthropicConfig
class AnthropicConstants(Enum):
@ -86,558 +93,6 @@ class AnthropicConstants(Enum):
# constants from https://github.com/anthropics/anthropic-sdk-python/blob/main/src/anthropic/_constants.py
class AnthropicError(Exception):
def __init__(self, status_code: int, message):
self.status_code = status_code
self.message: str = message
self.request = httpx.Request(
method="POST", url="https://api.anthropic.com/v1/messages"
)
self.response = httpx.Response(status_code=status_code, request=self.request)
super().__init__(
self.message
) # Call the base class constructor with the parameters it needs
class AnthropicConfig:
"""
Reference: https://docs.anthropic.com/claude/reference/messages_post
to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
"""
max_tokens: Optional[int] = (
4096 # anthropic requires a default value (Opus, Sonnet, and Haiku have the same default)
)
stop_sequences: Optional[list] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
top_k: Optional[int] = None
metadata: Optional[dict] = None
system: Optional[str] = None
def __init__(
self,
max_tokens: Optional[
int
] = 4096, # You can pass in a value yourself or use the default value 4096
stop_sequences: Optional[list] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
top_k: Optional[int] = None,
metadata: Optional[dict] = None,
system: Optional[str] = None,
) -> None:
locals_ = locals()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params(self):
return [
"stream",
"stop",
"temperature",
"top_p",
"max_tokens",
"max_completion_tokens",
"tools",
"tool_choice",
"extra_headers",
]
def get_cache_control_headers(self) -> dict:
return {
"anthropic-version": "2023-06-01",
"anthropic-beta": "prompt-caching-2024-07-31",
}
def map_openai_params(self, non_default_params: dict, optional_params: dict):
for param, value in non_default_params.items():
if param == "max_tokens":
optional_params["max_tokens"] = value
if param == "max_completion_tokens":
optional_params["max_tokens"] = value
if param == "tools":
optional_params["tools"] = value
if param == "tool_choice":
_tool_choice: Optional[AnthropicMessagesToolChoice] = None
if value == "auto":
_tool_choice = {"type": "auto"}
elif value == "required":
_tool_choice = {"type": "any"}
elif isinstance(value, dict):
_tool_choice = {"type": "tool", "name": value["function"]["name"]}
if _tool_choice is not None:
optional_params["tool_choice"] = _tool_choice
if param == "stream" and value == True:
optional_params["stream"] = value
if param == "stop":
if isinstance(value, str):
if (
value == "\n"
) and litellm.drop_params == True: # anthropic doesn't allow whitespace characters as stop-sequences
continue
value = [value]
elif isinstance(value, list):
new_v = []
for v in value:
if (
v == "\n"
) and litellm.drop_params == True: # anthropic doesn't allow whitespace characters as stop-sequences
continue
new_v.append(v)
if len(new_v) > 0:
value = new_v
else:
continue
optional_params["stop_sequences"] = value
if param == "temperature":
optional_params["temperature"] = value
if param == "top_p":
optional_params["top_p"] = value
return optional_params
def is_cache_control_set(self, messages: List[AllMessageValues]) -> bool:
"""
Return if {"cache_control": ..} in message content block
Used to check if anthropic prompt caching headers need to be set.
"""
for message in messages:
if message["content"] is not None and isinstance(message["content"], list):
for content in message["content"]:
if "cache_control" in content:
return True
return False
def translate_system_message(
self, messages: List[AllMessageValues]
) -> List[AnthropicSystemMessageContent]:
system_prompt_indices = []
anthropic_system_message_list: List[AnthropicSystemMessageContent] = []
for idx, message in enumerate(messages):
if message["role"] == "system":
valid_content: bool = False
system_message_block = ChatCompletionSystemMessage(**message)
if isinstance(system_message_block["content"], str):
anthropic_system_message_content = AnthropicSystemMessageContent(
type="text",
text=system_message_block["content"],
)
if "cache_control" in system_message_block:
anthropic_system_message_content["cache_control"] = (
system_message_block["cache_control"]
)
anthropic_system_message_list.append(
anthropic_system_message_content
)
valid_content = True
elif isinstance(message["content"], list):
for _content in message["content"]:
anthropic_system_message_content = (
AnthropicSystemMessageContent(
type=_content.get("type"),
text=_content.get("text"),
)
)
if "cache_control" in _content:
anthropic_system_message_content["cache_control"] = (
_content["cache_control"]
)
anthropic_system_message_list.append(
anthropic_system_message_content
)
valid_content = True
if valid_content:
system_prompt_indices.append(idx)
if len(system_prompt_indices) > 0:
for idx in reversed(system_prompt_indices):
messages.pop(idx)
return anthropic_system_message_list
### FOR [BETA] `/v1/messages` endpoint support
def translatable_anthropic_params(self) -> List:
"""
Which anthropic params, we need to translate to the openai format.
"""
return ["messages", "metadata", "system", "tool_choice", "tools"]
def translate_anthropic_messages_to_openai(
self,
messages: List[
Union[
AnthropicMessagesUserMessageParam,
AnthopicMessagesAssistantMessageParam,
]
],
) -> List:
new_messages: List[AllMessageValues] = []
for m in messages:
user_message: Optional[ChatCompletionUserMessage] = None
tool_message_list: List[ChatCompletionToolMessage] = []
new_user_content_list: List[
Union[ChatCompletionTextObject, ChatCompletionImageObject]
] = []
## USER MESSAGE ##
if m["role"] == "user":
## translate user message
if isinstance(m["content"], str):
user_message = ChatCompletionUserMessage(
role="user", content=m["content"]
)
elif isinstance(m["content"], list):
for content in m["content"]:
if content["type"] == "text":
text_obj = ChatCompletionTextObject(
type="text", text=content["text"]
)
new_user_content_list.append(text_obj)
elif content["type"] == "image":
image_url = ChatCompletionImageUrlObject(
url=f"data:{content['type']};base64,{content['source']}"
)
image_obj = ChatCompletionImageObject(
type="image_url", image_url=image_url
)
new_user_content_list.append(image_obj)
elif content["type"] == "tool_result":
if "content" not in content:
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content="",
)
tool_message_list.append(tool_result)
elif isinstance(content["content"], str):
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=content["content"],
)
tool_message_list.append(tool_result)
elif isinstance(content["content"], list):
for c in content["content"]:
if c["type"] == "text":
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=c["text"],
)
tool_message_list.append(tool_result)
elif c["type"] == "image":
image_str = (
f"data:{c['type']};base64,{c['source']}"
)
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=image_str,
)
tool_message_list.append(tool_result)
if user_message is not None:
new_messages.append(user_message)
if len(new_user_content_list) > 0:
new_messages.append({"role": "user", "content": new_user_content_list}) # type: ignore
if len(tool_message_list) > 0:
new_messages.extend(tool_message_list)
## ASSISTANT MESSAGE ##
assistant_message_str: Optional[str] = None
tool_calls: List[ChatCompletionAssistantToolCall] = []
if m["role"] == "assistant":
if isinstance(m["content"], str):
assistant_message_str = m["content"]
elif isinstance(m["content"], list):
for content in m["content"]:
if content["type"] == "text":
if assistant_message_str is None:
assistant_message_str = content["text"]
else:
assistant_message_str += content["text"]
elif content["type"] == "tool_use":
function_chunk = ChatCompletionToolCallFunctionChunk(
name=content["name"],
arguments=json.dumps(content["input"]),
)
tool_calls.append(
ChatCompletionAssistantToolCall(
id=content["id"],
type="function",
function=function_chunk,
)
)
if assistant_message_str is not None or len(tool_calls) > 0:
assistant_message = ChatCompletionAssistantMessage(
role="assistant",
content=assistant_message_str,
)
if len(tool_calls) > 0:
assistant_message["tool_calls"] = tool_calls
new_messages.append(assistant_message)
return new_messages
def translate_anthropic_tool_choice_to_openai(
self, tool_choice: AnthropicMessagesToolChoice
) -> ChatCompletionToolChoiceValues:
if tool_choice["type"] == "any":
return "required"
elif tool_choice["type"] == "auto":
return "auto"
elif tool_choice["type"] == "tool":
tc_function_param = ChatCompletionToolChoiceFunctionParam(
name=tool_choice.get("name", "")
)
return ChatCompletionToolChoiceObjectParam(
type="function", function=tc_function_param
)
else:
raise ValueError(
"Incompatible tool choice param submitted - {}".format(tool_choice)
)
def translate_anthropic_tools_to_openai(
self, tools: List[AnthropicMessagesTool]
) -> List[ChatCompletionToolParam]:
new_tools: List[ChatCompletionToolParam] = []
for tool in tools:
function_chunk = ChatCompletionToolParamFunctionChunk(
name=tool["name"],
parameters=tool["input_schema"],
)
if "description" in tool:
function_chunk["description"] = tool["description"]
new_tools.append(
ChatCompletionToolParam(type="function", function=function_chunk)
)
return new_tools
def translate_anthropic_to_openai(
self, anthropic_message_request: AnthropicMessagesRequest
) -> ChatCompletionRequest:
"""
This is used by the beta Anthropic Adapter, for translating anthropic `/v1/messages` requests to the openai format.
"""
new_messages: List[AllMessageValues] = []
## CONVERT ANTHROPIC MESSAGES TO OPENAI
new_messages = self.translate_anthropic_messages_to_openai(
messages=anthropic_message_request["messages"]
)
## ADD SYSTEM MESSAGE TO MESSAGES
if "system" in anthropic_message_request:
new_messages.insert(
0,
ChatCompletionSystemMessage(
role="system", content=anthropic_message_request["system"]
),
)
new_kwargs: ChatCompletionRequest = {
"model": anthropic_message_request["model"],
"messages": new_messages,
}
## CONVERT METADATA (user_id)
if "metadata" in anthropic_message_request:
if "user_id" in anthropic_message_request["metadata"]:
new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"]
# Pass litellm proxy specific metadata
if "litellm_metadata" in anthropic_message_request:
# metadata will be passed to litellm.acompletion(), it's a litellm_param
new_kwargs["metadata"] = anthropic_message_request.pop("litellm_metadata")
## CONVERT TOOL CHOICE
if "tool_choice" in anthropic_message_request:
new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai(
tool_choice=anthropic_message_request["tool_choice"]
)
## CONVERT TOOLS
if "tools" in anthropic_message_request:
new_kwargs["tools"] = self.translate_anthropic_tools_to_openai(
tools=anthropic_message_request["tools"]
)
translatable_params = self.translatable_anthropic_params()
for k, v in anthropic_message_request.items():
if k not in translatable_params: # pass remaining params as is
new_kwargs[k] = v # type: ignore
return new_kwargs
def _translate_openai_content_to_anthropic(
self, choices: List[Choices]
) -> List[
Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
]:
new_content: List[
Union[
AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse
]
] = []
for choice in choices:
if (
choice.message.tool_calls is not None
and len(choice.message.tool_calls) > 0
):
for tool_call in choice.message.tool_calls:
new_content.append(
AnthropicResponseContentBlockToolUse(
type="tool_use",
id=tool_call.id,
name=tool_call.function.name or "",
input=json.loads(tool_call.function.arguments),
)
)
elif choice.message.content is not None:
new_content.append(
AnthropicResponseContentBlockText(
type="text", text=choice.message.content
)
)
return new_content
def _translate_openai_finish_reason_to_anthropic(
self, openai_finish_reason: str
) -> AnthropicFinishReason:
if openai_finish_reason == "stop":
return "end_turn"
elif openai_finish_reason == "length":
return "max_tokens"
elif openai_finish_reason == "tool_calls":
return "tool_use"
return "end_turn"
def translate_openai_response_to_anthropic(
self, response: litellm.ModelResponse
) -> AnthropicResponse:
## translate content block
anthropic_content = self._translate_openai_content_to_anthropic(choices=response.choices) # type: ignore
## extract finish reason
anthropic_finish_reason = self._translate_openai_finish_reason_to_anthropic(
openai_finish_reason=response.choices[0].finish_reason # type: ignore
)
# extract usage
usage: litellm.Usage = getattr(response, "usage")
anthropic_usage = AnthropicResponseUsageBlock(
input_tokens=usage.prompt_tokens or 0,
output_tokens=usage.completion_tokens or 0,
)
translated_obj = AnthropicResponse(
id=response.id,
type="message",
role="assistant",
model=response.model or "unknown-model",
stop_sequence=None,
usage=anthropic_usage,
content=anthropic_content,
stop_reason=anthropic_finish_reason,
)
return translated_obj
def _translate_streaming_openai_chunk_to_anthropic(
self, choices: List[OpenAIStreamingChoice]
) -> Tuple[
Literal["text_delta", "input_json_delta"],
Union[ContentTextBlockDelta, ContentJsonBlockDelta],
]:
text: str = ""
partial_json: Optional[str] = None
for choice in choices:
if choice.delta.content is not None:
text += choice.delta.content
elif choice.delta.tool_calls is not None:
partial_json = ""
for tool in choice.delta.tool_calls:
if (
tool.function is not None
and tool.function.arguments is not None
):
partial_json += tool.function.arguments
if partial_json is not None:
return "input_json_delta", ContentJsonBlockDelta(
type="input_json_delta", partial_json=partial_json
)
else:
return "text_delta", ContentTextBlockDelta(type="text_delta", text=text)
def translate_streaming_openai_response_to_anthropic(
self, response: litellm.ModelResponse
) -> Union[ContentBlockDelta, MessageBlockDelta]:
## base case - final chunk w/ finish reason
if response.choices[0].finish_reason is not None:
delta = MessageDelta(
stop_reason=self._translate_openai_finish_reason_to_anthropic(
response.choices[0].finish_reason
),
)
if getattr(response, "usage", None) is not None:
litellm_usage_chunk: Optional[litellm.Usage] = response.usage # type: ignore
elif (
hasattr(response, "_hidden_params")
and "usage" in response._hidden_params
):
litellm_usage_chunk = response._hidden_params["usage"]
else:
litellm_usage_chunk = None
if litellm_usage_chunk is not None:
usage_delta = UsageDelta(
input_tokens=litellm_usage_chunk.prompt_tokens or 0,
output_tokens=litellm_usage_chunk.completion_tokens or 0,
)
else:
usage_delta = UsageDelta(input_tokens=0, output_tokens=0)
return MessageBlockDelta(
type="message_delta", delta=delta, usage=usage_delta
)
(
type_of_content,
content_block_delta,
) = self._translate_streaming_openai_chunk_to_anthropic(
choices=response.choices # type: ignore
)
return ContentBlockDelta(
type="content_block_delta",
index=response.choices[0].index,
delta=content_block_delta,
)
# makes headers for API call
def validate_environment(
api_key, user_headers, model, messages: List[AllMessageValues]
@ -684,8 +139,14 @@ async def make_call(
api_base, headers=headers, data=data, stream=True, timeout=timeout
)
except httpx.HTTPStatusError as e:
error_headers = getattr(e, "headers", None)
error_response = getattr(e, "response", None)
if error_headers is None and error_response:
error_headers = getattr(error_response, "headers", None)
raise AnthropicError(
status_code=e.response.status_code, message=await e.response.aread()
status_code=e.response.status_code,
message=await e.response.aread(),
headers=error_headers,
)
except Exception as e:
for exception in litellm.LITELLM_EXCEPTION_TYPES:
@ -726,8 +187,14 @@ def make_sync_call(
api_base, headers=headers, data=data, stream=True, timeout=timeout
)
except httpx.HTTPStatusError as e:
error_headers = getattr(e, "headers", None)
error_response = getattr(e, "response", None)
if error_headers is None and error_response:
error_headers = getattr(error_response, "headers", None)
raise AnthropicError(
status_code=e.response.status_code, message=e.response.read()
status_code=e.response.status_code,
message=e.response.read(),
headers=error_headers,
)
except Exception as e:
for exception in litellm.LITELLM_EXCEPTION_TYPES:
@ -736,7 +203,12 @@ def make_sync_call(
raise AnthropicError(status_code=500, message=str(e))
if response.status_code != 200:
raise AnthropicError(status_code=response.status_code, message=response.read())
response_headers = getattr(response, "headers", None)
raise AnthropicError(
status_code=response.status_code,
message=response.read(),
headers=response_headers,
)
completion_stream = ModelResponseIterator(
streaming_response=response.iter_lines(), sync_stream=True
@ -763,7 +235,7 @@ class AnthropicChatCompletion(BaseLLM):
response: Union[requests.Response, httpx.Response],
model_response: ModelResponse,
stream: bool,
logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, # type: ignore
optional_params: dict,
api_key: str,
data: Union[dict, str],
@ -772,6 +244,14 @@ class AnthropicChatCompletion(BaseLLM):
encoding,
json_mode: bool,
) -> ModelResponse:
_hidden_params = {}
_response_headers = dict(response.headers)
if _response_headers is not None:
llm_response_headers = {
"{}-{}".format("llm_provider", k): v
for k, v in _response_headers.items()
}
_hidden_params["additional_headers"] = llm_response_headers
## LOGGING
logging_obj.post_call(
input=messages,
@ -783,14 +263,21 @@ class AnthropicChatCompletion(BaseLLM):
## RESPONSE OBJECT
try:
completion_response = response.json()
except:
except Exception as e:
response_headers = getattr(response, "headers", None)
raise AnthropicError(
message=response.text, status_code=response.status_code
message="Unable to get json response - {}, Original Response: {}".format(
str(e), response.text
),
status_code=response.status_code,
headers=response_headers,
)
if "error" in completion_response:
response_headers = getattr(response, "headers", None)
raise AnthropicError(
message=str(completion_response["error"]),
status_code=response.status_code,
headers=response_headers,
)
else:
text_content = ""
@ -856,6 +343,8 @@ class AnthropicChatCompletion(BaseLLM):
if "cache_read_input_tokens" in _usage:
usage["cache_read_input_tokens"] = _usage["cache_read_input_tokens"]
setattr(model_response, "usage", usage) # type: ignore
model_response._hidden_params = _hidden_params
return model_response
async def acompletion_stream_function(
@ -919,9 +408,9 @@ class AnthropicChatCompletion(BaseLLM):
litellm_params=None,
logger_fn=None,
headers={},
client=None,
client: Optional[AsyncHTTPHandler] = None,
) -> Union[ModelResponse, CustomStreamWrapper]:
async_handler = get_async_httpx_client(
async_handler = client or get_async_httpx_client(
llm_provider=litellm.LlmProviders.ANTHROPIC
)
@ -937,7 +426,17 @@ class AnthropicChatCompletion(BaseLLM):
original_response=str(e),
additional_args={"complete_input_dict": data},
)
raise e
status_code = getattr(e, "status_code", 500)
error_headers = getattr(e, "headers", None)
error_text = getattr(e, "text", str(e))
error_response = getattr(e, "response", None)
if error_headers is None and error_response:
error_headers = getattr(error_response, "headers", None)
raise AnthropicError(
message=error_text,
status_code=status_code,
headers=error_headers,
)
return self._process_response(
model=model,
@ -977,73 +476,18 @@ class AnthropicChatCompletion(BaseLLM):
_is_function_call = False
messages = copy.deepcopy(messages)
optional_params = copy.deepcopy(optional_params)
if model in custom_prompt_dict:
# check if the model has a registered custom prompt
model_prompt_details = custom_prompt_dict[model]
prompt = custom_prompt(
role_dict=model_prompt_details["roles"],
initial_prompt_value=model_prompt_details["initial_prompt_value"],
final_prompt_value=model_prompt_details["final_prompt_value"],
messages=messages,
)
else:
# Separate system prompt from rest of message
anthropic_system_message_list = AnthropicConfig().translate_system_message(
messages=messages
)
# Handling anthropic API Prompt Caching
if len(anthropic_system_message_list) > 0:
optional_params["system"] = anthropic_system_message_list
# Format rest of message according to anthropic guidelines
try:
messages = prompt_factory(
model=model, messages=messages, custom_llm_provider="anthropic"
)
except Exception as e:
raise AnthropicError(
status_code=400,
message="{}\nReceived Messages={}".format(str(e), messages),
) # don't use verbose_logger.exception, if exception is raised
## Load Config
config = litellm.AnthropicConfig.get_config()
for k, v in config.items():
if (
k not in optional_params
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
optional_params[k] = v
## Handle Tool Calling
if "tools" in optional_params:
_is_function_call = True
if "anthropic-beta" not in headers:
# default to v1 of "anthropic-beta"
headers["anthropic-beta"] = "tools-2024-05-16"
anthropic_tools = []
for tool in optional_params["tools"]:
if "input_schema" in tool: # assume in anthropic format
anthropic_tools.append(tool)
else: # assume openai tool call
new_tool = tool["function"]
new_tool["input_schema"] = new_tool.pop("parameters") # rename key
if "cache_control" in tool:
new_tool["cache_control"] = tool["cache_control"]
anthropic_tools.append(new_tool)
optional_params["tools"] = anthropic_tools
stream = optional_params.pop("stream", None)
is_vertex_request: bool = optional_params.pop("is_vertex_request", False)
json_mode: bool = optional_params.pop("json_mode", False)
is_vertex_request: bool = optional_params.pop("is_vertex_request", False)
data = {
"messages": messages,
**optional_params,
}
if is_vertex_request is False:
data["model"] = model
data = AnthropicConfig()._transform_request(
model=model,
messages=messages,
optional_params=optional_params,
headers=headers,
_is_function_call=_is_function_call,
is_vertex_request=is_vertex_request,
)
## LOGGING
logging_obj.pre_call(
@ -1136,12 +580,25 @@ class AnthropicChatCompletion(BaseLLM):
client = HTTPHandler(timeout=timeout) # type: ignore
else:
client = client
try:
response = client.post(
api_base, headers=headers, data=json.dumps(data), timeout=timeout
api_base,
headers=headers,
data=json.dumps(data),
timeout=timeout,
)
if response.status_code != 200:
except Exception as e:
status_code = getattr(e, "status_code", 500)
error_headers = getattr(e, "headers", None)
error_text = getattr(e, "text", str(e))
error_response = getattr(e, "response", None)
if error_headers is None and error_response:
error_headers = getattr(error_response, "headers", None)
raise AnthropicError(
status_code=response.status_code, message=response.text
message=error_text,
status_code=status_code,
headers=error_headers,
)
return self._process_response(
@ -1151,7 +608,7 @@ class AnthropicChatCompletion(BaseLLM):
stream=stream,
logging_obj=logging_obj,
api_key=api_key,
data=data,
data=data, # type: ignore
messages=messages,
print_verbose=print_verbose,
optional_params=optional_params,
@ -1192,7 +649,7 @@ class ModelResponseIterator:
return False
def _handle_usage(
self, anthropic_usage_chunk: dict
self, anthropic_usage_chunk: Union[dict, UsageDelta]
) -> AnthropicChatCompletionUsageBlock:
special_fields = ["input_tokens", "output_tokens"]
@ -1203,15 +660,19 @@ class ModelResponseIterator:
+ anthropic_usage_chunk.get("output_tokens", 0),
)
if "cache_creation_input_tokens" in anthropic_usage_chunk:
usage_block["cache_creation_input_tokens"] = anthropic_usage_chunk[
cache_creation_input_tokens = anthropic_usage_chunk.get(
"cache_creation_input_tokens"
]
)
if cache_creation_input_tokens is not None and isinstance(
cache_creation_input_tokens, int
):
usage_block["cache_creation_input_tokens"] = cache_creation_input_tokens
if "cache_read_input_tokens" in anthropic_usage_chunk:
usage_block["cache_read_input_tokens"] = anthropic_usage_chunk[
"cache_read_input_tokens"
]
cache_read_input_tokens = anthropic_usage_chunk.get("cache_read_input_tokens")
if cache_read_input_tokens is not None and isinstance(
cache_read_input_tokens, int
):
usage_block["cache_read_input_tokens"] = cache_read_input_tokens
return usage_block
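
Illustration of the stricter handling above: cache token counts are only copied into the usage block when they are present and integer-valued (a standalone mirror of the logic, not the class method itself).

# Mirrors _handle_usage's guarded copy: None / non-int cache counts are skipped.
anthropic_usage_chunk = {
    "input_tokens": 12,
    "output_tokens": 34,
    "cache_creation_input_tokens": 5,
    "cache_read_input_tokens": None,
}
usage_block = {
    "prompt_tokens": anthropic_usage_chunk.get("input_tokens", 0),
    "completion_tokens": anthropic_usage_chunk.get("output_tokens", 0),
    "total_tokens": anthropic_usage_chunk.get("input_tokens", 0)
    + anthropic_usage_chunk.get("output_tokens", 0),
}
for key in ("cache_creation_input_tokens", "cache_read_input_tokens"):
    value = anthropic_usage_chunk.get(key)
    if isinstance(value, int):
        usage_block[key] = value
print(usage_block)  # cache_read_input_tokens is dropped because it was None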
@ -1313,6 +774,7 @@ class ModelResponseIterator:
}
"""
message_start_block = MessageStartBlock(**chunk) # type: ignore
if "usage" in message_start_block["message"]:
usage = self._handle_usage(
anthropic_usage_chunk=message_start_block["message"]["usage"]
)


@ -0,0 +1,289 @@
import types
from typing import List, Literal, Optional, Tuple, Union
import litellm
from litellm.llms.prompt_templates.factory import anthropic_messages_pt
from litellm.types.llms.anthropic import (
AnthropicMessageRequestBase,
AnthropicMessagesRequest,
AnthropicMessagesToolChoice,
AnthropicSystemMessageContent,
)
from litellm.types.llms.openai import AllMessageValues, ChatCompletionSystemMessage
from litellm.utils import has_tool_call_blocks
from ..common_utils import AnthropicError
class AnthropicConfig:
"""
Reference: https://docs.anthropic.com/claude/reference/messages_post
to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
"""
max_tokens: Optional[int] = (
4096 # anthropic requires a default value (Opus, Sonnet, and Haiku have the same default)
)
stop_sequences: Optional[list] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
top_k: Optional[int] = None
metadata: Optional[dict] = None
system: Optional[str] = None
def __init__(
self,
max_tokens: Optional[
int
] = 4096, # You can pass in a value yourself or use the default value 4096
stop_sequences: Optional[list] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
top_k: Optional[int] = None,
metadata: Optional[dict] = None,
system: Optional[str] = None,
) -> None:
locals_ = locals()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params(self):
return [
"stream",
"stop",
"temperature",
"top_p",
"max_tokens",
"max_completion_tokens",
"tools",
"tool_choice",
"extra_headers",
]
def get_cache_control_headers(self) -> dict:
return {
"anthropic-version": "2023-06-01",
"anthropic-beta": "prompt-caching-2024-07-31",
}
def map_openai_params(
self,
non_default_params: dict,
optional_params: dict,
messages: Optional[List[AllMessageValues]] = None,
):
for param, value in non_default_params.items():
if param == "max_tokens":
optional_params["max_tokens"] = value
if param == "max_completion_tokens":
optional_params["max_tokens"] = value
if param == "tools":
optional_params["tools"] = value
if param == "tool_choice":
_tool_choice: Optional[AnthropicMessagesToolChoice] = None
if value == "auto":
_tool_choice = {"type": "auto"}
elif value == "required":
_tool_choice = {"type": "any"}
elif isinstance(value, dict):
_tool_choice = {"type": "tool", "name": value["function"]["name"]}
if _tool_choice is not None:
optional_params["tool_choice"] = _tool_choice
if param == "stream" and value is True:
optional_params["stream"] = value
if param == "stop":
if isinstance(value, str):
if (
value == "\n"
) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
continue
value = [value]
elif isinstance(value, list):
new_v = []
for v in value:
if (
v == "\n"
) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
continue
new_v.append(v)
if len(new_v) > 0:
value = new_v
else:
continue
optional_params["stop_sequences"] = value
if param == "temperature":
optional_params["temperature"] = value
if param == "top_p":
optional_params["top_p"] = value
## VALIDATE REQUEST
"""
Anthropic doesn't support tool calling without `tools=` param specified.
"""
if (
"tools" not in non_default_params
and messages is not None
and has_tool_call_blocks(messages)
):
raise litellm.UnsupportedParamsError(
message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.",
model="",
llm_provider="anthropic",
)
return optional_params
def is_cache_control_set(self, messages: List[AllMessageValues]) -> bool:
"""
Return if {"cache_control": ..} in message content block
Used to check if anthropic prompt caching headers need to be set.
"""
for message in messages:
_message_content = message.get("content")
if _message_content is not None and isinstance(_message_content, list):
for content in _message_content:
if "cache_control" in content:
return True
return False
def translate_system_message(
self, messages: List[AllMessageValues]
) -> List[AnthropicSystemMessageContent]:
"""
Translate system message to anthropic format.
Removes system message from the original list and returns a new list of anthropic system message content.
"""
system_prompt_indices = []
anthropic_system_message_list: List[AnthropicSystemMessageContent] = []
for idx, message in enumerate(messages):
if message["role"] == "system":
valid_content: bool = False
system_message_block = ChatCompletionSystemMessage(**message)
if isinstance(system_message_block["content"], str):
anthropic_system_message_content = AnthropicSystemMessageContent(
type="text",
text=system_message_block["content"],
)
if "cache_control" in system_message_block:
anthropic_system_message_content["cache_control"] = (
system_message_block["cache_control"]
)
anthropic_system_message_list.append(
anthropic_system_message_content
)
valid_content = True
elif isinstance(message["content"], list):
for _content in message["content"]:
anthropic_system_message_content = (
AnthropicSystemMessageContent(
type=_content.get("type"),
text=_content.get("text"),
)
)
if "cache_control" in _content:
anthropic_system_message_content["cache_control"] = (
_content["cache_control"]
)
anthropic_system_message_list.append(
anthropic_system_message_content
)
valid_content = True
if valid_content:
system_prompt_indices.append(idx)
if len(system_prompt_indices) > 0:
for idx in reversed(system_prompt_indices):
messages.pop(idx)
return anthropic_system_message_list
def _transform_request(
self,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
headers: dict,
_is_function_call: bool,
is_vertex_request: bool,
) -> dict:
"""
Translate messages to anthropic format.
"""
# Separate system prompt from rest of message
anthropic_system_message_list = self.translate_system_message(messages=messages)
# Handling anthropic API Prompt Caching
if len(anthropic_system_message_list) > 0:
optional_params["system"] = anthropic_system_message_list
# Format rest of message according to anthropic guidelines
try:
anthropic_messages = anthropic_messages_pt(
model=model,
messages=messages,
llm_provider="anthropic",
)
except Exception as e:
raise AnthropicError(
status_code=400,
message="{}\nReceived Messages={}".format(str(e), messages),
) # don't use verbose_logger.exception, if exception is raised
## Load Config
config = litellm.AnthropicConfig.get_config()
for k, v in config.items():
if (
k not in optional_params
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
optional_params[k] = v
## Handle Tool Calling
if "tools" in optional_params:
_is_function_call = True
if "anthropic-beta" not in headers:
# default to v1 of "anthropic-beta"
headers["anthropic-beta"] = "tools-2024-05-16"
anthropic_tools = []
for tool in optional_params["tools"]:
if "input_schema" in tool: # assume in anthropic format
anthropic_tools.append(tool)
else: # assume openai tool call
new_tool = tool["function"]
new_tool["input_schema"] = new_tool.pop("parameters") # rename key
if "cache_control" in tool:
new_tool["cache_control"] = tool["cache_control"]
anthropic_tools.append(new_tool)
optional_params["tools"] = anthropic_tools
data = {
"messages": anthropic_messages,
**optional_params,
}
if not is_vertex_request:
data["model"] = model
return data
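
A hedged sketch of the extracted transformation (AnthropicConfig now lives in this transformation module and is still exported from the litellm package; _transform_request is an internal method, shown purely for illustration):

# Sketch only: the system message is lifted into "system", the rest is converted to
# the Anthropic message format, and "model" is added for non-Vertex requests.
import litellm

data = litellm.AnthropicConfig()._transform_request(
    model="claude-3-haiku-20240307",
    messages=[
        {"role": "system", "content": "You are terse."},
        {"role": "user", "content": "Say hi."},
    ],
    optional_params={"max_tokens": 128},
    headers={},
    _is_function_call=False,
    is_vertex_request=False,
)
print(sorted(data.keys()))  # expect roughly: max_tokens, messages, model, system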


@ -0,0 +1,26 @@
"""
This file contains common utils for anthropic calls.
"""
from typing import Optional
import httpx
class AnthropicError(Exception):
def __init__(
self,
status_code: int,
message,
headers: Optional[httpx.Headers] = None,
):
self.status_code = status_code
self.message: str = message
self.headers = headers
self.request = httpx.Request(
method="POST", url="https://api.anthropic.com/v1/messages"
)
self.response = httpx.Response(status_code=status_code, request=self.request)
super().__init__(
self.message
) # Call the base class constructor with the parameters it needs


@ -0,0 +1,425 @@
import json
import types
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
from openai.types.chat.chat_completion_chunk import Choice as OpenAIStreamingChoice
import litellm
from litellm.types.llms.anthropic import (
AnthopicMessagesAssistantMessageParam,
AnthropicChatCompletionUsageBlock,
AnthropicFinishReason,
AnthropicMessagesRequest,
AnthropicMessagesTool,
AnthropicMessagesToolChoice,
AnthropicMessagesUserMessageParam,
AnthropicResponse,
AnthropicResponseContentBlockText,
AnthropicResponseContentBlockToolUse,
AnthropicResponseUsageBlock,
AnthropicSystemMessageContent,
ContentBlockDelta,
ContentBlockStart,
ContentBlockStop,
ContentJsonBlockDelta,
ContentTextBlockDelta,
MessageBlockDelta,
MessageDelta,
MessageStartBlock,
UsageDelta,
)
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionAssistantMessage,
ChatCompletionAssistantToolCall,
ChatCompletionImageObject,
ChatCompletionImageUrlObject,
ChatCompletionRequest,
ChatCompletionResponseMessage,
ChatCompletionSystemMessage,
ChatCompletionTextObject,
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
ChatCompletionToolChoiceFunctionParam,
ChatCompletionToolChoiceObjectParam,
ChatCompletionToolChoiceValues,
ChatCompletionToolMessage,
ChatCompletionToolParam,
ChatCompletionToolParamFunctionChunk,
ChatCompletionUsageBlock,
ChatCompletionUserMessage,
OpenAIMessageContent,
)
from litellm.types.utils import Choices, GenericStreamingChunk
from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
from ...base import BaseLLM
from ...prompt_templates.factory import (
anthropic_messages_pt,
custom_prompt,
prompt_factory,
)
class AnthropicExperimentalPassThroughConfig:
def __init__(self):
pass
### FOR [BETA] `/v1/messages` endpoint support
def translatable_anthropic_params(self) -> List:
"""
Which anthropic params, we need to translate to the openai format.
"""
return ["messages", "metadata", "system", "tool_choice", "tools"]
def translate_anthropic_messages_to_openai(
self,
messages: List[
Union[
AnthropicMessagesUserMessageParam,
AnthopicMessagesAssistantMessageParam,
]
],
) -> List:
new_messages: List[AllMessageValues] = []
for m in messages:
user_message: Optional[ChatCompletionUserMessage] = None
tool_message_list: List[ChatCompletionToolMessage] = []
new_user_content_list: List[
Union[ChatCompletionTextObject, ChatCompletionImageObject]
] = []
## USER MESSAGE ##
if m["role"] == "user":
## translate user message
message_content = m.get("content")
if message_content and isinstance(message_content, str):
user_message = ChatCompletionUserMessage(
role="user", content=message_content
)
elif message_content and isinstance(message_content, list):
for content in message_content:
if content["type"] == "text":
text_obj = ChatCompletionTextObject(
type="text", text=content["text"]
)
new_user_content_list.append(text_obj)
elif content["type"] == "image":
image_url = ChatCompletionImageUrlObject(
url=f"data:{content['type']};base64,{content['source']}"
)
image_obj = ChatCompletionImageObject(
type="image_url", image_url=image_url
)
new_user_content_list.append(image_obj)
elif content["type"] == "tool_result":
if "content" not in content:
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content="",
)
tool_message_list.append(tool_result)
elif isinstance(content["content"], str):
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=content["content"],
)
tool_message_list.append(tool_result)
elif isinstance(content["content"], list):
for c in content["content"]:
if c["type"] == "text":
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=c["text"],
)
tool_message_list.append(tool_result)
elif c["type"] == "image":
image_str = (
f"data:{c['type']};base64,{c['source']}"
)
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=image_str,
)
tool_message_list.append(tool_result)
if user_message is not None:
new_messages.append(user_message)
if len(new_user_content_list) > 0:
new_messages.append({"role": "user", "content": new_user_content_list}) # type: ignore
if len(tool_message_list) > 0:
new_messages.extend(tool_message_list)
## ASSISTANT MESSAGE ##
assistant_message_str: Optional[str] = None
tool_calls: List[ChatCompletionAssistantToolCall] = []
if m["role"] == "assistant":
if isinstance(m["content"], str):
assistant_message_str = m["content"]
elif isinstance(m["content"], list):
for content in m["content"]:
if content["type"] == "text":
if assistant_message_str is None:
assistant_message_str = content["text"]
else:
assistant_message_str += content["text"]
elif content["type"] == "tool_use":
function_chunk = ChatCompletionToolCallFunctionChunk(
name=content["name"],
arguments=json.dumps(content["input"]),
)
tool_calls.append(
ChatCompletionAssistantToolCall(
id=content["id"],
type="function",
function=function_chunk,
)
)
if assistant_message_str is not None or len(tool_calls) > 0:
assistant_message = ChatCompletionAssistantMessage(
role="assistant",
content=assistant_message_str,
)
if len(tool_calls) > 0:
assistant_message["tool_calls"] = tool_calls
new_messages.append(assistant_message)
return new_messages
def translate_anthropic_tool_choice_to_openai(
self, tool_choice: AnthropicMessagesToolChoice
) -> ChatCompletionToolChoiceValues:
if tool_choice["type"] == "any":
return "required"
elif tool_choice["type"] == "auto":
return "auto"
elif tool_choice["type"] == "tool":
tc_function_param = ChatCompletionToolChoiceFunctionParam(
name=tool_choice.get("name", "")
)
return ChatCompletionToolChoiceObjectParam(
type="function", function=tc_function_param
)
else:
raise ValueError(
"Incompatible tool choice param submitted - {}".format(tool_choice)
)
def translate_anthropic_tools_to_openai(
self, tools: List[AnthropicMessagesTool]
) -> List[ChatCompletionToolParam]:
new_tools: List[ChatCompletionToolParam] = []
for tool in tools:
function_chunk = ChatCompletionToolParamFunctionChunk(
name=tool["name"],
parameters=tool["input_schema"],
)
if "description" in tool:
function_chunk["description"] = tool["description"]
new_tools.append(
ChatCompletionToolParam(type="function", function=function_chunk)
)
return new_tools
def translate_anthropic_to_openai(
self, anthropic_message_request: AnthropicMessagesRequest
) -> ChatCompletionRequest:
"""
This is used by the beta Anthropic Adapter, for translating anthropic `/v1/messages` requests to the openai format.
"""
new_messages: List[AllMessageValues] = []
## CONVERT ANTHROPIC MESSAGES TO OPENAI
new_messages = self.translate_anthropic_messages_to_openai(
messages=anthropic_message_request["messages"]
)
## ADD SYSTEM MESSAGE TO MESSAGES
if "system" in anthropic_message_request:
new_messages.insert(
0,
ChatCompletionSystemMessage(
role="system", content=anthropic_message_request["system"]
),
)
new_kwargs: ChatCompletionRequest = {
"model": anthropic_message_request["model"],
"messages": new_messages,
}
## CONVERT METADATA (user_id)
if "metadata" in anthropic_message_request:
if "user_id" in anthropic_message_request["metadata"]:
new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"]
# Pass litellm proxy specific metadata
if "litellm_metadata" in anthropic_message_request:
# metadata will be passed to litellm.acompletion(), it's a litellm_param
new_kwargs["metadata"] = anthropic_message_request.pop("litellm_metadata")
## CONVERT TOOL CHOICE
if "tool_choice" in anthropic_message_request:
new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai(
tool_choice=anthropic_message_request["tool_choice"]
)
## CONVERT TOOLS
if "tools" in anthropic_message_request:
new_kwargs["tools"] = self.translate_anthropic_tools_to_openai(
tools=anthropic_message_request["tools"]
)
translatable_params = self.translatable_anthropic_params()
for k, v in anthropic_message_request.items():
if k not in translatable_params: # pass remaining params as is
new_kwargs[k] = v # type: ignore
return new_kwargs
def _translate_openai_content_to_anthropic(
self, choices: List[Choices]
) -> List[
Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
]:
new_content: List[
Union[
AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse
]
] = []
for choice in choices:
if (
choice.message.tool_calls is not None
and len(choice.message.tool_calls) > 0
):
for tool_call in choice.message.tool_calls:
new_content.append(
AnthropicResponseContentBlockToolUse(
type="tool_use",
id=tool_call.id,
name=tool_call.function.name or "",
input=json.loads(tool_call.function.arguments),
)
)
elif choice.message.content is not None:
new_content.append(
AnthropicResponseContentBlockText(
type="text", text=choice.message.content
)
)
return new_content
def _translate_openai_finish_reason_to_anthropic(
self, openai_finish_reason: str
) -> AnthropicFinishReason:
if openai_finish_reason == "stop":
return "end_turn"
elif openai_finish_reason == "length":
return "max_tokens"
elif openai_finish_reason == "tool_calls":
return "tool_use"
return "end_turn"
def translate_openai_response_to_anthropic(
self, response: litellm.ModelResponse
) -> AnthropicResponse:
## translate content block
anthropic_content = self._translate_openai_content_to_anthropic(choices=response.choices) # type: ignore
## extract finish reason
anthropic_finish_reason = self._translate_openai_finish_reason_to_anthropic(
openai_finish_reason=response.choices[0].finish_reason # type: ignore
)
# extract usage
usage: litellm.Usage = getattr(response, "usage")
anthropic_usage = AnthropicResponseUsageBlock(
input_tokens=usage.prompt_tokens or 0,
output_tokens=usage.completion_tokens or 0,
)
translated_obj = AnthropicResponse(
id=response.id,
type="message",
role="assistant",
model=response.model or "unknown-model",
stop_sequence=None,
usage=anthropic_usage,
content=anthropic_content,
stop_reason=anthropic_finish_reason,
)
return translated_obj
def _translate_streaming_openai_chunk_to_anthropic(
self, choices: List[OpenAIStreamingChoice]
) -> Tuple[
Literal["text_delta", "input_json_delta"],
Union[ContentTextBlockDelta, ContentJsonBlockDelta],
]:
text: str = ""
partial_json: Optional[str] = None
for choice in choices:
if choice.delta.content is not None:
text += choice.delta.content
elif choice.delta.tool_calls is not None:
partial_json = ""
for tool in choice.delta.tool_calls:
if (
tool.function is not None
and tool.function.arguments is not None
):
partial_json += tool.function.arguments
if partial_json is not None:
return "input_json_delta", ContentJsonBlockDelta(
type="input_json_delta", partial_json=partial_json
)
else:
return "text_delta", ContentTextBlockDelta(type="text_delta", text=text)
def translate_streaming_openai_response_to_anthropic(
self, response: litellm.ModelResponse
) -> Union[ContentBlockDelta, MessageBlockDelta]:
## base case - final chunk w/ finish reason
if response.choices[0].finish_reason is not None:
delta = MessageDelta(
stop_reason=self._translate_openai_finish_reason_to_anthropic(
response.choices[0].finish_reason
),
)
if getattr(response, "usage", None) is not None:
litellm_usage_chunk: Optional[litellm.Usage] = response.usage # type: ignore
elif (
hasattr(response, "_hidden_params")
and "usage" in response._hidden_params
):
litellm_usage_chunk = response._hidden_params["usage"]
else:
litellm_usage_chunk = None
if litellm_usage_chunk is not None:
usage_delta = UsageDelta(
input_tokens=litellm_usage_chunk.prompt_tokens or 0,
output_tokens=litellm_usage_chunk.completion_tokens or 0,
)
else:
usage_delta = UsageDelta(input_tokens=0, output_tokens=0)
return MessageBlockDelta(
type="message_delta", delta=delta, usage=usage_delta
)
(
type_of_content,
content_block_delta,
) = self._translate_streaming_openai_chunk_to_anthropic(
choices=response.choices # type: ignore
)
return ContentBlockDelta(
type="content_block_delta",
index=response.choices[0].index,
delta=content_block_delta,
)
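
A hedged sketch of exercising the relocated translator directly (the class is exported as litellm.AnthropicExperimentalPassThroughConfig per the __init__ change near the top of this diff):

# Sketch only: translate an Anthropic-style /v1/messages request into the OpenAI
# chat format; a plain dict stands in for the AnthropicMessagesRequest typed dict.
import litellm

anthropic_request = {
    "model": "claude-3-haiku-20240307",
    "max_tokens": 256,
    "messages": [{"role": "user", "content": "Hello"}],
}
openai_request = litellm.AnthropicExperimentalPassThroughConfig().translate_anthropic_to_openai(
    anthropic_message_request=anthropic_request  # type: ignore[arg-type]
)
print(openai_request["model"], len(openai_request["messages"]))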


@ -22,7 +22,7 @@ from litellm.types.llms.openai import (
ChatCompletionToolParamFunctionChunk,
)
from litellm.types.utils import ModelResponse, Usage
from litellm.utils import CustomStreamWrapper
from litellm.utils import CustomStreamWrapper, has_tool_call_blocks
from ...prompt_templates.factory import _bedrock_converse_messages_pt, _bedrock_tools_pt
from ..common_utils import BedrockError, get_bedrock_tool_name
@ -136,6 +136,7 @@ class AmazonConverseConfig:
non_default_params: dict,
optional_params: dict,
drop_params: bool,
messages: Optional[List[AllMessageValues]] = None,
) -> dict:
for param, value in non_default_params.items():
if param == "response_format":
@ -202,6 +203,21 @@ class AmazonConverseConfig:
)
if _tool_choice_value is not None:
optional_params["tool_choice"] = _tool_choice_value
## VALIDATE REQUEST
"""
Bedrock doesn't support tool calling without `tools=` param specified.
"""
if (
"tools" not in non_default_params
and messages is not None
and has_tool_call_blocks(messages)
):
raise litellm.UnsupportedParamsError(
message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.",
model="",
llm_provider="anthropic",
)
return optional_params
def _transform_request(


@ -0,0 +1,60 @@
"""
Handles the chat completion request for groq
"""
from typing import Any, Callable, Optional, Union
from httpx._config import Timeout
from litellm.utils import ModelResponse
from ...groq.chat.transformation import GroqChatConfig
from ...OpenAI.openai import OpenAIChatCompletion
class GroqChatCompletion(OpenAIChatCompletion):
def __init__(self, **kwargs):
super().__init__(**kwargs)
def completion(
self,
model_response: ModelResponse,
timeout: Union[float, Timeout],
optional_params: dict,
logging_obj: Any,
model: Optional[str] = None,
messages: Optional[list] = None,
print_verbose: Optional[Callable[..., Any]] = None,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
acompletion: bool = False,
litellm_params=None,
logger_fn=None,
headers: Optional[dict] = None,
custom_prompt_dict: dict = {},
client=None,
organization: Optional[str] = None,
custom_llm_provider: Optional[str] = None,
drop_params: Optional[bool] = None,
):
messages = GroqChatConfig()._transform_messages(messages) # type: ignore
return super().completion(
model_response,
timeout,
optional_params,
logging_obj,
model,
messages,
print_verbose,
api_key,
api_base,
acompletion,
litellm_params,
logger_fn,
headers,
custom_prompt_dict,
client,
organization,
custom_llm_provider,
drop_params,
)

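Note: with this handler wired up (see the routing change in main.py further down), a Groq chat call goes through GroqChatCompletion rather than the generic OpenAI-compatible path. A minimal usage sketch, assuming GROQ_API_KEY is set in the environment and using the llama3 model name that appears in the tests below:

import litellm

response = litellm.completion(
    model="groq/llama3-8b-8192",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
print(response.choices[0].message.content)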
View file

@ -0,0 +1,88 @@
"""
Translate from OpenAI's `/v1/chat/completions` to Groq's `/v1/chat/completions`
"""
import types
from typing import List, Optional, Union
from pydantic import BaseModel
import litellm
from litellm.types.llms.openai import AllMessageValues, ChatCompletionAssistantMessage
from ...OpenAI.chat.gpt_transformation import OpenAIGPTConfig
class GroqChatConfig(OpenAIGPTConfig):
frequency_penalty: Optional[int] = None
function_call: Optional[Union[str, dict]] = None
functions: Optional[list] = None
logit_bias: Optional[dict] = None
max_tokens: Optional[int] = None
n: Optional[int] = None
presence_penalty: Optional[int] = None
stop: Optional[Union[str, list]] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
response_format: Optional[dict] = None
tools: Optional[list] = None
tool_choice: Optional[Union[str, dict]] = None
def __init__(
self,
frequency_penalty: Optional[int] = None,
function_call: Optional[Union[str, dict]] = None,
functions: Optional[list] = None,
logit_bias: Optional[dict] = None,
max_tokens: Optional[int] = None,
n: Optional[int] = None,
presence_penalty: Optional[int] = None,
stop: Optional[Union[str, list]] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
response_format: Optional[dict] = None,
tools: Optional[list] = None,
tool_choice: Optional[Union[str, dict]] = None,
) -> None:
locals_ = locals().copy()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def _transform_messages(self, messages: List[AllMessageValues]) -> List:
for idx, message in enumerate(messages):
"""
1. Don't pass 'null' function_call assistant message to groq - https://github.com/BerriAI/litellm/issues/5839
"""
if isinstance(message, BaseModel):
_message = message.model_dump()
else:
_message = message
assistant_message = _message.get("role") == "assistant"
if assistant_message:
new_message = ChatCompletionAssistantMessage(role="assistant")
for k, v in _message.items():
if v is not None:
new_message[k] = v # type: ignore
messages[idx] = new_message
return messages

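Note: a small sketch of what the assistant-message cleanup does, calling the transformation directly (module path taken from this diff; the printed result is what the loop above should produce, i.e. the None-valued function_call/tool_calls keys are dropped):

from litellm.llms.groq.chat.transformation import GroqChatConfig

messages = [
    {"role": "user", "content": "What's the weather in SF?"},
    {
        "role": "assistant",
        "content": "Checking now.",
        "function_call": None,  # groq rejects a null function_call (issue #5839)
        "tool_calls": None,
    },
]

cleaned = GroqChatConfig()._transform_messages(messages)
print(cleaned[1])  # expected: {'role': 'assistant', 'content': 'Checking now.'}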
View file

@ -0,0 +1,101 @@
"""
Translate from OpenAI's `/v1/audio/transcriptions` to Groq's `/v1/audio/transcriptions`
"""
import types
from typing import List, Optional, Union
import litellm
class GroqSTTConfig:
frequency_penalty: Optional[int] = None
function_call: Optional[Union[str, dict]] = None
functions: Optional[list] = None
logit_bias: Optional[dict] = None
max_tokens: Optional[int] = None
n: Optional[int] = None
presence_penalty: Optional[int] = None
stop: Optional[Union[str, list]] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
response_format: Optional[dict] = None
tools: Optional[list] = None
tool_choice: Optional[Union[str, dict]] = None
def __init__(
self,
frequency_penalty: Optional[int] = None,
function_call: Optional[Union[str, dict]] = None,
functions: Optional[list] = None,
logit_bias: Optional[dict] = None,
max_tokens: Optional[int] = None,
n: Optional[int] = None,
presence_penalty: Optional[int] = None,
stop: Optional[Union[str, list]] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
response_format: Optional[dict] = None,
tools: Optional[list] = None,
tool_choice: Optional[Union[str, dict]] = None,
) -> None:
locals_ = locals().copy()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params_stt(self):
return [
"prompt",
"response_format",
"temperature",
"language",
]
def get_supported_openai_response_formats_stt(self) -> List[str]:
return ["json", "verbose_json", "text"]
def map_openai_params_stt(
self,
non_default_params: dict,
optional_params: dict,
model: str,
drop_params: bool,
) -> dict:
response_formats = self.get_supported_openai_response_formats_stt()
for param, value in non_default_params.items():
if param == "response_format":
if value in response_formats:
optional_params[param] = value
else:
if litellm.drop_params is True or drop_params is True:
pass
else:
raise litellm.utils.UnsupportedParamsError(
message="Groq doesn't support response_format={}. To drop unsupported openai params from the call, set `litellm.drop_params = True`".format(
value
),
status_code=400,
)
else:
optional_params[param] = value
return optional_params

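Note: for illustration, the response_format mapping can be exercised directly; a sketch assuming the module path from this diff (the model string is only a placeholder and is not inspected by this method):

from litellm.llms.groq.stt.transformation import GroqSTTConfig

config = GroqSTTConfig()

# A supported format (and any other param) passes straight through.
print(
    config.map_openai_params_stt(
        non_default_params={"response_format": "verbose_json", "temperature": 0.2},
        optional_params={},
        model="whisper-large-v3",
        drop_params=False,
    )
)  # -> {'response_format': 'verbose_json', 'temperature': 0.2}

# An unsupported format is dropped when drop_params=True; with drop_params=False
# (and litellm.drop_params left False) it raises litellm.utils.UnsupportedParamsError.
print(
    config.map_openai_params_stt(
        non_default_params={"response_format": "srt"},
        optional_params={},
        model="whisper-large-v3",
        drop_params=True,
    )
)  # -> {}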
View file

@ -276,7 +276,7 @@ def completion(
from anthropic import AnthropicVertex
from litellm.llms.anthropic.chat import AnthropicChatCompletion
from litellm.llms.anthropic.chat.handler import AnthropicChatCompletion
from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
VertexLLM,
)
@ -367,7 +367,7 @@ async def async_completion(
if client is None:
vertex_ai_client = AsyncAnthropicVertex(
project_id=vertex_project, region=vertex_location, access_token=access_token
project_id=vertex_project, region=vertex_location, access_token=access_token # type: ignore
)
else:
vertex_ai_client = client
@ -438,7 +438,7 @@ async def async_streaming(
if client is None:
vertex_ai_client = AsyncAnthropicVertex(
project_id=vertex_project, region=vertex_location, access_token=access_token
project_id=vertex_project, region=vertex_location, access_token=access_token # type: ignore
)
else:
vertex_ai_client = client

View file

@ -96,6 +96,7 @@ from .llms.cohere import completion as cohere_completion # type: ignore
from .llms.cohere import embed as cohere_embed
from .llms.custom_llm import CustomLLM, custom_chat_llm_router
from .llms.databricks.chat import DatabricksChatCompletion
from .llms.groq.chat.handler import GroqChatCompletion
from .llms.huggingface_restapi import Huggingface
from .llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription
from .llms.OpenAI.chat.o1_handler import OpenAIO1ChatCompletion
@ -168,6 +169,7 @@ openai_text_completions = OpenAITextCompletion()
openai_o1_chat_completions = OpenAIO1ChatCompletion()
openai_audio_transcriptions = OpenAIAudioTranscription()
databricks_chat_completions = DatabricksChatCompletion()
groq_chat_completions = GroqChatCompletion()
azure_ai_chat_completions = AzureAIChatCompletion()
azure_ai_embedding = AzureAIEmbedding()
anthropic_chat_completions = AnthropicChatCompletion()
@ -958,6 +960,7 @@ def completion(
extra_headers=extra_headers,
api_version=api_version,
parallel_tool_calls=parallel_tool_calls,
messages=messages,
**non_default_params,
)
@ -1318,13 +1321,56 @@ def completion(
additional_args={"headers": headers},
)
response = _response
elif custom_llm_provider == "groq":
api_base = (
api_base # for deepinfra/perplexity/anyscale/groq/friendliai we check in get_llm_provider and pass in the api base from there
or litellm.api_base
or get_secret("GROQ_API_BASE")
or "https://api.groq.com/openai/v1"
)
# set API KEY
api_key = (
api_key
or litellm.api_key # for deepinfra/perplexity/anyscale/friendliai we check in get_llm_provider and pass in the api key from there
or litellm.groq_key
or get_secret("GROQ_API_KEY")
)
headers = headers or litellm.headers
## LOAD CONFIG - if set
config = litellm.GroqChatConfig.get_config()
for k, v in config.items():
if (
k not in optional_params
): # completion(top_k=3) > openai_config(top_k=3) <- allows for dynamic variables to be passed in
optional_params[k] = v
response = groq_chat_completions.completion(
model=model,
messages=messages,
headers=headers,
model_response=model_response,
print_verbose=print_verbose,
api_key=api_key,
api_base=api_base,
acompletion=acompletion,
logging_obj=logging,
optional_params=optional_params,
litellm_params=litellm_params,
logger_fn=logger_fn,
timeout=timeout, # type: ignore
custom_prompt_dict=custom_prompt_dict,
client=client, # pass AsyncOpenAI, OpenAI client
organization=organization,
custom_llm_provider=custom_llm_provider,
)
elif (
model in litellm.open_ai_chat_completion_models
or custom_llm_provider == "custom_openai"
or custom_llm_provider == "deepinfra"
or custom_llm_provider == "perplexity"
or custom_llm_provider == "groq"
or custom_llm_provider == "nvidia_nim"
or custom_llm_provider == "cerebras"
or custom_llm_provider == "sambanova"
@ -1431,6 +1477,7 @@ def completion(
original_response=response,
additional_args={"headers": headers},
)
elif (
"replicate" in model
or custom_llm_provider == "replicate"
@ -2933,6 +2980,7 @@ def batch_completion(
deployment_id=None,
request_timeout: Optional[int] = None,
timeout: Optional[int] = 600,
max_workers: Optional[int] = 100,
# Optional liteLLM function params
**kwargs,
):
@ -2956,6 +3004,7 @@ def batch_completion(
user (str, optional): The user string for generating completions. Defaults to "".
deployment_id (optional): The deployment ID for generating completions. Defaults to None.
request_timeout (int, optional): The request timeout for generating completions. Defaults to None.
max_workers (int, optional): The maximum number of threads to use for parallel processing. Defaults to 100.
Returns:
list: A list of completion results.
@ -3001,7 +3050,7 @@ def batch_completion(
for i in range(0, len(lst), n):
yield lst[i : i + n]
with ThreadPoolExecutor(max_workers=100) as executor:
with ThreadPoolExecutor(max_workers=max_workers) as executor:
for sub_batch in chunks(batch_messages, 100):
for message_list in sub_batch:
kwargs_modified = args.copy()

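Note: taken together with the docstring change, callers can now bound the thread pool themselves. A short sketch (the model name and worker count are arbitrary; previously the pool was hard-coded to 100 workers):

import litellm

responses = litellm.batch_completion(
    model="groq/llama3-8b-8192",
    messages=[
        [{"role": "user", "content": f"Write a one-line greeting #{i}"}]
        for i in range(10)
    ],
    max_workers=10,  # new knob: caps the ThreadPoolExecutor size
)
print(len(responses))  # one response per message list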
View file

@ -1173,6 +1173,18 @@
"supports_function_calling": true,
"supports_assistant_prefill": true
},
"mistral/pixtral-12b-2409": {
"max_tokens": 128000,
"max_input_tokens": 128000,
"max_output_tokens": 128000,
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000015,
"litellm_provider": "mistral",
"mode": "chat",
"supports_function_calling": true,
"supports_assistant_prefill": true,
"supports_vision": true
},
"mistral/open-mistral-7b": {
"max_tokens": 8191,
"max_input_tokens": 32000,

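Note: as a sanity check on the new entry, at $0.15 per million tokens on both input and output, a request with 1,000 prompt tokens and 500 completion tokens should cost about $0.000225. A sketch using cost_per_token (assuming it keeps its usual prompt_tokens/completion_tokens signature):

import litellm

prompt_cost, completion_cost = litellm.cost_per_token(
    model="mistral/pixtral-12b-2409",
    prompt_tokens=1000,
    completion_tokens=500,
)
print(prompt_cost + completion_cost)  # expected ≈ 0.000225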
View file

@ -760,7 +760,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
return _user_id_rate_limits.model_dump()
except Exception as e:
verbose_proxy_logger.exception(
verbose_proxy_logger.debug(
"Parallel Request Limiter: Error getting user object", str(e)
)
return None

View file

@ -389,6 +389,9 @@ async def add_litellm_data_to_request(
user_api_key_dict=user_api_key_dict,
)
verbose_proxy_logger.debug(
f"[PROXY]returned data from litellm_pre_call_utils: {data}"
)
return data

View file

@ -1466,9 +1466,6 @@ class PrismaClient:
):
args_passed_in = locals()
start_time = time.time()
verbose_proxy_logger.debug(
f"PrismaClient: get_data - args_passed_in: {args_passed_in}"
)
hashed_token: Optional[str] = None
try:
response: Any = None

View file

@ -1224,3 +1224,14 @@ def test_langfuse_prompt_type(prompt):
_add_prompt_to_generation_params(
generation_params=generation_params, clean_metadata=clean_metadata
)
def test_langfuse_logging_metadata():
from litellm.integrations.langfuse import log_requester_metadata
metadata = {"key": "value", "requester_metadata": {"key": "value"}}
got_metadata = log_requester_metadata(clean_metadata=metadata)
expected_metadata = {"requester_metadata": {"key": "value"}}
assert expected_metadata == got_metadata

View file

@ -61,6 +61,7 @@ async def test_litellm_anthropic_prompt_caching_tools():
}
mock_response.json = return_val
mock_response.headers = {"key": "value"}
litellm.set_verbose = True
with patch(
@ -466,6 +467,7 @@ async def test_litellm_anthropic_prompt_caching_system():
}
mock_response.json = return_val
mock_response.headers = {"key": "value"}
litellm.set_verbose = True
with patch(

View file

@ -1173,7 +1173,12 @@ def test_turn_off_message_logging():
##### VALID JSON ######
@pytest.mark.parametrize("model", ["gpt-3.5-turbo", "azure/chatgpt-v-2"])
@pytest.mark.parametrize(
"model",
[
"ft:gpt-3.5-turbo:my-org:custom_suffix:id"
], # "gpt-3.5-turbo", "azure/chatgpt-v-2",
)
@pytest.mark.parametrize(
"turn_off_message_logging",
[
@ -1200,7 +1205,7 @@ def test_standard_logging_payload(model, turn_off_message_logging):
_ = litellm.completion(
model=model,
messages=[{"role": "user", "content": "Hey, how's it going?"}],
# mock_response="Going well!",
mock_response="Going well!",
)
time.sleep(2)

View file

@ -7,6 +7,8 @@ from typing import Any
from openai import AuthenticationError, BadRequestError, OpenAIError, RateLimitError
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
@ -884,6 +886,42 @@ def _pre_call_utils(
return data, original_function, mapped_target
def _pre_call_utils_httpx(
call_type: str,
data: dict,
client: Union[HTTPHandler, AsyncHTTPHandler],
sync_mode: bool,
streaming: Optional[bool],
):
mapped_target: Any = client.client
if call_type == "embedding":
data["input"] = "Hello world!"
if sync_mode:
original_function = litellm.embedding
else:
original_function = litellm.aembedding
elif call_type == "chat_completion":
data["messages"] = [{"role": "user", "content": "Hello world"}]
if streaming is True:
data["stream"] = True
if sync_mode:
original_function = litellm.completion
else:
original_function = litellm.acompletion
elif call_type == "completion":
data["prompt"] = "Hello world"
if streaming is True:
data["stream"] = True
if sync_mode:
original_function = litellm.text_completion
else:
original_function = litellm.atext_completion
return data, original_function, mapped_target
@pytest.mark.parametrize(
"sync_mode",
[True, False],
@ -1006,3 +1044,111 @@ async def test_exception_with_headers(sync_mode, provider, model, call_type, str
if exception_raised is False:
print(resp)
assert exception_raised
@pytest.mark.parametrize(
"sync_mode",
[True, False],
)
@pytest.mark.parametrize("streaming", [True, False])
@pytest.mark.parametrize(
"provider, model, call_type",
[
("anthropic", "claude-3-haiku-20240307", "chat_completion"),
],
)
@pytest.mark.asyncio
async def test_exception_with_headers_httpx(
sync_mode, provider, model, call_type, streaming
):
"""
User feedback: litellm says "No deployments available for selected model, Try again in 60 seconds"
but Azure says to retry in at most 9s
```
{"message": "litellm.proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"}
```
"""
print(f"Received args: {locals()}")
import openai
if sync_mode:
client = HTTPHandler()
else:
client = AsyncHTTPHandler()
data = {"model": model}
data, original_function, mapped_target = _pre_call_utils_httpx(
call_type=call_type,
data=data,
client=client,
sync_mode=sync_mode,
streaming=streaming,
)
cooldown_time = 30.0
def _return_exception(*args, **kwargs):
import datetime
from httpx import Headers, HTTPStatusError, Request, Response
# Create the Request object
request = Request("POST", "http://0.0.0.0:9000/chat/completions")
# Create the Response object with the necessary headers and status code
response = Response(
status_code=429,
headers=Headers(
{
"date": "Sat, 21 Sep 2024 22:56:53 GMT",
"server": "uvicorn",
"retry-after": "30",
"content-length": "30",
"content-type": "application/json",
}
),
request=request,
)
# Create and raise the HTTPStatusError exception
raise HTTPStatusError(
message="Error code: 429 - Rate Limit Error!",
request=request,
response=response,
)
with patch.object(
mapped_target,
"send",
side_effect=_return_exception,
):
new_retry_after_mock_client = MagicMock(return_value=-1)
litellm.utils._get_retry_after_from_exception_header = (
new_retry_after_mock_client
)
exception_raised = False
try:
if sync_mode:
resp = original_function(**data, client=client)
if streaming:
for chunk in resp:
continue
else:
resp = await original_function(**data, client=client)
if streaming:
async for chunk in resp:
continue
except litellm.RateLimitError as e:
exception_raised = True
assert e.litellm_response_headers is not None
print("e.litellm_response_headers", e.litellm_response_headers)
assert int(e.litellm_response_headers["retry-after"]) == cooldown_time
if exception_raised is False:
print(resp)
assert exception_raised

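Note: from a caller's perspective, the behavior this test locks in looks roughly like the sketch below; the `litellm_response_headers` attribute on the exception is exactly what the assertions above rely on (using `.get` on it is an assumption about the header container type):

import litellm

try:
    litellm.completion(
        model="claude-3-haiku-20240307",
        messages=[{"role": "user", "content": "Hello"}],
    )
except litellm.RateLimitError as e:
    # Provider rate-limit headers are now surfaced on the exception.
    retry_after = e.litellm_response_headers.get("retry-after")
    print(f"rate limited, retry after {retry_after}s")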
View file

@ -45,11 +45,12 @@ def get_current_weather(location, unit="fahrenheit"):
@pytest.mark.parametrize(
"model",
[
# "gpt-3.5-turbo-1106",
"gpt-3.5-turbo-1106",
# "mistral/mistral-large-latest",
# "claude-3-haiku-20240307",
# "gemini/gemini-1.5-pro",
"anthropic.claude-3-sonnet-20240229-v1:0",
"groq/llama3-8b-8192",
],
)
@pytest.mark.flaky(retries=3, delay=1)
@ -154,6 +155,105 @@ def test_aaparallel_function_call(model):
# test_parallel_function_call()
from litellm.types.utils import ChatCompletionMessageToolCall, Function, Message
@pytest.mark.parametrize(
"model, provider",
[
(
"anthropic.claude-3-sonnet-20240229-v1:0",
"bedrock",
),
("claude-3-haiku-20240307", "anthropic"),
],
)
@pytest.mark.parametrize(
"messages, expected_error_msg",
[
(
[
{
"role": "user",
"content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses",
},
Message(
content="Here are the current weather conditions for San Francisco, Tokyo, and Paris:",
role="assistant",
tool_calls=[
ChatCompletionMessageToolCall(
index=1,
function=Function(
arguments='{"location": "San Francisco, CA", "unit": "fahrenheit"}',
name="get_current_weather",
),
id="tooluse_Jj98qn6xQlOP_PiQr-w9iA",
type="function",
)
],
function_call=None,
),
{
"tool_call_id": "tooluse_Jj98qn6xQlOP_PiQr-w9iA",
"role": "tool",
"name": "get_current_weather",
"content": '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}',
},
],
True,
),
(
[
{
"role": "user",
"content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses",
}
],
False,
),
],
)
def test_parallel_function_call_anthropic_error_msg(
model, provider, messages, expected_error_msg
):
"""
Anthropic doesn't support tool calling without `tools=` param specified.
Ensure this error is thrown when the `tools=` param is not specified but tool-call requests are made.
Reference Issue: https://github.com/BerriAI/litellm/issues/5747, https://github.com/BerriAI/litellm/issues/5388
"""
try:
litellm.set_verbose = True
messages = messages
if expected_error_msg:
with pytest.raises(litellm.UnsupportedParamsError) as e:
second_response = litellm.completion(
model=model,
messages=messages,
temperature=0.2,
seed=22,
drop_params=True,
) # get a new response from the model where it can see the function response
print("second response\n", second_response)
else:
second_response = litellm.completion(
model=model,
messages=messages,
temperature=0.2,
seed=22,
drop_params=True,
) # get a new response from the model where it can see the function response
print("second response\n", second_response)
except litellm.InternalServerError as e:
print(e)
except litellm.RateLimitError as e:
print(e)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_parallel_function_call_stream():
try:

View file

@ -62,3 +62,9 @@ def test_get_model_info_shows_supports_prompt_caching():
info = litellm.get_model_info("deepseek/deepseek-chat")
print("info", info)
assert info.get("supports_prompt_caching") is True
def test_get_model_info_finetuned_models():
info = litellm.get_model_info("ft:gpt-3.5-turbo:my-org:custom_suffix:id")
print("info", info)
assert info["input_cost_per_token"] == 0.000003

View file

@ -18,13 +18,13 @@ class AnthropicMessagesTool(TypedDict, total=False):
class AnthropicMessagesTextParam(TypedDict, total=False):
type: Literal["text"]
text: str
type: Required[Literal["text"]]
text: Required[str]
cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
class AnthropicMessagesToolUseParam(TypedDict):
type: Literal["tool_use"]
type: Required[Literal["tool_use"]]
id: str
name: str
input: dict
@ -58,8 +58,8 @@ class AnthropicImageParamSource(TypedDict):
class AnthropicMessagesImageParam(TypedDict, total=False):
type: Literal["image"]
source: AnthropicImageParamSource
type: Required[Literal["image"]]
source: Required[AnthropicImageParamSource]
cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
@ -102,16 +102,13 @@ class AnthropicSystemMessageContent(TypedDict, total=False):
cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
class AnthropicMessagesRequest(TypedDict, total=False):
model: Required[str]
messages: Required[
List[
Union[
AnthropicMessagesUserMessageParam,
AnthopicMessagesAssistantMessageParam,
]
]
AllAnthropicMessageValues = Union[
AnthropicMessagesUserMessageParam, AnthopicMessagesAssistantMessageParam
]
class AnthropicMessageRequestBase(TypedDict, total=False):
messages: Required[List[AllAnthropicMessageValues]]
max_tokens: Required[int]
metadata: AnthropicMetadata
stop_sequences: List[str]
@ -123,6 +120,9 @@ class AnthropicMessagesRequest(TypedDict, total=False):
top_k: int
top_p: float
class AnthropicMessagesRequest(AnthropicMessageRequestBase, total=False):
model: Required[str]
# litellm param - used for tracking litellm proxy metadata in the request
litellm_metadata: dict
@ -291,9 +291,9 @@ class AnthropicResponse(BaseModel):
"""Billing and rate-limit usage."""
class AnthropicChatCompletionUsageBlock(TypedDict, total=False):
prompt_tokens: Required[int]
completion_tokens: Required[int]
total_tokens: Required[int]
from .openai import ChatCompletionUsageBlock
class AnthropicChatCompletionUsageBlock(ChatCompletionUsageBlock, total=False):
cache_creation_input_tokens: int
cache_read_input_tokens: int

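Note: since the usage block now inherits the base OpenAI-style fields, a populated instance (illustrative values only) looks like:

from litellm.types.llms.anthropic import AnthropicChatCompletionUsageBlock

usage = AnthropicChatCompletionUsageBlock(
    prompt_tokens=120,        # inherited from ChatCompletionUsageBlock
    completion_tokens=40,
    total_tokens=160,
    cache_creation_input_tokens=100,  # Anthropic-specific extras
    cache_read_input_tokens=0,
)
print(usage)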
View file

@ -343,13 +343,16 @@ class ChatCompletionImageObject(TypedDict):
image_url: Union[str, ChatCompletionImageUrlObject]
class OpenAIChatCompletionUserMessage(TypedDict):
role: Literal["user"]
content: Union[
OpenAIMessageContent = Union[
str, Iterable[Union[ChatCompletionTextObject, ChatCompletionImageObject]]
]
class OpenAIChatCompletionUserMessage(TypedDict):
role: Literal["user"]
content: OpenAIMessageContent
class ChatCompletionUserMessage(OpenAIChatCompletionUserMessage, total=False):
cache_control: ChatCompletionCachedContent

View file

@ -7,7 +7,7 @@ from typing import Any, Dict, List, Literal, Optional, Tuple, Union
from openai._models import BaseModel as OpenAIObject
from openai.types.audio.transcription_create_params import FileTypes # type: ignore
from openai.types.completion_usage import CompletionTokensDetails, CompletionUsage
from pydantic import ConfigDict, Field, PrivateAttr
from pydantic import ConfigDict, PrivateAttr
from typing_extensions import Callable, Dict, Required, TypedDict, override
from ..litellm_core_utils.core_helpers import map_finish_reason

File diff suppressed because it is too large

View file

@ -1173,6 +1173,18 @@
"supports_function_calling": true,
"supports_assistant_prefill": true
},
"mistral/pixtral-12b-2409": {
"max_tokens": 128000,
"max_input_tokens": 128000,
"max_output_tokens": 128000,
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000015,
"litellm_provider": "mistral",
"mode": "chat",
"supports_function_calling": true,
"supports_assistant_prefill": true,
"supports_vision": true
},
"mistral/open-mistral-7b": {
"max_tokens": 8191,
"max_input_tokens": 32000,

View file

@ -25,7 +25,12 @@ from unittest.mock import MagicMock, patch
import pytest
import litellm
from litellm import AnthropicConfig, Router, adapter_completion
from litellm import (
AnthropicConfig,
Router,
adapter_completion,
AnthropicExperimentalPassThroughConfig,
)
from litellm.adapters.anthropic_adapter import anthropic_adapter
from litellm.types.llms.anthropic import AnthropicResponse
@ -33,7 +38,7 @@ from litellm.types.llms.anthropic import AnthropicResponse
def test_anthropic_completion_messages_translation():
messages = [{"role": "user", "content": "Hey, how's it going?"}]
translated_messages = AnthropicConfig().translate_anthropic_messages_to_openai(messages=messages) # type: ignore
translated_messages = AnthropicExperimentalPassThroughConfig().translate_anthropic_messages_to_openai(messages=messages) # type: ignore
assert translated_messages == [{"role": "user", "content": "Hey, how's it going?"}]

View file

@ -5,7 +5,11 @@ import pytest
import sys
from typing import Any, Dict, List
from unittest.mock import MagicMock, Mock, patch
import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm.exceptions import BadRequestError
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler