LiteLLM Minor Fixes & Improvements (09/27/2024) (#5938)

* fix(langfuse.py): prevent double logging requester metadata

Fixes https://github.com/BerriAI/litellm/issues/5935

* build(model_prices_and_context_window.json): add mistral pixtral cost tracking

Closes https://github.com/BerriAI/litellm/issues/5837

* handle streaming for azure ai studio error

* [Perf Proxy] parallel request limiter - use one cache update call (#5932)

* fix parallel request limiter - use one cache update call

* ci/cd run again

* run ci/cd again

* use docker username password

* fix config.yml

* fix config

* fix config

* fix config.yml

* ci/cd run again

* use correct typing for batch set cache

* fix async_set_cache_pipeline

* fix only check user id tpm / rpm limits when limits set

* fix test_openai_azure_embedding_with_oidc_and_cf

* fix(groq/chat/transformation.py): Fixes https://github.com/BerriAI/litellm/issues/5839

* feat(anthropic/chat.py): return 'retry-after' headers from anthropic

Fixes https://github.com/BerriAI/litellm/issues/4387

* feat: raise validation error if message has tool calls without passing `tools` param for anthropic/bedrock

Closes https://github.com/BerriAI/litellm/issues/5747

* [Feature]#5940, add max_workers parameter for the batch_completion (#5947)

* handle streaming for azure ai studio error

* bump: version 1.48.2 → 1.48.3

* docs(data_security.md): add legal/compliance FAQs

Make it easier for companies to use litellm

* docs: resolve imports

* [Feature]#5940, add max_workers parameter for the batch_completion method

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Krrish Dholakia <krrishdholakia@gmail.com>
Co-authored-by: josearangos <josearangos@Joses-MacBook-Pro.local>

* fix(converse_transformation.py): fix default message value

* fix(utils.py): fix get_model_info to handle finetuned models

Fixes issue for standard logging payloads, where model_map_value was null for finetuned openai models

* fix(litellm_pre_call_utils.py): add debug statement for data sent after updating with team/key callbacks

* fix: fix linting errors

* fix(anthropic/chat/handler.py): fix cache creation input tokens

* fix(exception_mapping_utils.py): fix missing imports

* fix(anthropic/chat/handler.py): fix usage block translation

* test: fix test

* test: fix tests

* style(types/utils.py): trigger new build

* test: fix test

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Jose Alberto Arango Sanchez <jose.arangos@udea.edu.co>
Co-authored-by: josearangos <josearangos@Joses-MacBook-Pro.local>
Krish Dholakia, 2024-09-27 22:52:57 -07:00, committed by GitHub
parent 754981a78f
commit 0b30e212da
35 changed files with 3657 additions and 2820 deletions
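
Editor's note: the `max_workers` addition for `batch_completion` (#5940/#5947) is not visible in the diffs below. A minimal usage sketch, assuming the parameter is exposed as a keyword argument that caps the thread pool used for the parallel calls:

```python
import litellm

# Assumed usage of the new max_workers parameter on litellm.batch_completion;
# each inner list is one independent chat request.
responses = litellm.batch_completion(
    model="gpt-3.5-turbo",
    messages=[
        [{"role": "user", "content": "Hello, how are you?"}],
        [{"role": "user", "content": "Summarize the plot of Dune."}],
    ],
    max_workers=4,  # assumption: upper bound on concurrent worker threads
)
```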

litellm/__init__.py

@@ -89,6 +89,7 @@ retry = True
### AUTH ###
api_key: Optional[str] = None
openai_key: Optional[str] = None
+groq_key: Optional[str] = None
databricks_key: Optional[str] = None
azure_key: Optional[str] = None
anthropic_key: Optional[str] = None
@@ -892,7 +893,11 @@ ALL_LITELLM_RESPONSE_TYPES = [
from .types.utils import ImageObject
from .llms.custom_llm import CustomLLM
from .llms.huggingface_restapi import HuggingfaceConfig
-from .llms.anthropic.chat import AnthropicConfig
+from .llms.anthropic.chat.handler import AnthropicConfig
+from .llms.anthropic.experimental_pass_through.transformation import (
+    AnthropicExperimentalPassThroughConfig,
+)
+from .llms.groq.stt.transformation import GroqSTTConfig
from .llms.anthropic.completion import AnthropicTextConfig
from .llms.databricks.chat import DatabricksConfig, DatabricksEmbeddingConfig
from .llms.predibase import PredibaseConfig
@@ -962,8 +967,8 @@ from .llms.OpenAI.openai import (
OpenAITextCompletionConfig,
MistralEmbeddingConfig,
DeepInfraConfig,
-GroqConfig,
)
+from .llms.groq.chat.transformation import GroqChatConfig
from .llms.azure_ai.chat.transformation import AzureAIStudioConfig
from .llms.mistral.mistral_chat_transformation import MistralConfig
from .llms.OpenAI.chat.o1_transformation import (
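
The new `groq_key` module attribute follows the same pattern as the other provider keys above; a minimal sketch (the model name is illustrative):

```python
import litellm

litellm.groq_key = "gsk-..."  # picked up as the default Groq API key, like openai_key etc.
response = litellm.completion(
    model="groq/llama3-8b-8192",  # illustrative Groq model name
    messages=[{"role": "user", "content": "ping"}],
)
```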

litellm/adapters/anthropic_adapter.py

@@ -34,7 +34,7 @@ class AnthropicAdapter(CustomLogger):
"""
request_body = AnthropicMessagesRequest(**kwargs)  # type: ignore
-translated_body = litellm.AnthropicConfig().translate_anthropic_to_openai(
+translated_body = litellm.AnthropicExperimentalPassThroughConfig().translate_anthropic_to_openai(
anthropic_message_request=request_body
)
@@ -44,7 +44,7 @@ class AnthropicAdapter(CustomLogger):
self, response: litellm.ModelResponse
) -> Optional[AnthropicResponse]:
-return litellm.AnthropicConfig().translate_openai_response_to_anthropic(
+return litellm.AnthropicExperimentalPassThroughConfig().translate_openai_response_to_anthropic(
response=response
)
@@ -99,7 +99,7 @@ class AnthropicStreamWrapper(AdapterCompletionStreamWrapper):
if chunk == "None" or chunk is None:
raise Exception
-processed_chunk = litellm.AnthropicConfig().translate_streaming_openai_response_to_anthropic(
+processed_chunk = litellm.AnthropicExperimentalPassThroughConfig().translate_streaming_openai_response_to_anthropic(
response=chunk
)
if (
@@ -163,7 +163,7 @@ class AnthropicStreamWrapper(AdapterCompletionStreamWrapper):
async for chunk in self.completion_stream:
if chunk == "None" or chunk is None:
raise Exception
-processed_chunk = litellm.AnthropicConfig().translate_streaming_openai_response_to_anthropic(
+processed_chunk = litellm.AnthropicExperimentalPassThroughConfig().translate_streaming_openai_response_to_anthropic(
response=chunk
)
if (

litellm/integrations/langfuse.py

@@ -601,7 +601,7 @@ class LangFuseLogger:
"input": input if not mask_input else "redacted-by-litellm",
"output": output if not mask_output else "redacted-by-litellm",
"usage": usage,
-"metadata": clean_metadata,
+"metadata": log_requester_metadata(clean_metadata),
"level": level,
"version": clean_metadata.pop("version", None),
}
@@ -768,3 +768,15 @@ def log_provider_specific_information_as_span(
name="vertex_ai_grounding_metadata",
input=vertex_ai_grounding_metadata,
)
+
+
+def log_requester_metadata(clean_metadata: dict):
+    returned_metadata = {}
+    requester_metadata = clean_metadata.get("requester_metadata") or {}
+    for k, v in clean_metadata.items():
+        if k not in requester_metadata:
+            returned_metadata[k] = v
+    returned_metadata.update({"requester_metadata": requester_metadata})
+    return returned_metadata
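
The helper above fixes the double logging of requester metadata (#5935): any top-level key that also appears under `requester_metadata` is dropped, so each value is logged exactly once. A behavior sketch:

```python
clean_metadata = {
    "user_api_key": "sk-...",
    "foo": "bar",  # duplicated inside requester_metadata -> dropped at top level
    "requester_metadata": {"foo": "bar"},
}
assert log_requester_metadata(clean_metadata) == {
    "user_api_key": "sk-...",
    "requester_metadata": {"foo": "bar"},
}
```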

File diff suppressed because it is too large.
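
This suppressed diff is presumably `model_prices_and_context_window.json`, which the commit extends with Mistral Pixtral cost tracking (#5837). A sketch of how such entries are consumed; the exact model key is an assumption:

```python
import litellm

# litellm.model_cost maps model keys to pricing / context-window metadata.
pixtral = litellm.model_cost.get("mistral/pixtral-12b-2409", {})  # assumed key
print(pixtral.get("input_cost_per_token"), pixtral.get("output_cost_per_token"))
```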

litellm/litellm_core_utils/litellm_logging.py

@@ -1015,9 +1015,8 @@ class Logging:
!= langFuseLogger.public_key
)
or (
-self.langfuse_public_key is not None
-and self.langfuse_public_key
-!= langFuseLogger.public_key
+self.langfuse_secret is not None
+and self.langfuse_secret != langFuseLogger.secret_key
)
or (
self.langfuse_host is not None
@@ -1045,7 +1044,6 @@
service_name="langfuse",
logging_obj=temp_langfuse_logger,
)
-
if temp_langfuse_logger is not None:
_response = temp_langfuse_logger.log_event(
kwargs=kwargs,

litellm/llms/OpenAI/openai.py

@@ -220,104 +220,6 @@ class DeepInfraConfig:
return optional_params
class GroqConfig:
"""
Reference: https://deepinfra.com/docs/advanced/openai_api
The class `DeepInfra` provides configuration for the DeepInfra's Chat Completions API interface. Below are the parameters:
"""
frequency_penalty: Optional[int] = None
function_call: Optional[Union[str, dict]] = None
functions: Optional[list] = None
logit_bias: Optional[dict] = None
max_tokens: Optional[int] = None
n: Optional[int] = None
presence_penalty: Optional[int] = None
stop: Optional[Union[str, list]] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
response_format: Optional[dict] = None
tools: Optional[list] = None
tool_choice: Optional[Union[str, dict]] = None
def __init__(
self,
frequency_penalty: Optional[int] = None,
function_call: Optional[Union[str, dict]] = None,
functions: Optional[list] = None,
logit_bias: Optional[dict] = None,
max_tokens: Optional[int] = None,
n: Optional[int] = None,
presence_penalty: Optional[int] = None,
stop: Optional[Union[str, list]] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
response_format: Optional[dict] = None,
tools: Optional[list] = None,
tool_choice: Optional[Union[str, dict]] = None,
) -> None:
locals_ = locals().copy()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params_stt(self):
return [
"prompt",
"response_format",
"temperature",
"language",
]
def get_supported_openai_response_formats_stt(self) -> List[str]:
return ["json", "verbose_json", "text"]
def map_openai_params_stt(
self,
non_default_params: dict,
optional_params: dict,
model: str,
drop_params: bool,
) -> dict:
response_formats = self.get_supported_openai_response_formats_stt()
for param, value in non_default_params.items():
if param == "response_format":
if value in response_formats:
optional_params[param] = value
else:
if litellm.drop_params is True or drop_params is True:
pass
else:
raise litellm.utils.UnsupportedParamsError(
message="Groq doesn't support response_format={}. To drop unsupported openai params from the call, set `litellm.drop_params = True`".format(
value
),
status_code=400,
)
else:
optional_params[param] = value
return optional_params
class OpenAIConfig:
"""
Reference: https://platform.openai.com/docs/api-reference/chat/create

litellm/llms/anthropic/chat/__init__.py

@@ -0,0 +1 @@
from .handler import AnthropicChatCompletion, ModelResponseIterator

litellm/llms/anthropic/chat/handler.py

@@ -71,12 +71,19 @@ from litellm.types.llms.openai import (
ChatCompletionToolParamFunctionChunk,
ChatCompletionUsageBlock,
ChatCompletionUserMessage,
+OpenAIMessageContent,
)
from litellm.types.utils import Choices, GenericStreamingChunk
from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
-from ..base import BaseLLM
-from ..prompt_templates.factory import custom_prompt, prompt_factory
+from ...base import BaseLLM
+from ...prompt_templates.factory import (
+    anthropic_messages_pt,
+    custom_prompt,
+    prompt_factory,
+)
+from ..common_utils import AnthropicError
+from .transformation import AnthropicConfig

class AnthropicConstants(Enum):
@@ -86,558 +93,6 @@
# constants from https://github.com/anthropics/anthropic-sdk-python/blob/main/src/anthropic/_constants.py
class AnthropicError(Exception):
def __init__(self, status_code: int, message):
self.status_code = status_code
self.message: str = message
self.request = httpx.Request(
method="POST", url="https://api.anthropic.com/v1/messages"
)
self.response = httpx.Response(status_code=status_code, request=self.request)
super().__init__(
self.message
) # Call the base class constructor with the parameters it needs
class AnthropicConfig:
"""
Reference: https://docs.anthropic.com/claude/reference/messages_post
to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
"""
max_tokens: Optional[int] = (
4096 # anthropic requires a default value (Opus, Sonnet, and Haiku have the same default)
)
stop_sequences: Optional[list] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
top_k: Optional[int] = None
metadata: Optional[dict] = None
system: Optional[str] = None
def __init__(
self,
max_tokens: Optional[
int
] = 4096, # You can pass in a value yourself or use the default value 4096
stop_sequences: Optional[list] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
top_k: Optional[int] = None,
metadata: Optional[dict] = None,
system: Optional[str] = None,
) -> None:
locals_ = locals()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params(self):
return [
"stream",
"stop",
"temperature",
"top_p",
"max_tokens",
"max_completion_tokens",
"tools",
"tool_choice",
"extra_headers",
]
def get_cache_control_headers(self) -> dict:
return {
"anthropic-version": "2023-06-01",
"anthropic-beta": "prompt-caching-2024-07-31",
}
def map_openai_params(self, non_default_params: dict, optional_params: dict):
for param, value in non_default_params.items():
if param == "max_tokens":
optional_params["max_tokens"] = value
if param == "max_completion_tokens":
optional_params["max_tokens"] = value
if param == "tools":
optional_params["tools"] = value
if param == "tool_choice":
_tool_choice: Optional[AnthropicMessagesToolChoice] = None
if value == "auto":
_tool_choice = {"type": "auto"}
elif value == "required":
_tool_choice = {"type": "any"}
elif isinstance(value, dict):
_tool_choice = {"type": "tool", "name": value["function"]["name"]}
if _tool_choice is not None:
optional_params["tool_choice"] = _tool_choice
if param == "stream" and value == True:
optional_params["stream"] = value
if param == "stop":
if isinstance(value, str):
if (
value == "\n"
) and litellm.drop_params == True: # anthropic doesn't allow whitespace characters as stop-sequences
continue
value = [value]
elif isinstance(value, list):
new_v = []
for v in value:
if (
v == "\n"
) and litellm.drop_params == True: # anthropic doesn't allow whitespace characters as stop-sequences
continue
new_v.append(v)
if len(new_v) > 0:
value = new_v
else:
continue
optional_params["stop_sequences"] = value
if param == "temperature":
optional_params["temperature"] = value
if param == "top_p":
optional_params["top_p"] = value
return optional_params
def is_cache_control_set(self, messages: List[AllMessageValues]) -> bool:
"""
Return if {"cache_control": ..} in message content block
Used to check if anthropic prompt caching headers need to be set.
"""
for message in messages:
if message["content"] is not None and isinstance(message["content"], list):
for content in message["content"]:
if "cache_control" in content:
return True
return False
def translate_system_message(
self, messages: List[AllMessageValues]
) -> List[AnthropicSystemMessageContent]:
system_prompt_indices = []
anthropic_system_message_list: List[AnthropicSystemMessageContent] = []
for idx, message in enumerate(messages):
if message["role"] == "system":
valid_content: bool = False
system_message_block = ChatCompletionSystemMessage(**message)
if isinstance(system_message_block["content"], str):
anthropic_system_message_content = AnthropicSystemMessageContent(
type="text",
text=system_message_block["content"],
)
if "cache_control" in system_message_block:
anthropic_system_message_content["cache_control"] = (
system_message_block["cache_control"]
)
anthropic_system_message_list.append(
anthropic_system_message_content
)
valid_content = True
elif isinstance(message["content"], list):
for _content in message["content"]:
anthropic_system_message_content = (
AnthropicSystemMessageContent(
type=_content.get("type"),
text=_content.get("text"),
)
)
if "cache_control" in _content:
anthropic_system_message_content["cache_control"] = (
_content["cache_control"]
)
anthropic_system_message_list.append(
anthropic_system_message_content
)
valid_content = True
if valid_content:
system_prompt_indices.append(idx)
if len(system_prompt_indices) > 0:
for idx in reversed(system_prompt_indices):
messages.pop(idx)
return anthropic_system_message_list
### FOR [BETA] `/v1/messages` endpoint support
def translatable_anthropic_params(self) -> List:
"""
Which anthropic params, we need to translate to the openai format.
"""
return ["messages", "metadata", "system", "tool_choice", "tools"]
def translate_anthropic_messages_to_openai(
self,
messages: List[
Union[
AnthropicMessagesUserMessageParam,
AnthopicMessagesAssistantMessageParam,
]
],
) -> List:
new_messages: List[AllMessageValues] = []
for m in messages:
user_message: Optional[ChatCompletionUserMessage] = None
tool_message_list: List[ChatCompletionToolMessage] = []
new_user_content_list: List[
Union[ChatCompletionTextObject, ChatCompletionImageObject]
] = []
## USER MESSAGE ##
if m["role"] == "user":
## translate user message
if isinstance(m["content"], str):
user_message = ChatCompletionUserMessage(
role="user", content=m["content"]
)
elif isinstance(m["content"], list):
for content in m["content"]:
if content["type"] == "text":
text_obj = ChatCompletionTextObject(
type="text", text=content["text"]
)
new_user_content_list.append(text_obj)
elif content["type"] == "image":
image_url = ChatCompletionImageUrlObject(
url=f"data:{content['type']};base64,{content['source']}"
)
image_obj = ChatCompletionImageObject(
type="image_url", image_url=image_url
)
new_user_content_list.append(image_obj)
elif content["type"] == "tool_result":
if "content" not in content:
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content="",
)
tool_message_list.append(tool_result)
elif isinstance(content["content"], str):
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=content["content"],
)
tool_message_list.append(tool_result)
elif isinstance(content["content"], list):
for c in content["content"]:
if c["type"] == "text":
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=c["text"],
)
tool_message_list.append(tool_result)
elif c["type"] == "image":
image_str = (
f"data:{c['type']};base64,{c['source']}"
)
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=image_str,
)
tool_message_list.append(tool_result)
if user_message is not None:
new_messages.append(user_message)
if len(new_user_content_list) > 0:
new_messages.append({"role": "user", "content": new_user_content_list}) # type: ignore
if len(tool_message_list) > 0:
new_messages.extend(tool_message_list)
## ASSISTANT MESSAGE ##
assistant_message_str: Optional[str] = None
tool_calls: List[ChatCompletionAssistantToolCall] = []
if m["role"] == "assistant":
if isinstance(m["content"], str):
assistant_message_str = m["content"]
elif isinstance(m["content"], list):
for content in m["content"]:
if content["type"] == "text":
if assistant_message_str is None:
assistant_message_str = content["text"]
else:
assistant_message_str += content["text"]
elif content["type"] == "tool_use":
function_chunk = ChatCompletionToolCallFunctionChunk(
name=content["name"],
arguments=json.dumps(content["input"]),
)
tool_calls.append(
ChatCompletionAssistantToolCall(
id=content["id"],
type="function",
function=function_chunk,
)
)
if assistant_message_str is not None or len(tool_calls) > 0:
assistant_message = ChatCompletionAssistantMessage(
role="assistant",
content=assistant_message_str,
)
if len(tool_calls) > 0:
assistant_message["tool_calls"] = tool_calls
new_messages.append(assistant_message)
return new_messages
def translate_anthropic_tool_choice_to_openai(
self, tool_choice: AnthropicMessagesToolChoice
) -> ChatCompletionToolChoiceValues:
if tool_choice["type"] == "any":
return "required"
elif tool_choice["type"] == "auto":
return "auto"
elif tool_choice["type"] == "tool":
tc_function_param = ChatCompletionToolChoiceFunctionParam(
name=tool_choice.get("name", "")
)
return ChatCompletionToolChoiceObjectParam(
type="function", function=tc_function_param
)
else:
raise ValueError(
"Incompatible tool choice param submitted - {}".format(tool_choice)
)
def translate_anthropic_tools_to_openai(
self, tools: List[AnthropicMessagesTool]
) -> List[ChatCompletionToolParam]:
new_tools: List[ChatCompletionToolParam] = []
for tool in tools:
function_chunk = ChatCompletionToolParamFunctionChunk(
name=tool["name"],
parameters=tool["input_schema"],
)
if "description" in tool:
function_chunk["description"] = tool["description"]
new_tools.append(
ChatCompletionToolParam(type="function", function=function_chunk)
)
return new_tools
def translate_anthropic_to_openai(
self, anthropic_message_request: AnthropicMessagesRequest
) -> ChatCompletionRequest:
"""
This is used by the beta Anthropic Adapter, for translating anthropic `/v1/messages` requests to the openai format.
"""
new_messages: List[AllMessageValues] = []
## CONVERT ANTHROPIC MESSAGES TO OPENAI
new_messages = self.translate_anthropic_messages_to_openai(
messages=anthropic_message_request["messages"]
)
## ADD SYSTEM MESSAGE TO MESSAGES
if "system" in anthropic_message_request:
new_messages.insert(
0,
ChatCompletionSystemMessage(
role="system", content=anthropic_message_request["system"]
),
)
new_kwargs: ChatCompletionRequest = {
"model": anthropic_message_request["model"],
"messages": new_messages,
}
## CONVERT METADATA (user_id)
if "metadata" in anthropic_message_request:
if "user_id" in anthropic_message_request["metadata"]:
new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"]
# Pass litellm proxy specific metadata
if "litellm_metadata" in anthropic_message_request:
# metadata will be passed to litellm.acompletion(), it's a litellm_param
new_kwargs["metadata"] = anthropic_message_request.pop("litellm_metadata")
## CONVERT TOOL CHOICE
if "tool_choice" in anthropic_message_request:
new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai(
tool_choice=anthropic_message_request["tool_choice"]
)
## CONVERT TOOLS
if "tools" in anthropic_message_request:
new_kwargs["tools"] = self.translate_anthropic_tools_to_openai(
tools=anthropic_message_request["tools"]
)
translatable_params = self.translatable_anthropic_params()
for k, v in anthropic_message_request.items():
if k not in translatable_params: # pass remaining params as is
new_kwargs[k] = v # type: ignore
return new_kwargs
def _translate_openai_content_to_anthropic(
self, choices: List[Choices]
) -> List[
Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
]:
new_content: List[
Union[
AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse
]
] = []
for choice in choices:
if (
choice.message.tool_calls is not None
and len(choice.message.tool_calls) > 0
):
for tool_call in choice.message.tool_calls:
new_content.append(
AnthropicResponseContentBlockToolUse(
type="tool_use",
id=tool_call.id,
name=tool_call.function.name or "",
input=json.loads(tool_call.function.arguments),
)
)
elif choice.message.content is not None:
new_content.append(
AnthropicResponseContentBlockText(
type="text", text=choice.message.content
)
)
return new_content
def _translate_openai_finish_reason_to_anthropic(
self, openai_finish_reason: str
) -> AnthropicFinishReason:
if openai_finish_reason == "stop":
return "end_turn"
elif openai_finish_reason == "length":
return "max_tokens"
elif openai_finish_reason == "tool_calls":
return "tool_use"
return "end_turn"
def translate_openai_response_to_anthropic(
self, response: litellm.ModelResponse
) -> AnthropicResponse:
## translate content block
anthropic_content = self._translate_openai_content_to_anthropic(choices=response.choices) # type: ignore
## extract finish reason
anthropic_finish_reason = self._translate_openai_finish_reason_to_anthropic(
openai_finish_reason=response.choices[0].finish_reason # type: ignore
)
# extract usage
usage: litellm.Usage = getattr(response, "usage")
anthropic_usage = AnthropicResponseUsageBlock(
input_tokens=usage.prompt_tokens or 0,
output_tokens=usage.completion_tokens or 0,
)
translated_obj = AnthropicResponse(
id=response.id,
type="message",
role="assistant",
model=response.model or "unknown-model",
stop_sequence=None,
usage=anthropic_usage,
content=anthropic_content,
stop_reason=anthropic_finish_reason,
)
return translated_obj
def _translate_streaming_openai_chunk_to_anthropic(
self, choices: List[OpenAIStreamingChoice]
) -> Tuple[
Literal["text_delta", "input_json_delta"],
Union[ContentTextBlockDelta, ContentJsonBlockDelta],
]:
text: str = ""
partial_json: Optional[str] = None
for choice in choices:
if choice.delta.content is not None:
text += choice.delta.content
elif choice.delta.tool_calls is not None:
partial_json = ""
for tool in choice.delta.tool_calls:
if (
tool.function is not None
and tool.function.arguments is not None
):
partial_json += tool.function.arguments
if partial_json is not None:
return "input_json_delta", ContentJsonBlockDelta(
type="input_json_delta", partial_json=partial_json
)
else:
return "text_delta", ContentTextBlockDelta(type="text_delta", text=text)
def translate_streaming_openai_response_to_anthropic(
self, response: litellm.ModelResponse
) -> Union[ContentBlockDelta, MessageBlockDelta]:
## base case - final chunk w/ finish reason
if response.choices[0].finish_reason is not None:
delta = MessageDelta(
stop_reason=self._translate_openai_finish_reason_to_anthropic(
response.choices[0].finish_reason
),
)
if getattr(response, "usage", None) is not None:
litellm_usage_chunk: Optional[litellm.Usage] = response.usage # type: ignore
elif (
hasattr(response, "_hidden_params")
and "usage" in response._hidden_params
):
litellm_usage_chunk = response._hidden_params["usage"]
else:
litellm_usage_chunk = None
if litellm_usage_chunk is not None:
usage_delta = UsageDelta(
input_tokens=litellm_usage_chunk.prompt_tokens or 0,
output_tokens=litellm_usage_chunk.completion_tokens or 0,
)
else:
usage_delta = UsageDelta(input_tokens=0, output_tokens=0)
return MessageBlockDelta(
type="message_delta", delta=delta, usage=usage_delta
)
(
type_of_content,
content_block_delta,
) = self._translate_streaming_openai_chunk_to_anthropic(
choices=response.choices # type: ignore
)
return ContentBlockDelta(
type="content_block_delta",
index=response.choices[0].index,
delta=content_block_delta,
)
# makes headers for API call
def validate_environment(
api_key, user_headers, model, messages: List[AllMessageValues]
@@ -684,8 +139,14 @@ async def make_call(
api_base, headers=headers, data=data, stream=True, timeout=timeout
)
except httpx.HTTPStatusError as e:
+error_headers = getattr(e, "headers", None)
+error_response = getattr(e, "response", None)
+if error_headers is None and error_response:
+    error_headers = getattr(error_response, "headers", None)
raise AnthropicError(
-    status_code=e.response.status_code, message=await e.response.aread()
+    status_code=e.response.status_code,
+    message=await e.response.aread(),
+    headers=error_headers,
)
except Exception as e:
for exception in litellm.LITELLM_EXCEPTION_TYPES:
@@ -726,8 +187,14 @@ def make_sync_call(
api_base, headers=headers, data=data, stream=True, timeout=timeout
)
except httpx.HTTPStatusError as e:
+error_headers = getattr(e, "headers", None)
+error_response = getattr(e, "response", None)
+if error_headers is None and error_response:
+    error_headers = getattr(error_response, "headers", None)
raise AnthropicError(
-    status_code=e.response.status_code, message=e.response.read()
+    status_code=e.response.status_code,
+    message=e.response.read(),
+    headers=error_headers,
)
except Exception as e:
for exception in litellm.LITELLM_EXCEPTION_TYPES:
@@ -736,7 +203,12 @@
raise AnthropicError(status_code=500, message=str(e))
if response.status_code != 200:
-raise AnthropicError(status_code=response.status_code, message=response.read())
+response_headers = getattr(response, "headers", None)
+raise AnthropicError(
+    status_code=response.status_code,
+    message=response.read(),
+    headers=response_headers,
+)

completion_stream = ModelResponseIterator(
streaming_response=response.iter_lines(), sync_stream=True
@@ -763,7 +235,7 @@ class AnthropicChatCompletion(BaseLLM):
response: Union[requests.Response, httpx.Response],
model_response: ModelResponse,
stream: bool,
-logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
+logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,  # type: ignore
optional_params: dict,
api_key: str,
data: Union[dict, str],
@@ -772,6 +244,14 @@ class AnthropicChatCompletion(BaseLLM):
encoding,
json_mode: bool,
) -> ModelResponse:
+_hidden_params = {}
+_response_headers = dict(response.headers)
+if _response_headers is not None:
+    llm_response_headers = {
+        "{}-{}".format("llm_provider", k): v
+        for k, v in _response_headers.items()
+    }
+    _hidden_params["additional_headers"] = llm_response_headers
## LOGGING
logging_obj.post_call(
input=messages,
@@ -783,14 +263,21 @@ class AnthropicChatCompletion(BaseLLM):
## RESPONSE OBJECT
try:
completion_response = response.json()
-except:
+except Exception as e:
+    response_headers = getattr(response, "headers", None)
raise AnthropicError(
-    message=response.text, status_code=response.status_code
+    message="Unable to get json response - {}, Original Response: {}".format(
+        str(e), response.text
+    ),
+    status_code=response.status_code,
+    headers=response_headers,
)
if "error" in completion_response:
+    response_headers = getattr(response, "headers", None)
raise AnthropicError(
    message=str(completion_response["error"]),
    status_code=response.status_code,
+    headers=response_headers,
)
else:
text_content = ""
@@ -856,6 +343,8 @@ class AnthropicChatCompletion(BaseLLM):
if "cache_read_input_tokens" in _usage:
usage["cache_read_input_tokens"] = _usage["cache_read_input_tokens"]
setattr(model_response, "usage", usage)  # type: ignore
+model_response._hidden_params = _hidden_params
return model_response

async def acompletion_stream_function(
@@ -919,9 +408,9 @@
litellm_params=None,
logger_fn=None,
headers={},
-client=None,
+client: Optional[AsyncHTTPHandler] = None,
) -> Union[ModelResponse, CustomStreamWrapper]:
-async_handler = get_async_httpx_client(
+async_handler = client or get_async_httpx_client(
llm_provider=litellm.LlmProviders.ANTHROPIC
)
@@ -937,7 +426,17 @@
original_response=str(e),
additional_args={"complete_input_dict": data},
)
-raise e
+status_code = getattr(e, "status_code", 500)
+error_headers = getattr(e, "headers", None)
+error_text = getattr(e, "text", str(e))
+error_response = getattr(e, "response", None)
+if error_headers is None and error_response:
+    error_headers = getattr(error_response, "headers", None)
+raise AnthropicError(
+    message=error_text,
+    status_code=status_code,
+    headers=error_headers,
+)

return self._process_response(
model=model,
@@ -977,73 +476,18 @@
_is_function_call = False
messages = copy.deepcopy(messages)
optional_params = copy.deepcopy(optional_params)
if model in custom_prompt_dict:
# check if the model has a registered custom prompt
model_prompt_details = custom_prompt_dict[model]
prompt = custom_prompt(
role_dict=model_prompt_details["roles"],
initial_prompt_value=model_prompt_details["initial_prompt_value"],
final_prompt_value=model_prompt_details["final_prompt_value"],
messages=messages,
)
else:
# Separate system prompt from rest of message
anthropic_system_message_list = AnthropicConfig().translate_system_message(
messages=messages
)
# Handling anthropic API Prompt Caching
if len(anthropic_system_message_list) > 0:
optional_params["system"] = anthropic_system_message_list
# Format rest of message according to anthropic guidelines
try:
messages = prompt_factory(
model=model, messages=messages, custom_llm_provider="anthropic"
)
except Exception as e:
raise AnthropicError(
status_code=400,
message="{}\nReceived Messages={}".format(str(e), messages),
) # don't use verbose_logger.exception, if exception is raised
## Load Config
config = litellm.AnthropicConfig.get_config()
for k, v in config.items():
if (
k not in optional_params
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
optional_params[k] = v
## Handle Tool Calling
if "tools" in optional_params:
_is_function_call = True
if "anthropic-beta" not in headers:
# default to v1 of "anthropic-beta"
headers["anthropic-beta"] = "tools-2024-05-16"
anthropic_tools = []
for tool in optional_params["tools"]:
if "input_schema" in tool: # assume in anthropic format
anthropic_tools.append(tool)
else: # assume openai tool call
new_tool = tool["function"]
new_tool["input_schema"] = new_tool.pop("parameters") # rename key
if "cache_control" in tool:
new_tool["cache_control"] = tool["cache_control"]
anthropic_tools.append(new_tool)
optional_params["tools"] = anthropic_tools
stream = optional_params.pop("stream", None)
-is_vertex_request: bool = optional_params.pop("is_vertex_request", False)
json_mode: bool = optional_params.pop("json_mode", False)
+is_vertex_request: bool = optional_params.pop("is_vertex_request", False)
-data = {
-    "messages": messages,
-    **optional_params,
-}
-if is_vertex_request is False:
-    data["model"] = model
+data = AnthropicConfig()._transform_request(
+    model=model,
+    messages=messages,
+    optional_params=optional_params,
+    headers=headers,
+    _is_function_call=_is_function_call,
+    is_vertex_request=is_vertex_request,
+)

## LOGGING
logging_obj.pre_call(
@@ -1136,12 +580,25 @@
client = HTTPHandler(timeout=timeout)  # type: ignore
else:
client = client
+try:
response = client.post(
-    api_base, headers=headers, data=json.dumps(data), timeout=timeout
+    api_base,
+    headers=headers,
+    data=json.dumps(data),
+    timeout=timeout,
)
-if response.status_code != 200:
+except Exception as e:
+    status_code = getattr(e, "status_code", 500)
+    error_headers = getattr(e, "headers", None)
+    error_text = getattr(e, "text", str(e))
+    error_response = getattr(e, "response", None)
+    if error_headers is None and error_response:
+        error_headers = getattr(error_response, "headers", None)
raise AnthropicError(
-    status_code=response.status_code, message=response.text
+    message=error_text,
+    status_code=status_code,
+    headers=error_headers,
)

return self._process_response(
@@ -1151,7 +608,7 @@
stream=stream,
logging_obj=logging_obj,
api_key=api_key,
-data=data,
+data=data,  # type: ignore
messages=messages,
print_verbose=print_verbose,
optional_params=optional_params,
@@ -1192,7 +649,7 @@ class ModelResponseIterator:
return False

def _handle_usage(
-    self, anthropic_usage_chunk: dict
+    self, anthropic_usage_chunk: Union[dict, UsageDelta]
) -> AnthropicChatCompletionUsageBlock:
special_fields = ["input_tokens", "output_tokens"]
@@ -1203,15 +660,19 @@
+ anthropic_usage_chunk.get("output_tokens", 0),
)
-if "cache_creation_input_tokens" in anthropic_usage_chunk:
-    usage_block["cache_creation_input_tokens"] = anthropic_usage_chunk[
-        "cache_creation_input_tokens"
-    ]
+cache_creation_input_tokens = anthropic_usage_chunk.get(
+    "cache_creation_input_tokens"
+)
+if cache_creation_input_tokens is not None and isinstance(
+    cache_creation_input_tokens, int
+):
+    usage_block["cache_creation_input_tokens"] = cache_creation_input_tokens
-if "cache_read_input_tokens" in anthropic_usage_chunk:
-    usage_block["cache_read_input_tokens"] = anthropic_usage_chunk[
-        "cache_read_input_tokens"
-    ]
+cache_read_input_tokens = anthropic_usage_chunk.get("cache_read_input_tokens")
+if cache_read_input_tokens is not None and isinstance(
+    cache_read_input_tokens, int
+):
+    usage_block["cache_read_input_tokens"] = cache_read_input_tokens

return usage_block
@@ -1313,6 +774,7 @@
}
"""
message_start_block = MessageStartBlock(**chunk)  # type: ignore
+if "usage" in message_start_block["message"]:
usage = self._handle_usage(
    anthropic_usage_chunk=message_start_block["message"]["usage"]
)
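
With the `_hidden_params` change above, provider response headers are re-keyed with an `llm_provider-` prefix and attached to the response object. A sketch of how a caller might read them (the header name is illustrative):

```python
import litellm

resp = litellm.completion(
    model="claude-3-haiku-20240307",
    messages=[{"role": "user", "content": "ping"}],
)
# additional_headers holds the provider's raw response headers, prefixed.
headers = resp._hidden_params.get("additional_headers", {})
print(headers.get("llm_provider-anthropic-ratelimit-requests-remaining"))
```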

litellm/llms/anthropic/chat/transformation.py

@@ -0,0 +1,289 @@
import types
from typing import List, Literal, Optional, Tuple, Union
import litellm
from litellm.llms.prompt_templates.factory import anthropic_messages_pt
from litellm.types.llms.anthropic import (
AnthropicMessageRequestBase,
AnthropicMessagesRequest,
AnthropicMessagesToolChoice,
AnthropicSystemMessageContent,
)
from litellm.types.llms.openai import AllMessageValues, ChatCompletionSystemMessage
from litellm.utils import has_tool_call_blocks
from ..common_utils import AnthropicError
class AnthropicConfig:
"""
Reference: https://docs.anthropic.com/claude/reference/messages_post
to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
"""
max_tokens: Optional[int] = (
4096 # anthropic requires a default value (Opus, Sonnet, and Haiku have the same default)
)
stop_sequences: Optional[list] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
top_k: Optional[int] = None
metadata: Optional[dict] = None
system: Optional[str] = None
def __init__(
self,
max_tokens: Optional[
int
] = 4096, # You can pass in a value yourself or use the default value 4096
stop_sequences: Optional[list] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
top_k: Optional[int] = None,
metadata: Optional[dict] = None,
system: Optional[str] = None,
) -> None:
locals_ = locals()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params(self):
return [
"stream",
"stop",
"temperature",
"top_p",
"max_tokens",
"max_completion_tokens",
"tools",
"tool_choice",
"extra_headers",
]
def get_cache_control_headers(self) -> dict:
return {
"anthropic-version": "2023-06-01",
"anthropic-beta": "prompt-caching-2024-07-31",
}
def map_openai_params(
self,
non_default_params: dict,
optional_params: dict,
messages: Optional[List[AllMessageValues]] = None,
):
for param, value in non_default_params.items():
if param == "max_tokens":
optional_params["max_tokens"] = value
if param == "max_completion_tokens":
optional_params["max_tokens"] = value
if param == "tools":
optional_params["tools"] = value
if param == "tool_choice":
_tool_choice: Optional[AnthropicMessagesToolChoice] = None
if value == "auto":
_tool_choice = {"type": "auto"}
elif value == "required":
_tool_choice = {"type": "any"}
elif isinstance(value, dict):
_tool_choice = {"type": "tool", "name": value["function"]["name"]}
if _tool_choice is not None:
optional_params["tool_choice"] = _tool_choice
if param == "stream" and value is True:
optional_params["stream"] = value
if param == "stop":
if isinstance(value, str):
if (
value == "\n"
) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
continue
value = [value]
elif isinstance(value, list):
new_v = []
for v in value:
if (
v == "\n"
) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
continue
new_v.append(v)
if len(new_v) > 0:
value = new_v
else:
continue
optional_params["stop_sequences"] = value
if param == "temperature":
optional_params["temperature"] = value
if param == "top_p":
optional_params["top_p"] = value
## VALIDATE REQUEST
"""
Anthropic doesn't support tool calling without `tools=` param specified.
"""
if (
"tools" not in non_default_params
and messages is not None
and has_tool_call_blocks(messages)
):
raise litellm.UnsupportedParamsError(
message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.",
model="",
llm_provider="anthropic",
)
return optional_params
def is_cache_control_set(self, messages: List[AllMessageValues]) -> bool:
"""
Return if {"cache_control": ..} in message content block
Used to check if anthropic prompt caching headers need to be set.
"""
for message in messages:
_message_content = message.get("content")
if _message_content is not None and isinstance(_message_content, list):
for content in _message_content:
if "cache_control" in content:
return True
return False
def translate_system_message(
self, messages: List[AllMessageValues]
) -> List[AnthropicSystemMessageContent]:
"""
Translate system message to anthropic format.
Removes system message from the original list and returns a new list of anthropic system message content.
"""
system_prompt_indices = []
anthropic_system_message_list: List[AnthropicSystemMessageContent] = []
for idx, message in enumerate(messages):
if message["role"] == "system":
valid_content: bool = False
system_message_block = ChatCompletionSystemMessage(**message)
if isinstance(system_message_block["content"], str):
anthropic_system_message_content = AnthropicSystemMessageContent(
type="text",
text=system_message_block["content"],
)
if "cache_control" in system_message_block:
anthropic_system_message_content["cache_control"] = (
system_message_block["cache_control"]
)
anthropic_system_message_list.append(
anthropic_system_message_content
)
valid_content = True
elif isinstance(message["content"], list):
for _content in message["content"]:
anthropic_system_message_content = (
AnthropicSystemMessageContent(
type=_content.get("type"),
text=_content.get("text"),
)
)
if "cache_control" in _content:
anthropic_system_message_content["cache_control"] = (
_content["cache_control"]
)
anthropic_system_message_list.append(
anthropic_system_message_content
)
valid_content = True
if valid_content:
system_prompt_indices.append(idx)
if len(system_prompt_indices) > 0:
for idx in reversed(system_prompt_indices):
messages.pop(idx)
return anthropic_system_message_list
def _transform_request(
self,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
headers: dict,
_is_function_call: bool,
is_vertex_request: bool,
) -> dict:
"""
Translate messages to anthropic format.
"""
# Separate system prompt from rest of message
anthropic_system_message_list = self.translate_system_message(messages=messages)
# Handling anthropic API Prompt Caching
if len(anthropic_system_message_list) > 0:
optional_params["system"] = anthropic_system_message_list
# Format rest of message according to anthropic guidelines
try:
anthropic_messages = anthropic_messages_pt(
model=model,
messages=messages,
llm_provider="anthropic",
)
except Exception as e:
raise AnthropicError(
status_code=400,
message="{}\nReceived Messages={}".format(str(e), messages),
) # don't use verbose_logger.exception, if exception is raised
## Load Config
config = litellm.AnthropicConfig.get_config()
for k, v in config.items():
if (
k not in optional_params
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
optional_params[k] = v
## Handle Tool Calling
if "tools" in optional_params:
_is_function_call = True
if "anthropic-beta" not in headers:
# default to v1 of "anthropic-beta"
headers["anthropic-beta"] = "tools-2024-05-16"
anthropic_tools = []
for tool in optional_params["tools"]:
if "input_schema" in tool: # assume in anthropic format
anthropic_tools.append(tool)
else: # assume openai tool call
new_tool = tool["function"]
new_tool["input_schema"] = new_tool.pop("parameters") # rename key
if "cache_control" in tool:
new_tool["cache_control"] = tool["cache_control"]
anthropic_tools.append(new_tool)
optional_params["tools"] = anthropic_tools
data = {
"messages": anthropic_messages,
**optional_params,
}
if not is_vertex_request:
data["model"] = model
return data
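
The validation added in `map_openai_params` above backs the commit bullet about tool calls without a `tools=` param (#5747). A sketch exercising it directly, without a network call:

```python
import litellm

messages = [
    {"role": "user", "content": "What's the weather in SF?"},
    {
        "role": "assistant",
        "tool_calls": [
            {
                "id": "toolu_01",
                "type": "function",
                "function": {"name": "get_weather", "arguments": '{"city": "SF"}'},
            }
        ],
    },
]

try:
    # No "tools" entry in non_default_params, but messages carry tool_calls.
    litellm.AnthropicConfig().map_openai_params(
        non_default_params={"temperature": 0.2},
        optional_params={},
        messages=messages,
    )
except litellm.UnsupportedParamsError as e:
    print(e)  # Anthropic doesn't support tool calling without `tools=` param specified. ...
```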

litellm/llms/anthropic/common_utils.py

@@ -0,0 +1,26 @@
"""
This file contains common utils for anthropic calls.
"""
from typing import Optional
import httpx
class AnthropicError(Exception):
def __init__(
self,
status_code: int,
message,
headers: Optional[httpx.Headers] = None,
):
self.status_code = status_code
self.message: str = message
self.headers = headers
self.request = httpx.Request(
method="POST", url="https://api.anthropic.com/v1/messages"
)
self.response = httpx.Response(status_code=status_code, request=self.request)
super().__init__(
self.message
) # Call the base class constructor with the parameters it needs
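
`AnthropicError` now carries the provider's response headers, which is what lets litellm surface Anthropic's `retry-after` on rate limits (#4387). A small sketch of the new field:

```python
import httpx

from litellm.llms.anthropic.common_utils import AnthropicError

# Simulate a rate-limited response to show how the headers field is consumed.
err = AnthropicError(
    status_code=429,
    message="rate limited",
    headers=httpx.Headers({"retry-after": "7"}),
)
if err.status_code == 429 and err.headers is not None:
    print(err.headers.get("retry-after"))  # -> "7"
```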

litellm/llms/anthropic/experimental_pass_through/transformation.py

@@ -0,0 +1,425 @@
import json
import types
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
from openai.types.chat.chat_completion_chunk import Choice as OpenAIStreamingChoice
import litellm
from litellm.types.llms.anthropic import (
AnthopicMessagesAssistantMessageParam,
AnthropicChatCompletionUsageBlock,
AnthropicFinishReason,
AnthropicMessagesRequest,
AnthropicMessagesTool,
AnthropicMessagesToolChoice,
AnthropicMessagesUserMessageParam,
AnthropicResponse,
AnthropicResponseContentBlockText,
AnthropicResponseContentBlockToolUse,
AnthropicResponseUsageBlock,
AnthropicSystemMessageContent,
ContentBlockDelta,
ContentBlockStart,
ContentBlockStop,
ContentJsonBlockDelta,
ContentTextBlockDelta,
MessageBlockDelta,
MessageDelta,
MessageStartBlock,
UsageDelta,
)
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionAssistantMessage,
ChatCompletionAssistantToolCall,
ChatCompletionImageObject,
ChatCompletionImageUrlObject,
ChatCompletionRequest,
ChatCompletionResponseMessage,
ChatCompletionSystemMessage,
ChatCompletionTextObject,
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
ChatCompletionToolChoiceFunctionParam,
ChatCompletionToolChoiceObjectParam,
ChatCompletionToolChoiceValues,
ChatCompletionToolMessage,
ChatCompletionToolParam,
ChatCompletionToolParamFunctionChunk,
ChatCompletionUsageBlock,
ChatCompletionUserMessage,
OpenAIMessageContent,
)
from litellm.types.utils import Choices, GenericStreamingChunk
from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
from ...base import BaseLLM
from ...prompt_templates.factory import (
anthropic_messages_pt,
custom_prompt,
prompt_factory,
)
class AnthropicExperimentalPassThroughConfig:
def __init__(self):
pass
### FOR [BETA] `/v1/messages` endpoint support
def translatable_anthropic_params(self) -> List:
"""
Which anthropic params, we need to translate to the openai format.
"""
return ["messages", "metadata", "system", "tool_choice", "tools"]
def translate_anthropic_messages_to_openai(
self,
messages: List[
Union[
AnthropicMessagesUserMessageParam,
AnthopicMessagesAssistantMessageParam,
]
],
) -> List:
new_messages: List[AllMessageValues] = []
for m in messages:
user_message: Optional[ChatCompletionUserMessage] = None
tool_message_list: List[ChatCompletionToolMessage] = []
new_user_content_list: List[
Union[ChatCompletionTextObject, ChatCompletionImageObject]
] = []
## USER MESSAGE ##
if m["role"] == "user":
## translate user message
message_content = m.get("content")
if message_content and isinstance(message_content, str):
user_message = ChatCompletionUserMessage(
role="user", content=message_content
)
elif message_content and isinstance(message_content, list):
for content in message_content:
if content["type"] == "text":
text_obj = ChatCompletionTextObject(
type="text", text=content["text"]
)
new_user_content_list.append(text_obj)
elif content["type"] == "image":
image_url = ChatCompletionImageUrlObject(
url=f"data:{content['type']};base64,{content['source']}"
)
image_obj = ChatCompletionImageObject(
type="image_url", image_url=image_url
)
new_user_content_list.append(image_obj)
elif content["type"] == "tool_result":
if "content" not in content:
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content="",
)
tool_message_list.append(tool_result)
elif isinstance(content["content"], str):
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=content["content"],
)
tool_message_list.append(tool_result)
elif isinstance(content["content"], list):
for c in content["content"]:
if c["type"] == "text":
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=c["text"],
)
tool_message_list.append(tool_result)
elif c["type"] == "image":
image_str = (
f"data:{c['type']};base64,{c['source']}"
)
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=image_str,
)
tool_message_list.append(tool_result)
if user_message is not None:
new_messages.append(user_message)
if len(new_user_content_list) > 0:
new_messages.append({"role": "user", "content": new_user_content_list}) # type: ignore
if len(tool_message_list) > 0:
new_messages.extend(tool_message_list)
## ASSISTANT MESSAGE ##
assistant_message_str: Optional[str] = None
tool_calls: List[ChatCompletionAssistantToolCall] = []
if m["role"] == "assistant":
if isinstance(m["content"], str):
assistant_message_str = m["content"]
elif isinstance(m["content"], list):
for content in m["content"]:
if content["type"] == "text":
if assistant_message_str is None:
assistant_message_str = content["text"]
else:
assistant_message_str += content["text"]
elif content["type"] == "tool_use":
function_chunk = ChatCompletionToolCallFunctionChunk(
name=content["name"],
arguments=json.dumps(content["input"]),
)
tool_calls.append(
ChatCompletionAssistantToolCall(
id=content["id"],
type="function",
function=function_chunk,
)
)
if assistant_message_str is not None or len(tool_calls) > 0:
assistant_message = ChatCompletionAssistantMessage(
role="assistant",
content=assistant_message_str,
)
if len(tool_calls) > 0:
assistant_message["tool_calls"] = tool_calls
new_messages.append(assistant_message)
return new_messages
def translate_anthropic_tool_choice_to_openai(
self, tool_choice: AnthropicMessagesToolChoice
) -> ChatCompletionToolChoiceValues:
if tool_choice["type"] == "any":
return "required"
elif tool_choice["type"] == "auto":
return "auto"
elif tool_choice["type"] == "tool":
tc_function_param = ChatCompletionToolChoiceFunctionParam(
name=tool_choice.get("name", "")
)
return ChatCompletionToolChoiceObjectParam(
type="function", function=tc_function_param
)
else:
raise ValueError(
"Incompatible tool choice param submitted - {}".format(tool_choice)
)
def translate_anthropic_tools_to_openai(
self, tools: List[AnthropicMessagesTool]
) -> List[ChatCompletionToolParam]:
new_tools: List[ChatCompletionToolParam] = []
for tool in tools:
function_chunk = ChatCompletionToolParamFunctionChunk(
name=tool["name"],
parameters=tool["input_schema"],
)
if "description" in tool:
function_chunk["description"] = tool["description"]
new_tools.append(
ChatCompletionToolParam(type="function", function=function_chunk)
)
return new_tools
def translate_anthropic_to_openai(
self, anthropic_message_request: AnthropicMessagesRequest
) -> ChatCompletionRequest:
"""
This is used by the beta Anthropic Adapter, for translating anthropic `/v1/messages` requests to the openai format.
"""
new_messages: List[AllMessageValues] = []
## CONVERT ANTHROPIC MESSAGES TO OPENAI
new_messages = self.translate_anthropic_messages_to_openai(
messages=anthropic_message_request["messages"]
)
## ADD SYSTEM MESSAGE TO MESSAGES
if "system" in anthropic_message_request:
new_messages.insert(
0,
ChatCompletionSystemMessage(
role="system", content=anthropic_message_request["system"]
),
)
new_kwargs: ChatCompletionRequest = {
"model": anthropic_message_request["model"],
"messages": new_messages,
}
## CONVERT METADATA (user_id)
if "metadata" in anthropic_message_request:
if "user_id" in anthropic_message_request["metadata"]:
new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"]
# Pass litellm proxy specific metadata
if "litellm_metadata" in anthropic_message_request:
# metadata will be passed to litellm.acompletion(), it's a litellm_param
new_kwargs["metadata"] = anthropic_message_request.pop("litellm_metadata")
## CONVERT TOOL CHOICE
if "tool_choice" in anthropic_message_request:
new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai(
tool_choice=anthropic_message_request["tool_choice"]
)
## CONVERT TOOLS
if "tools" in anthropic_message_request:
new_kwargs["tools"] = self.translate_anthropic_tools_to_openai(
tools=anthropic_message_request["tools"]
)
translatable_params = self.translatable_anthropic_params()
for k, v in anthropic_message_request.items():
if k not in translatable_params: # pass remaining params as is
new_kwargs[k] = v # type: ignore
return new_kwargs
def _translate_openai_content_to_anthropic(
self, choices: List[Choices]
) -> List[
Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
]:
new_content: List[
Union[
AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse
]
] = []
for choice in choices:
if (
choice.message.tool_calls is not None
and len(choice.message.tool_calls) > 0
):
for tool_call in choice.message.tool_calls:
new_content.append(
AnthropicResponseContentBlockToolUse(
type="tool_use",
id=tool_call.id,
name=tool_call.function.name or "",
input=json.loads(tool_call.function.arguments),
)
)
elif choice.message.content is not None:
new_content.append(
AnthropicResponseContentBlockText(
type="text", text=choice.message.content
)
)
return new_content
def _translate_openai_finish_reason_to_anthropic(
self, openai_finish_reason: str
) -> AnthropicFinishReason:
if openai_finish_reason == "stop":
return "end_turn"
elif openai_finish_reason == "length":
return "max_tokens"
elif openai_finish_reason == "tool_calls":
return "tool_use"
return "end_turn"
def translate_openai_response_to_anthropic(
self, response: litellm.ModelResponse
) -> AnthropicResponse:
## translate content block
anthropic_content = self._translate_openai_content_to_anthropic(choices=response.choices) # type: ignore
## extract finish reason
anthropic_finish_reason = self._translate_openai_finish_reason_to_anthropic(
openai_finish_reason=response.choices[0].finish_reason # type: ignore
)
# extract usage
usage: litellm.Usage = getattr(response, "usage")
anthropic_usage = AnthropicResponseUsageBlock(
input_tokens=usage.prompt_tokens or 0,
output_tokens=usage.completion_tokens or 0,
)
translated_obj = AnthropicResponse(
id=response.id,
type="message",
role="assistant",
model=response.model or "unknown-model",
stop_sequence=None,
usage=anthropic_usage,
content=anthropic_content,
stop_reason=anthropic_finish_reason,
)
return translated_obj
def _translate_streaming_openai_chunk_to_anthropic(
self, choices: List[OpenAIStreamingChoice]
) -> Tuple[
Literal["text_delta", "input_json_delta"],
Union[ContentTextBlockDelta, ContentJsonBlockDelta],
]:
text: str = ""
partial_json: Optional[str] = None
for choice in choices:
if choice.delta.content is not None:
text += choice.delta.content
elif choice.delta.tool_calls is not None:
partial_json = ""
for tool in choice.delta.tool_calls:
if (
tool.function is not None
and tool.function.arguments is not None
):
partial_json += tool.function.arguments
if partial_json is not None:
return "input_json_delta", ContentJsonBlockDelta(
type="input_json_delta", partial_json=partial_json
)
else:
return "text_delta", ContentTextBlockDelta(type="text_delta", text=text)
def translate_streaming_openai_response_to_anthropic(
self, response: litellm.ModelResponse
) -> Union[ContentBlockDelta, MessageBlockDelta]:
## base case - final chunk w/ finish reason
if response.choices[0].finish_reason is not None:
delta = MessageDelta(
stop_reason=self._translate_openai_finish_reason_to_anthropic(
response.choices[0].finish_reason
),
)
if getattr(response, "usage", None) is not None:
litellm_usage_chunk: Optional[litellm.Usage] = response.usage # type: ignore
elif (
hasattr(response, "_hidden_params")
and "usage" in response._hidden_params
):
litellm_usage_chunk = response._hidden_params["usage"]
else:
litellm_usage_chunk = None
if litellm_usage_chunk is not None:
usage_delta = UsageDelta(
input_tokens=litellm_usage_chunk.prompt_tokens or 0,
output_tokens=litellm_usage_chunk.completion_tokens or 0,
)
else:
usage_delta = UsageDelta(input_tokens=0, output_tokens=0)
return MessageBlockDelta(
type="message_delta", delta=delta, usage=usage_delta
)
(
type_of_content,
content_block_delta,
) = self._translate_streaming_openai_chunk_to_anthropic(
choices=response.choices # type: ignore
)
return ContentBlockDelta(
type="content_block_delta",
index=response.choices[0].index,
delta=content_block_delta,
)
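
To make the adapter flow above concrete, here is a minimal usage sketch of the request-side translation. It assumes the class is exported as `AnthropicExperimentalPassThroughConfig` (as in the test changes later in this diff) and that the dict matches `AnthropicMessagesRequest`:

from litellm import AnthropicExperimentalPassThroughConfig

config = AnthropicExperimentalPassThroughConfig()
anthropic_request = {
    "model": "claude-3-haiku-20240307",
    "max_tokens": 256,
    "system": "You are a helpful assistant.",
    "messages": [{"role": "user", "content": "Hey, how's it going?"}],
    "tool_choice": {"type": "auto"},
}
openai_request = config.translate_anthropic_to_openai(
    anthropic_message_request=anthropic_request  # type: ignore
)
# the `system` block is inserted as messages[0]; tool_choice "auto" maps to "auto"
assert openai_request["messages"][0]["role"] == "system"
assert openai_request["tool_choice"] == "auto"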

View file

@@ -22,7 +22,7 @@ from litellm.types.llms.openai import (
     ChatCompletionToolParamFunctionChunk,
 )
 from litellm.types.utils import ModelResponse, Usage
-from litellm.utils import CustomStreamWrapper
+from litellm.utils import CustomStreamWrapper, has_tool_call_blocks
 from ...prompt_templates.factory import _bedrock_converse_messages_pt, _bedrock_tools_pt
 from ..common_utils import BedrockError, get_bedrock_tool_name
@@ -136,6 +136,7 @@ class AmazonConverseConfig:
         non_default_params: dict,
         optional_params: dict,
         drop_params: bool,
+        messages: Optional[List[AllMessageValues]] = None,
     ) -> dict:
         for param, value in non_default_params.items():
             if param == "response_format":
@@ -202,6 +203,21 @@ class AmazonConverseConfig:
                 )
             if _tool_choice_value is not None:
                 optional_params["tool_choice"] = _tool_choice_value
+        ## VALIDATE REQUEST
+        """
+        Bedrock doesn't support tool calling without `tools=` param specified.
+        """
+        if (
+            "tools" not in non_default_params
+            and messages is not None
+            and has_tool_call_blocks(messages)
+        ):
+            raise litellm.UnsupportedParamsError(
+                message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.",
+                model="",
+                llm_provider="anthropic",
+            )
         return optional_params

     def _transform_request(
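
As a quick illustration of the validation added above (a sketch mirroring the test added later in this diff): replaying an assistant tool-call turn without re-sending `tools=` now fails fast instead of erroring on the provider side.

import litellm

messages = [
    {"role": "user", "content": "What's the weather in SF?"},
    {
        "role": "assistant",
        "tool_calls": [
            {
                "id": "tooluse_123",  # hypothetical id, for illustration only
                "type": "function",
                "function": {
                    "name": "get_current_weather",
                    "arguments": '{"location": "San Francisco, CA"}',
                },
            }
        ],
    },
]

try:
    litellm.completion(
        model="anthropic.claude-3-sonnet-20240229-v1:0",
        messages=messages,  # tool-call blocks present, but no `tools=` param
    )
except litellm.UnsupportedParamsError as e:
    print(e)  # asks the caller to pass `tools=` to enable tool calling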

View file

@@ -0,0 +1,60 @@
"""
Handles the chat completion request for groq
"""

from typing import Any, Callable, Optional, Union

from httpx._config import Timeout

from litellm.utils import ModelResponse

from ...groq.chat.transformation import GroqChatConfig
from ...OpenAI.openai import OpenAIChatCompletion


class GroqChatCompletion(OpenAIChatCompletion):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def completion(
        self,
        model_response: ModelResponse,
        timeout: Union[float, Timeout],
        optional_params: dict,
        logging_obj: Any,
        model: Optional[str] = None,
        messages: Optional[list] = None,
        print_verbose: Optional[Callable[..., Any]] = None,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        acompletion: bool = False,
        litellm_params=None,
        logger_fn=None,
        headers: Optional[dict] = None,
        custom_prompt_dict: dict = {},
        client=None,
        organization: Optional[str] = None,
        custom_llm_provider: Optional[str] = None,
        drop_params: Optional[bool] = None,
    ):
        messages = GroqChatConfig()._transform_messages(messages)  # type: ignore
        return super().completion(
            model_response,
            timeout,
            optional_params,
            logging_obj,
            model,
            messages,
            print_verbose,
            api_key,
            api_base,
            acompletion,
            litellm_params,
            logger_fn,
            headers,
            custom_prompt_dict,
            client,
            organization,
            custom_llm_provider,
            drop_params,
        )

View file

@@ -0,0 +1,88 @@
"""
Translate from OpenAI's `/v1/chat/completions` to Groq's `/v1/chat/completions`
"""

import types
from typing import List, Optional, Union

from pydantic import BaseModel

import litellm
from litellm.types.llms.openai import AllMessageValues, ChatCompletionAssistantMessage

from ...OpenAI.chat.gpt_transformation import OpenAIGPTConfig


class GroqChatConfig(OpenAIGPTConfig):
    frequency_penalty: Optional[int] = None
    function_call: Optional[Union[str, dict]] = None
    functions: Optional[list] = None
    logit_bias: Optional[dict] = None
    max_tokens: Optional[int] = None
    n: Optional[int] = None
    presence_penalty: Optional[int] = None
    stop: Optional[Union[str, list]] = None
    temperature: Optional[int] = None
    top_p: Optional[int] = None
    response_format: Optional[dict] = None
    tools: Optional[list] = None
    tool_choice: Optional[Union[str, dict]] = None

    def __init__(
        self,
        frequency_penalty: Optional[int] = None,
        function_call: Optional[Union[str, dict]] = None,
        functions: Optional[list] = None,
        logit_bias: Optional[dict] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[int] = None,
        stop: Optional[Union[str, list]] = None,
        temperature: Optional[int] = None,
        top_p: Optional[int] = None,
        response_format: Optional[dict] = None,
        tools: Optional[list] = None,
        tool_choice: Optional[Union[str, dict]] = None,
    ) -> None:
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

    def _transform_messages(self, messages: List[AllMessageValues]) -> List:
        for idx, message in enumerate(messages):
            """
            1. Don't pass 'null' function_call assistant message to groq - https://github.com/BerriAI/litellm/issues/5839
            """
            if isinstance(message, BaseModel):
                _message = message.model_dump()
            else:
                _message = message
            assistant_message = _message.get("role") == "assistant"
            if assistant_message:
                new_message = ChatCompletionAssistantMessage(role="assistant")
                for k, v in _message.items():
                    if v is not None:
                        new_message[k] = v  # type: ignore
                messages[idx] = new_message
        return messages
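
A quick sketch of what `_transform_messages` does to an assistant turn that carries a `None` function_call (the shape reported in issue #5839):

from litellm.llms.groq.chat.transformation import GroqChatConfig

messages = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello!", "function_call": None, "tool_calls": None},
]
cleaned = GroqChatConfig()._transform_messages(messages)  # type: ignore
print(cleaned[1])  # {'role': 'assistant', 'content': 'Hello!'} - null fields dropped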

View file

@@ -0,0 +1,101 @@
"""
Translate from OpenAI's `/v1/audio/transcriptions` to Groq's `/v1/audio/transcriptions`
"""

import types
from typing import List, Optional, Union

import litellm


class GroqSTTConfig:
    frequency_penalty: Optional[int] = None
    function_call: Optional[Union[str, dict]] = None
    functions: Optional[list] = None
    logit_bias: Optional[dict] = None
    max_tokens: Optional[int] = None
    n: Optional[int] = None
    presence_penalty: Optional[int] = None
    stop: Optional[Union[str, list]] = None
    temperature: Optional[int] = None
    top_p: Optional[int] = None
    response_format: Optional[dict] = None
    tools: Optional[list] = None
    tool_choice: Optional[Union[str, dict]] = None

    def __init__(
        self,
        frequency_penalty: Optional[int] = None,
        function_call: Optional[Union[str, dict]] = None,
        functions: Optional[list] = None,
        logit_bias: Optional[dict] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[int] = None,
        stop: Optional[Union[str, list]] = None,
        temperature: Optional[int] = None,
        top_p: Optional[int] = None,
        response_format: Optional[dict] = None,
        tools: Optional[list] = None,
        tool_choice: Optional[Union[str, dict]] = None,
    ) -> None:
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

    def get_supported_openai_params_stt(self):
        return [
            "prompt",
            "response_format",
            "temperature",
            "language",
        ]

    def get_supported_openai_response_formats_stt(self) -> List[str]:
        return ["json", "verbose_json", "text"]

    def map_openai_params_stt(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        response_formats = self.get_supported_openai_response_formats_stt()
        for param, value in non_default_params.items():
            if param == "response_format":
                if value in response_formats:
                    optional_params[param] = value
                else:
                    if litellm.drop_params is True or drop_params is True:
                        pass
                    else:
                        raise litellm.utils.UnsupportedParamsError(
                            message="Groq doesn't support response_format={}. To drop unsupported openai params from the call, set `litellm.drop_params = True`".format(
                                value
                            ),
                            status_code=400,
                        )
            else:
                optional_params[param] = value
        return optional_params
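
A short sketch of the param-mapping behavior above: an unsupported `response_format` either raises or is silently dropped, depending on `drop_params` (the model name here is hypothetical, chosen only for illustration):

config = GroqSTTConfig()

# a supported format passes through
params = config.map_openai_params_stt(
    non_default_params={"response_format": "verbose_json", "temperature": 0},
    optional_params={},
    model="whisper-large-v3",  # hypothetical model name
    drop_params=False,
)
print(params)  # {'response_format': 'verbose_json', 'temperature': 0}

# an unsupported format with drop_params=True is dropped instead of raising
params = config.map_openai_params_stt(
    non_default_params={"response_format": "srt"},
    optional_params={},
    model="whisper-large-v3",
    drop_params=True,
)
print(params)  # {}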

View file

@@ -276,7 +276,7 @@ def completion(
     from anthropic import AnthropicVertex

-    from litellm.llms.anthropic.chat import AnthropicChatCompletion
+    from litellm.llms.anthropic.chat.handler import AnthropicChatCompletion
     from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
         VertexLLM,
     )
@@ -367,7 +367,7 @@ async def async_completion(
     if client is None:
         vertex_ai_client = AsyncAnthropicVertex(
-            project_id=vertex_project, region=vertex_location, access_token=access_token
+            project_id=vertex_project, region=vertex_location, access_token=access_token  # type: ignore
         )
     else:
         vertex_ai_client = client
@@ -438,7 +438,7 @@ async def async_streaming(
     if client is None:
         vertex_ai_client = AsyncAnthropicVertex(
-            project_id=vertex_project, region=vertex_location, access_token=access_token
+            project_id=vertex_project, region=vertex_location, access_token=access_token  # type: ignore
         )
     else:
         vertex_ai_client = client

View file

@@ -96,6 +96,7 @@ from .llms.cohere import completion as cohere_completion  # type: ignore
 from .llms.cohere import embed as cohere_embed
 from .llms.custom_llm import CustomLLM, custom_chat_llm_router
 from .llms.databricks.chat import DatabricksChatCompletion
+from .llms.groq.chat.handler import GroqChatCompletion
 from .llms.huggingface_restapi import Huggingface
 from .llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription
 from .llms.OpenAI.chat.o1_handler import OpenAIO1ChatCompletion
@@ -168,6 +169,7 @@ openai_text_completions = OpenAITextCompletion()
 openai_o1_chat_completions = OpenAIO1ChatCompletion()
 openai_audio_transcriptions = OpenAIAudioTranscription()
 databricks_chat_completions = DatabricksChatCompletion()
+groq_chat_completions = GroqChatCompletion()
 azure_ai_chat_completions = AzureAIChatCompletion()
 azure_ai_embedding = AzureAIEmbedding()
 anthropic_chat_completions = AnthropicChatCompletion()
@@ -958,6 +960,7 @@ def completion(
             extra_headers=extra_headers,
             api_version=api_version,
             parallel_tool_calls=parallel_tool_calls,
+            messages=messages,
             **non_default_params,
         )
@@ -1318,13 +1321,56 @@ def completion(
                 additional_args={"headers": headers},
             )
             response = _response
+        elif custom_llm_provider == "groq":
+            api_base = (
+                api_base  # for deepinfra/perplexity/anyscale/groq/friendliai we check in get_llm_provider and pass in the api base from there
+                or litellm.api_base
+                or get_secret("GROQ_API_BASE")
+                or "https://api.groq.com/openai/v1"
+            )
+
+            # set API KEY
+            api_key = (
+                api_key
+                or litellm.api_key  # for deepinfra/perplexity/anyscale/friendliai we check in get_llm_provider and pass in the api key from there
+                or litellm.groq_key
+                or get_secret("GROQ_API_KEY")
+            )
+
+            headers = headers or litellm.headers
+
+            ## LOAD CONFIG - if set
+            config = litellm.GroqChatConfig.get_config()
+            for k, v in config.items():
+                if (
+                    k not in optional_params
+                ):  # completion(top_k=3) > openai_config(top_k=3) <- allows for dynamic variables to be passed in
+                    optional_params[k] = v
+
+            response = groq_chat_completions.completion(
+                model=model,
+                messages=messages,
+                headers=headers,
+                model_response=model_response,
+                print_verbose=print_verbose,
+                api_key=api_key,
+                api_base=api_base,
+                acompletion=acompletion,
+                logging_obj=logging,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
+                logger_fn=logger_fn,
+                timeout=timeout,  # type: ignore
+                custom_prompt_dict=custom_prompt_dict,
+                client=client,  # pass AsyncOpenAI, OpenAI client
+                organization=organization,
+                custom_llm_provider=custom_llm_provider,
+            )
         elif (
             model in litellm.open_ai_chat_completion_models
             or custom_llm_provider == "custom_openai"
             or custom_llm_provider == "deepinfra"
             or custom_llm_provider == "perplexity"
-            or custom_llm_provider == "groq"
             or custom_llm_provider == "nvidia_nim"
             or custom_llm_provider == "cerebras"
             or custom_llm_provider == "sambanova"
@@ -1431,6 +1477,7 @@ def completion(
                 original_response=response,
                 additional_args={"headers": headers},
             )
+
         elif (
             "replicate" in model
             or custom_llm_provider == "replicate"
@@ -2933,6 +2980,7 @@ def batch_completion(
     deployment_id=None,
     request_timeout: Optional[int] = None,
     timeout: Optional[int] = 600,
+    max_workers: Optional[int] = 100,
     # Optional liteLLM function params
     **kwargs,
 ):
@@ -2956,6 +3004,7 @@ def batch_completion(
         user (str, optional): The user string for generating completions. Defaults to "".
         deployment_id (optional): The deployment ID for generating completions. Defaults to None.
         request_timeout (int, optional): The request timeout for generating completions. Defaults to None.
+        max_workers (int, optional): The maximum number of threads to use for parallel processing.

     Returns:
         list: A list of completion results.
@@ -3001,7 +3050,7 @@ def batch_completion(
         for i in range(0, len(lst), n):
             yield lst[i : i + n]

-    with ThreadPoolExecutor(max_workers=100) as executor:
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
         for sub_batch in chunks(batch_messages, 100):
             for message_list in sub_batch:
                 kwargs_modified = args.copy()
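
Usage sketch for the new knob, capping the thread pool instead of the old hard-coded 100 workers:

import litellm

responses = litellm.batch_completion(
    model="gpt-3.5-turbo",
    messages=[
        [{"role": "user", "content": f"Say hello #{i}"}] for i in range(20)
    ],
    max_workers=10,  # at most 10 threads in flight
)
print(len(responses))  # 20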

View file

@@ -1173,6 +1173,18 @@
         "supports_function_calling": true,
         "supports_assistant_prefill": true
     },
+    "mistral/pixtral-12b-2409": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000015,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_assistant_prefill": true,
+        "supports_vision": true
+    },
     "mistral/open-mistral-7b": {
         "max_tokens": 8191,
         "max_input_tokens": 32000,

View file

@@ -760,7 +760,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
             return _user_id_rate_limits.model_dump()
         except Exception as e:
-            verbose_proxy_logger.exception(
+            verbose_proxy_logger.debug(
                 "Parallel Request Limiter: Error getting user object", str(e)
             )
             return None

View file

@@ -389,6 +389,9 @@ async def add_litellm_data_to_request(
         user_api_key_dict=user_api_key_dict,
     )

+    verbose_proxy_logger.debug(
+        f"[PROXY]returned data from litellm_pre_call_utils: {data}"
+    )
     return data

View file

@@ -1466,9 +1466,6 @@ class PrismaClient:
     ):
         args_passed_in = locals()
         start_time = time.time()
-        verbose_proxy_logger.debug(
-            f"PrismaClient: get_data - args_passed_in: {args_passed_in}"
-        )
         hashed_token: Optional[str] = None
         try:
             response: Any = None

View file

@@ -1224,3 +1224,14 @@ def test_langfuse_prompt_type(prompt):
     _add_prompt_to_generation_params(
         generation_params=generation_params, clean_metadata=clean_metadata
     )
+
+
+def test_langfuse_logging_metadata():
+    from litellm.integrations.langfuse import log_requester_metadata
+
+    metadata = {"key": "value", "requester_metadata": {"key": "value"}}
+    got_metadata = log_requester_metadata(clean_metadata=metadata)
+    expected_metadata = {"requester_metadata": {"key": "value"}}
+
+    assert expected_metadata == got_metadata

View file

@@ -61,6 +61,7 @@ async def test_litellm_anthropic_prompt_caching_tools():
         }
     }
     mock_response.json = return_val
+    mock_response.headers = {"key": "value"}

     litellm.set_verbose = True
     with patch(
@@ -466,6 +467,7 @@ async def test_litellm_anthropic_prompt_caching_system():
         }
     }
     mock_response.json = return_val
+    mock_response.headers = {"key": "value"}

     litellm.set_verbose = True
     with patch(

View file

@@ -24,7 +24,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedd
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.llms.prompt_templates.factory import anthropic_messages_pt

-# litellm.num_retries = 3
+# litellm.num_retries=3
 litellm.cache = None
 litellm.success_callback = []

View file

@@ -1173,7 +1173,12 @@ def test_turn_off_message_logging():
 ##### VALID JSON ######

-@pytest.mark.parametrize("model", ["gpt-3.5-turbo", "azure/chatgpt-v-2"])
+@pytest.mark.parametrize(
+    "model",
+    [
+        "ft:gpt-3.5-turbo:my-org:custom_suffix:id"
+    ],  # "gpt-3.5-turbo", "azure/chatgpt-v-2",
+)
 @pytest.mark.parametrize(
     "turn_off_message_logging",
     [
@@ -1200,7 +1205,7 @@ def test_standard_logging_payload(model, turn_off_message_logging):
     _ = litellm.completion(
         model=model,
         messages=[{"role": "user", "content": "Hey, how's it going?"}],
-        # mock_response="Going well!",
+        mock_response="Going well!",
     )

     time.sleep(2)

View file

@@ -7,6 +7,8 @@ from typing import Any

 from openai import AuthenticationError, BadRequestError, OpenAIError, RateLimitError

+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
@@ -884,6 +886,42 @@ def _pre_call_utils(
     return data, original_function, mapped_target


+def _pre_call_utils_httpx(
+    call_type: str,
+    data: dict,
+    client: Union[HTTPHandler, AsyncHTTPHandler],
+    sync_mode: bool,
+    streaming: Optional[bool],
+):
+    mapped_target: Any = client.client
+    if call_type == "embedding":
+        data["input"] = "Hello world!"
+        if sync_mode:
+            original_function = litellm.embedding
+        else:
+            original_function = litellm.aembedding
+    elif call_type == "chat_completion":
+        data["messages"] = [{"role": "user", "content": "Hello world"}]
+        if streaming is True:
+            data["stream"] = True
+        if sync_mode:
+            original_function = litellm.completion
+        else:
+            original_function = litellm.acompletion
+    elif call_type == "completion":
+        data["prompt"] = "Hello world"
+        if streaming is True:
+            data["stream"] = True
+        if sync_mode:
+            original_function = litellm.text_completion
+        else:
+            original_function = litellm.atext_completion
+
+    return data, original_function, mapped_target
+
+
 @pytest.mark.parametrize(
     "sync_mode",
     [True, False],
@@ -1006,3 +1044,111 @@ async def test_exception_with_headers(sync_mode, provider, model, call_type, str
     if exception_raised is False:
         print(resp)
     assert exception_raised
+
+
+@pytest.mark.parametrize(
+    "sync_mode",
+    [True, False],
+)
+@pytest.mark.parametrize("streaming", [True, False])
+@pytest.mark.parametrize(
+    "provider, model, call_type",
+    [
+        ("anthropic", "claude-3-haiku-20240307", "chat_completion"),
+    ],
+)
+@pytest.mark.asyncio
+async def test_exception_with_headers_httpx(
+    sync_mode, provider, model, call_type, streaming
+):
+    """
+    User feedback: litellm says "No deployments available for selected model, Try again in 60 seconds"
+    but Azure says to retry in at most 9s
+
+    ```
+    {"message": "litellm.proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"}
+    ```
+    """
+    print(f"Received args: {locals()}")
+    import openai
+
+    if sync_mode:
+        client = HTTPHandler()
+    else:
+        client = AsyncHTTPHandler()
+
+    data = {"model": model}
+    data, original_function, mapped_target = _pre_call_utils_httpx(
+        call_type=call_type,
+        data=data,
+        client=client,
+        sync_mode=sync_mode,
+        streaming=streaming,
+    )
+
+    cooldown_time = 30.0
+
+    def _return_exception(*args, **kwargs):
+        import datetime
+
+        from httpx import Headers, HTTPStatusError, Request, Response
+
+        # Create the Request object
+        request = Request("POST", "http://0.0.0.0:9000/chat/completions")
+
+        # Create the Response object with the necessary headers and status code
+        response = Response(
+            status_code=429,
+            headers=Headers(
+                {
+                    "date": "Sat, 21 Sep 2024 22:56:53 GMT",
+                    "server": "uvicorn",
+                    "retry-after": "30",
+                    "content-length": "30",
+                    "content-type": "application/json",
+                }
+            ),
+            request=request,
+        )
+
+        # Create and raise the HTTPStatusError exception
+        raise HTTPStatusError(
+            message="Error code: 429 - Rate Limit Error!",
+            request=request,
+            response=response,
+        )
+
+    with patch.object(
+        mapped_target,
+        "send",
+        side_effect=_return_exception,
+    ):
+        new_retry_after_mock_client = MagicMock(return_value=-1)
+
+        litellm.utils._get_retry_after_from_exception_header = (
+            new_retry_after_mock_client
+        )
+
+        exception_raised = False
+        try:
+            if sync_mode:
+                resp = original_function(**data, client=client)
+                if streaming:
+                    for chunk in resp:
+                        continue
+            else:
+                resp = await original_function(**data, client=client)
+                if streaming:
+                    async for chunk in resp:
+                        continue
+
+        except litellm.RateLimitError as e:
+            exception_raised = True
+            assert e.litellm_response_headers is not None
+            print("e.litellm_response_headers", e.litellm_response_headers)
+            assert int(e.litellm_response_headers["retry-after"]) == cooldown_time
+
+        if exception_raised is False:
+            print(resp)
+        assert exception_raised

View file

@@ -45,11 +45,12 @@ def get_current_weather(location, unit="fahrenheit"):
 @pytest.mark.parametrize(
     "model",
     [
-        # "gpt-3.5-turbo-1106",
+        "gpt-3.5-turbo-1106",
         # "mistral/mistral-large-latest",
         # "claude-3-haiku-20240307",
         # "gemini/gemini-1.5-pro",
         "anthropic.claude-3-sonnet-20240229-v1:0",
+        "groq/llama3-8b-8192",
     ],
 )
 @pytest.mark.flaky(retries=3, delay=1)
@@ -154,6 +155,105 @@ def test_aaparallel_function_call(model):
 # test_parallel_function_call()

+from litellm.types.utils import ChatCompletionMessageToolCall, Function, Message
+
+
+@pytest.mark.parametrize(
+    "model, provider",
+    [
+        (
+            "anthropic.claude-3-sonnet-20240229-v1:0",
+            "bedrock",
+        ),
+        ("claude-3-haiku-20240307", "anthropic"),
+    ],
+)
+@pytest.mark.parametrize(
+    "messages, expected_error_msg",
+    [
+        (
+            [
+                {
+                    "role": "user",
+                    "content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses",
+                },
+                Message(
+                    content="Here are the current weather conditions for San Francisco, Tokyo, and Paris:",
+                    role="assistant",
+                    tool_calls=[
+                        ChatCompletionMessageToolCall(
+                            index=1,
+                            function=Function(
+                                arguments='{"location": "San Francisco, CA", "unit": "fahrenheit"}',
+                                name="get_current_weather",
+                            ),
+                            id="tooluse_Jj98qn6xQlOP_PiQr-w9iA",
+                            type="function",
+                        )
+                    ],
+                    function_call=None,
+                ),
+                {
+                    "tool_call_id": "tooluse_Jj98qn6xQlOP_PiQr-w9iA",
+                    "role": "tool",
+                    "name": "get_current_weather",
+                    "content": '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}',
+                },
+            ],
+            True,
+        ),
+        (
+            [
+                {
+                    "role": "user",
+                    "content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses",
+                }
+            ],
+            False,
+        ),
+    ],
+)
+def test_parallel_function_call_anthropic_error_msg(
+    model, provider, messages, expected_error_msg
+):
+    """
+    Anthropic doesn't support tool calling without `tools=` param specified.
+
+    Ensure this error is thrown when `tools=` param is not specified. But tool call requests are made.
+
+    Reference Issue: https://github.com/BerriAI/litellm/issues/5747, https://github.com/BerriAI/litellm/issues/5388
+    """
+    try:
+        litellm.set_verbose = True
+
+        messages = messages
+
+        if expected_error_msg:
+            with pytest.raises(litellm.UnsupportedParamsError) as e:
+                second_response = litellm.completion(
+                    model=model,
+                    messages=messages,
+                    temperature=0.2,
+                    seed=22,
+                    drop_params=True,
+                )  # get a new response from the model where it can see the function response
+                print("second response\n", second_response)
+        else:
+            second_response = litellm.completion(
+                model=model,
+                messages=messages,
+                temperature=0.2,
+                seed=22,
+                drop_params=True,
+            )  # get a new response from the model where it can see the function response
+            print("second response\n", second_response)
+    except litellm.InternalServerError as e:
+        print(e)
+    except litellm.RateLimitError as e:
+        print(e)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 def test_parallel_function_call_stream():
     try:
View file

@@ -62,3 +62,9 @@ def test_get_model_info_shows_supports_prompt_caching():
     info = litellm.get_model_info("deepseek/deepseek-chat")
     print("info", info)
     assert info.get("supports_prompt_caching") is True
+
+
+def test_get_model_info_finetuned_models():
+    info = litellm.get_model_info("ft:gpt-3.5-turbo:my-org:custom_suffix:id")
+    print("info", info)
+    assert info["input_cost_per_token"] == 0.000003

View file

@@ -18,13 +18,13 @@ class AnthropicMessagesTool(TypedDict, total=False):

 class AnthropicMessagesTextParam(TypedDict, total=False):
-    type: Literal["text"]
-    text: str
+    type: Required[Literal["text"]]
+    text: Required[str]
     cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


 class AnthropicMessagesToolUseParam(TypedDict):
-    type: Literal["tool_use"]
+    type: Required[Literal["tool_use"]]
     id: str
     name: str
     input: dict
@@ -58,8 +58,8 @@ class AnthropicImageParamSource(TypedDict):

 class AnthropicMessagesImageParam(TypedDict, total=False):
-    type: Literal["image"]
-    source: AnthropicImageParamSource
+    type: Required[Literal["image"]]
+    source: Required[AnthropicImageParamSource]
     cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
@@ -102,16 +102,13 @@ class AnthropicSystemMessageContent(TypedDict, total=False):
     cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


-class AnthropicMessagesRequest(TypedDict, total=False):
-    model: Required[str]
-    messages: Required[
-        List[
-            Union[
-                AnthropicMessagesUserMessageParam,
-                AnthopicMessagesAssistantMessageParam,
-            ]
-        ]
-    ]
+AllAnthropicMessageValues = Union[
+    AnthropicMessagesUserMessageParam, AnthopicMessagesAssistantMessageParam
+]
+
+
+class AnthropicMessageRequestBase(TypedDict, total=False):
+    messages: Required[List[AllAnthropicMessageValues]]
     max_tokens: Required[int]
     metadata: AnthropicMetadata
     stop_sequences: List[str]
@@ -123,6 +120,9 @@ class AnthropicMessagesRequest(TypedDict, total=False):
     top_k: int
     top_p: float

+
+class AnthropicMessagesRequest(AnthropicMessageRequestBase, total=False):
+    model: Required[str]
     # litellm param - used for tracking litellm proxy metadata in the request
     litellm_metadata: dict
@@ -291,9 +291,9 @@ class AnthropicResponse(BaseModel):
     """Billing and rate-limit usage."""


-class AnthropicChatCompletionUsageBlock(TypedDict, total=False):
-    prompt_tokens: Required[int]
-    completion_tokens: Required[int]
-    total_tokens: Required[int]
+from .openai import ChatCompletionUsageBlock
+
+
+class AnthropicChatCompletionUsageBlock(ChatCompletionUsageBlock, total=False):
     cache_creation_input_tokens: int
     cache_read_input_tokens: int
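
A small sketch of what the TypedDict split above buys: shared request fields live on `AnthropicMessageRequestBase`, and `AnthropicMessagesRequest` only adds `model` (plus the proxy-only `litellm_metadata`):

from litellm.types.llms.anthropic import AnthropicMessagesRequest

request: AnthropicMessagesRequest = {
    "model": "claude-3-haiku-20240307",
    "max_tokens": 256,
    "messages": [{"role": "user", "content": "Hello"}],
}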

View file

@@ -343,11 +343,14 @@ class ChatCompletionImageObject(TypedDict):
     image_url: Union[str, ChatCompletionImageUrlObject]


+OpenAIMessageContent = Union[
+    str, Iterable[Union[ChatCompletionTextObject, ChatCompletionImageObject]]
+]
+
+
 class OpenAIChatCompletionUserMessage(TypedDict):
     role: Literal["user"]
-    content: Union[
-        str, Iterable[Union[ChatCompletionTextObject, ChatCompletionImageObject]]
-    ]
+    content: OpenAIMessageContent


 class ChatCompletionUserMessage(OpenAIChatCompletionUserMessage, total=False):

View file

@@ -7,7 +7,7 @@ from typing import Any, Dict, List, Literal, Optional, Tuple, Union
 from openai._models import BaseModel as OpenAIObject
 from openai.types.audio.transcription_create_params import FileTypes  # type: ignore
 from openai.types.completion_usage import CompletionTokensDetails, CompletionUsage
-from pydantic import ConfigDict, Field, PrivateAttr
+from pydantic import ConfigDict, PrivateAttr
 from typing_extensions import Callable, Dict, Required, TypedDict, override

 from ..litellm_core_utils.core_helpers import map_finish_reason

File diff suppressed because it is too large

View file

@@ -1173,6 +1173,18 @@
         "supports_function_calling": true,
         "supports_assistant_prefill": true
     },
+    "mistral/pixtral-12b-2409": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000015,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_assistant_prefill": true,
+        "supports_vision": true
+    },
     "mistral/open-mistral-7b": {
         "max_tokens": 8191,
         "max_input_tokens": 32000,

View file

@@ -25,7 +25,12 @@ from unittest.mock import MagicMock, patch
 import pytest

 import litellm
-from litellm import AnthropicConfig, Router, adapter_completion
+from litellm import (
+    AnthropicConfig,
+    Router,
+    adapter_completion,
+    AnthropicExperimentalPassThroughConfig,
+)
 from litellm.adapters.anthropic_adapter import anthropic_adapter
 from litellm.types.llms.anthropic import AnthropicResponse
@@ -33,7 +38,7 @@ from litellm.types.llms.anthropic import AnthropicResponse

 def test_anthropic_completion_messages_translation():
     messages = [{"role": "user", "content": "Hey, how's it going?"}]

-    translated_messages = AnthropicConfig().translate_anthropic_messages_to_openai(messages=messages)  # type: ignore
+    translated_messages = AnthropicExperimentalPassThroughConfig().translate_anthropic_messages_to_openai(messages=messages)  # type: ignore

     assert translated_messages == [{"role": "user", "content": "Hey, how's it going?"}]

View file

@@ -5,7 +5,11 @@ import pytest
 import sys
 from typing import Any, Dict, List
 from unittest.mock import MagicMock, Mock, patch
+import os

+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
 import litellm
 from litellm.exceptions import BadRequestError
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler