LiteLLM Minor Fixes & Improvements (10/02/2024) (#6023)
* feat(together_ai/completion): handle Together AI completion calls
* fix: handle list of int / list of list of int for text completion calls
* fix(utils.py): check if the base model is in the Bedrock Converse model list. Fixes https://github.com/BerriAI/litellm/issues/6003
* test(test_optional_params.py): add unit tests for Bedrock optional param mapping. Fixes https://github.com/BerriAI/litellm/issues/6003
* feat(utils.py): enable passing a dummy tool call for Anthropic/Bedrock calls if tool_use blocks exist. Fixes https://github.com/BerriAI/litellm/issues/5388
* fix: fixed an issue with tool use of Claude models on Anthropic and Bedrock (#6013)
* fix(utils.py): handle empty tool schema for Anthropic/Bedrock. Fixes https://github.com/BerriAI/litellm/issues/6012
* fix: fix linting errors
* fix(proxy_cli.py): fix import route for app + health checks path (#6026)
* (testing): enable testing us.anthropic.claude-3-haiku-20240307-v1:0 (#6018)
* fix(proxy_cli.py): fix import route for app + health checks gettysburg.wav. Fixes https://github.com/BerriAI/litellm/issues/5999

Co-authored-by: David Manouchehri <david.manouchehri@ai.moda>
Co-authored-by: Ved Patwardhan <54766411+vedpatwardhan@users.noreply.github.com>
This commit is contained in:
parent 8995ff49ae
commit 14165d3648
20 changed files with 443 additions and 125 deletions
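For context, the dummy-tool behavior described in the commit message looks roughly like this from the caller's side. This is a sketch only: the model, message contents, and tool-call id are illustrative (adapted from the test log further down), and an Anthropic API key is assumed in the environment.

    import litellm

    # The conversation already contains a tool call and its result, but the
    # follow-up request passes no `tools=` param. With modify_params enabled,
    # LiteLLM injects a dummy tool so Anthropic/Bedrock accept the request
    # instead of erroring (issues #5388 / #6012).
    litellm.modify_params = True

    messages = [
        {"role": "user", "content": "What's the weather like in San Francisco?"},
        {
            "role": "assistant",
            "content": "Let me check the current weather.",
            "tool_calls": [
                {
                    "id": "toolu_016U6G3kpxjHSiJLwVCrrScz",  # illustrative id
                    "type": "function",
                    "function": {
                        "name": "get_current_weather",
                        "arguments": '{"location": "San Francisco", "unit": "celsius"}',
                    },
                }
            ],
        },
        {
            "tool_call_id": "toolu_016U6G3kpxjHSiJLwVCrrScz",
            "role": "tool",
            "name": "get_current_weather",
            "content": '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}',
        },
    ]

    response = litellm.completion(
        model="claude-3-haiku-20240307",
        messages=messages,
        temperature=0.2,
        drop_params=True,
    )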
litellm/llms/OpenAI/completion/utils.py (new file, 15 lines)

@@ -0,0 +1,15 @@
from collections.abc import Iterable
from typing import List


def is_tokens_or_list_of_tokens(value: List):
    # Check if it's a list of integers (tokens)
    if isinstance(value, list) and all(isinstance(item, int) for item in value):
        return True
    # Check if it's a list of lists of integers (list of tokens)
    if isinstance(value, list) and all(
        isinstance(item, list) and all(isinstance(i, int) for i in item)
        for item in value
    ):
        return True
    return False
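A quick sanity check of the helper above (illustrative values, not taken from the test suite):

    assert is_tokens_or_list_of_tokens([1, 2, 3]) is True            # a single list of tokens
    assert is_tokens_or_list_of_tokens([[1, 2], [3, 4]]) is True     # a list of token lists
    assert is_tokens_or_list_of_tokens(["hello", "world"]) is False  # plain strings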
@@ -4,7 +4,7 @@ import os
 import time
 import traceback
 import types
-from typing import Any, Callable, Coroutine, Iterable, Literal, Optional, Union
+from typing import Any, Callable, Coroutine, Iterable, Literal, Optional, Union, cast

 import httpx
 import openai
@@ -30,8 +30,10 @@ from litellm.utils import (

 from ...types.llms.openai import *
 from ..base import BaseLLM
+from ..prompt_templates.common_utils import convert_content_list_to_str
 from ..prompt_templates.factory import custom_prompt, prompt_factory
 from .common_utils import drop_params_from_unprocessable_entity_error
+from .completion.utils import is_tokens_or_list_of_tokens


 class OpenAIError(Exception):
@@ -420,6 +422,35 @@ class OpenAITextCompletionConfig:
             and v is not None
         }

+    def _transform_prompt(
+        self,
+        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
+    ) -> AllPromptValues:
+        if len(messages) == 1:  # base case
+            message_content = messages[0].get("content")
+            if (
+                message_content
+                and isinstance(message_content, list)
+                and is_tokens_or_list_of_tokens(message_content)
+            ):
+                openai_prompt: AllPromptValues = cast(AllPromptValues, message_content)
+            else:
+                openai_prompt = ""
+                content = convert_content_list_to_str(
+                    cast(AllMessageValues, messages[0])
+                )
+                openai_prompt += content
+        else:
+            prompt_str_list: List[str] = []
+            for m in messages:
+                try:  # expect list of int/list of list of int to be a 1 message array only.
+                    content = convert_content_list_to_str(cast(AllMessageValues, m))
+                    prompt_str_list.append(content)
+                except Exception as e:
+                    raise e
+            openai_prompt = prompt_str_list
+        return openai_prompt
+
     def convert_to_chat_model_response_object(
         self,
         response_object: Optional[TextCompletionResponse] = None,
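A small sketch of the two paths _transform_prompt takes (inputs are illustrative; the import path matches the one used elsewhere in this diff):

    from litellm.llms.OpenAI.openai import OpenAITextCompletionConfig

    config = OpenAITextCompletionConfig()

    # A single message whose content is already a token list is passed through untouched.
    config._transform_prompt([{"role": "user", "content": [1, 2, 3]}])
    # -> [1, 2, 3]

    # Plain-text messages are flattened into a list of prompt strings.
    config._transform_prompt(
        [{"role": "user", "content": "Hello"}, {"role": "user", "content": "World"}]
    )
    # -> ["Hello", "World"]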
@@ -459,6 +490,7 @@ class OpenAITextCompletionConfig:


 class OpenAIChatCompletion(BaseLLM):

     def __init__(self) -> None:
         super().__init__()

@@ -1466,7 +1498,9 @@ class OpenAIChatCompletion(BaseLLM):
         elif mode == "audio_transcription":
             # Get the current directory of the file being run
             pwd = os.path.dirname(os.path.realpath(__file__))
-            file_path = os.path.join(pwd, "../tests/gettysburg.wav")
+            file_path = os.path.join(
+                pwd, "../../../tests/gettysburg.wav"
+            )  # proxy address
             audio_file = open(file_path, "rb")
             completion = await client.audio.transcriptions.with_raw_response.create(
                 file=audio_file,
@@ -1502,6 +1536,8 @@ class OpenAIChatCompletion(BaseLLM):


 class OpenAITextCompletion(BaseLLM):
+    openai_text_completion_global_config = OpenAITextCompletionConfig()
+
     def __init__(self) -> None:
         super().__init__()

@@ -1518,7 +1554,7 @@ class OpenAITextCompletion(BaseLLM):
         model_response: ModelResponse,
         api_key: str,
         model: str,
-        messages: list,
+        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
         timeout: float,
         logging_obj: LiteLLMLoggingObj,
         optional_params: dict,
@@ -1531,24 +1567,18 @@ class OpenAITextCompletion(BaseLLM):
         organization: Optional[str] = None,
         headers: Optional[dict] = None,
     ):
         super().completion()
         try:
             if headers is None:
                 headers = self.validate_environment(api_key=api_key)
             if model is None or messages is None:
                 raise OpenAIError(status_code=422, message="Missing model or messages")

-            if (
-                len(messages) > 0
-                and "content" in messages[0]
-                and isinstance(messages[0]["content"], list)
-            ):
-                prompt = messages[0]["content"]
-            else:
-                prompt = [message["content"] for message in messages]  # type: ignore

             # don't send max retries to the api, if set

+            prompt = self.openai_text_completion_global_config._transform_prompt(
+                messages
+            )

             data = {"model": model, "prompt": prompt, **optional_params}
             max_retries = data.pop("max_retries", 2)
             ## LOGGING
@@ -551,6 +551,8 @@ class AnthropicChatCompletion(BaseLLM):
             error_response = getattr(e, "response", None)
             if error_headers is None and error_response:
                 error_headers = getattr(error_response, "headers", None)
+            if error_response and hasattr(error_response, "text"):
+                error_text = getattr(error_response, "text", error_text)
             raise AnthropicError(
                 message=error_text,
                 status_code=status_code,
@@ -6,11 +6,17 @@ from litellm.llms.prompt_templates.factory import anthropic_messages_pt
 from litellm.types.llms.anthropic import (
     AnthropicMessageRequestBase,
     AnthropicMessagesRequest,
     AnthropicMessagesTool,
     AnthropicMessagesToolChoice,
     AnthropicSystemMessageContent,
 )
-from litellm.types.llms.openai import AllMessageValues, ChatCompletionSystemMessage
-from litellm.utils import has_tool_call_blocks
+from litellm.types.llms.openai import (
+    AllMessageValues,
+    ChatCompletionSystemMessage,
+    ChatCompletionToolParam,
+    ChatCompletionToolParamFunctionChunk,
+)
+from litellm.utils import add_dummy_tool, has_tool_call_blocks

 from ..common_utils import AnthropicError

@@ -146,11 +152,16 @@ class AnthropicConfig:
             and messages is not None
             and has_tool_call_blocks(messages)
         ):
-            raise litellm.UnsupportedParamsError(
-                message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.",
-                model="",
-                llm_provider="anthropic",
-            )
+            if litellm.modify_params:
+                optional_params["tools"] = add_dummy_tool(
+                    custom_llm_provider="bedrock_converse"
+                )
+            else:
+                raise litellm.UnsupportedParamsError(
+                    message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param OR set `litellm.modify_params = True` // `litellm_settings::modify_params: True` to add dummy tool to the request.",
+                    model="",
+                    llm_provider="anthropic",
+                )

         return optional_params

@@ -266,18 +277,23 @@ class AnthropicConfig:
         if "anthropic-beta" not in headers:
             # default to v1 of "anthropic-beta"
             headers["anthropic-beta"] = "tools-2024-05-16"

         anthropic_tools = []
         for tool in optional_params["tools"]:
             if "input_schema" in tool:  # assume in anthropic format
                 anthropic_tools.append(tool)
             else:  # assume openai tool call
                 new_tool = tool["function"]
-                new_tool["input_schema"] = new_tool.pop("parameters")  # rename key
+                parameters = new_tool.pop(
+                    "parameters",
+                    {
+                        "type": "object",
+                        "properties": {},
+                    },
+                )
+                new_tool["input_schema"] = parameters  # rename key
                 if "cache_control" in tool:
                     new_tool["cache_control"] = tool["cache_control"]
                 anthropic_tools.append(new_tool)

         optional_params["tools"] = anthropic_tools

         data = {
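The effect of the new empty-schema fallback, sketched with the no-parameters tool shape that test_anthropic_function_call_with_no_schema (added later in this diff) exercises:

    tool = {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in New York",
            # no "parameters" key: previously new_tool.pop("parameters") raised
            # a KeyError; it now falls back to an empty object schema
        },
    }
    # After the mapping above, the Anthropic-format tool becomes roughly:
    # {"name": "get_current_weather",
    #  "description": "Get the current weather in New York",
    #  "input_schema": {"type": "object", "properties": {}}}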
@@ -26,6 +26,7 @@ class AzureAIStudioConfig(OpenAIConfig):

     def _transform_messages(self, messages: List[AllMessageValues]) -> List:
         for message in messages:
-            message = convert_content_list_to_str(message=message)
+            texts = convert_content_list_to_str(message=message)
+            if texts:
+                message["content"] = texts
         return messages

@@ -22,7 +22,7 @@ from litellm.types.llms.openai import (
     ChatCompletionToolParamFunctionChunk,
 )
 from litellm.types.utils import ModelResponse, Usage
-from litellm.utils import CustomStreamWrapper, has_tool_call_blocks
+from litellm.utils import CustomStreamWrapper, add_dummy_tool, has_tool_call_blocks

 from ...prompt_templates.factory import _bedrock_converse_messages_pt, _bedrock_tools_pt
 from ..common_utils import BedrockError, get_bedrock_tool_name
@@ -213,11 +213,16 @@ class AmazonConverseConfig:
             and messages is not None
             and has_tool_call_blocks(messages)
         ):
-            raise litellm.UnsupportedParamsError(
-                message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.",
-                model="",
-                llm_provider="anthropic",
-            )
+            if litellm.modify_params:
+                optional_params["tools"] = add_dummy_tool(
+                    custom_llm_provider="bedrock_converse"
+                )
+            else:
+                raise litellm.UnsupportedParamsError(
+                    message="Bedrock doesn't support tool calling without `tools=` param specified. Pass `tools=` param OR set `litellm.modify_params = True` // `litellm_settings::modify_params: True` to add dummy tool to the request.",
+                    model="",
+                    llm_provider="bedrock",
+                )
         return optional_params

     def _transform_request(
@@ -7,7 +7,7 @@ from typing import List
 from litellm.types.llms.openai import AllMessageValues


-def convert_content_list_to_str(message: AllMessageValues) -> AllMessageValues:
+def convert_content_list_to_str(message: AllMessageValues) -> str:
     """
     - handles scenario where content is list and not string
     - content list is just text, and no images

@@ -26,7 +26,4 @@ def convert_content_list_to_str(message: AllMessageValues) -> AllMessageValues:
     elif message_content is not None and isinstance(message_content, str):
         texts = message_content

-    if texts:
-        message["content"] = texts
-
-    return message
+    return texts
@@ -2554,7 +2554,10 @@ def _bedrock_tools_pt(tools: List) -> List[BedrockToolBlock]:
     """
     tool_block_list: List[BedrockToolBlock] = []
     for tool in tools:
-        parameters = tool.get("function", {}).get("parameters", None)
+        parameters = tool.get("function", {}).get("parameters", {
+            "type": "object",
+            "properties": {}
+        })
         name = tool.get("function", {}).get("name", "")

         # related issue: https://github.com/BerriAI/litellm/issues/5007
@@ -1,7 +0,0 @@
-"""
-Support for OpenAI's `/v1/completions` endpoint.
-
-Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.
-
-Docs: https://docs.together.ai/reference/completions-1
-"""
litellm/llms/together_ai/completion/handler.py (new file, 61 lines)

@@ -0,0 +1,61 @@
"""
Support for OpenAI's `/v1/completions` endpoint.

Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.

Docs: https://docs.together.ai/reference/completions-1
"""

from typing import Any, Callable, List, Optional, Union

from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.types.llms.openai import AllMessageValues, OpenAITextCompletionUserMessage
from litellm.utils import ModelResponse

from ...OpenAI.openai import OpenAITextCompletion
from .transformation import TogetherAITextCompletionConfig

together_ai_text_completion_global_config = TogetherAITextCompletionConfig()


class TogetherAITextCompletion(OpenAITextCompletion):

    def completion(
        self,
        model_response: ModelResponse,
        api_key: str,
        model: str,
        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
        timeout: float,
        logging_obj: Logging,
        optional_params: dict,
        print_verbose: Optional[Callable[..., Any]] = None,
        api_base: Optional[str] = None,
        acompletion: bool = False,
        litellm_params=None,
        logger_fn=None,
        client=None,
        organization: Optional[str] = None,
        headers: Optional[dict] = None,
    ):
        prompt = together_ai_text_completion_global_config._transform_prompt(messages)

        message = OpenAITextCompletionUserMessage(role="user", content=prompt)
        new_messages = [message]
        return super().completion(
            model_response=model_response,
            api_key=api_key,
            model=model,
            messages=new_messages,
            timeout=timeout,
            logging_obj=logging_obj,
            optional_params=optional_params,
            print_verbose=print_verbose,
            api_base=api_base,
            acompletion=acompletion,
            litellm_params=litellm_params,
            logger_fn=logger_fn,
            client=client,
            organization=organization,
            headers=headers,
        )
litellm/llms/together_ai/completion/transformation.py (new file, 46 lines)

@@ -0,0 +1,46 @@
"""
Translates calls from OpenAI's `/v1/completions` endpoint to TogetherAI's `/v1/completions` endpoint.

Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.

Docs: https://docs.together.ai/reference/completions-1
"""

from typing import List, Union, cast

from litellm.llms.OpenAI.completion.utils import is_tokens_or_list_of_tokens
from litellm.types.llms.openai import (
    AllMessageValues,
    AllPromptValues,
    OpenAITextCompletionUserMessage,
)

from ...OpenAI.openai import OpenAITextCompletionConfig


class TogetherAITextCompletionConfig(OpenAITextCompletionConfig):
    def _transform_prompt(
        self,
        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
    ) -> AllPromptValues:
        """
        TogetherAI expects a string prompt.
        """
        initial_prompt: AllPromptValues = super()._transform_prompt(messages)
        ## TOGETHER AI SPECIFIC VALIDATION ##
        if isinstance(initial_prompt, list) and is_tokens_or_list_of_tokens(
            value=initial_prompt
        ):
            raise ValueError("TogetherAI does not support integers as input")
        if (
            isinstance(initial_prompt, list)
            and len(initial_prompt) == 1
            and isinstance(initial_prompt[0], str)
        ):
            together_prompt = initial_prompt[0]
        elif isinstance(initial_prompt, list):
            raise ValueError("TogetherAI does not support multiple prompts.")
        else:
            together_prompt = cast(str, initial_prompt)

        return together_prompt
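A rough illustration of the Together AI specific validation above (inputs are made up):

    config = TogetherAITextCompletionConfig()

    config._transform_prompt([{"role": "user", "content": "Say hi"}])
    # -> "Say hi" (a single text message collapses to a plain string)

    config._transform_prompt([{"role": "user", "content": [1, 2, 3]}])
    # -> ValueError: TogetherAI does not support integers as input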
@@ -112,6 +112,7 @@ from .llms.prompt_templates.factory import (
 )
 from .llms.sagemaker.sagemaker import SagemakerLLM
 from .llms.text_completion_codestral import CodestralTextCompletion
+from .llms.together_ai.completion.handler import TogetherAITextCompletion
 from .llms.triton import TritonChatCompletion
 from .llms.vertex_ai_and_google_ai_studio import (
     vertex_ai_anthropic,

@@ -168,6 +169,7 @@ openai_o1_chat_completions = OpenAIO1ChatCompletion()
 openai_audio_transcriptions = OpenAIAudioTranscription()
 databricks_chat_completions = DatabricksChatCompletion()
 groq_chat_completions = GroqChatCompletion()
+together_ai_text_completions = TogetherAITextCompletion()
 azure_ai_chat_completions = AzureAIChatCompletion()
 azure_ai_embedding = AzureAIEmbedding()
 anthropic_chat_completions = AnthropicChatCompletion()
@@ -1285,21 +1287,38 @@ def completion(
                 prompt = " ".join([message["content"] for message in messages])  # type: ignore

             ## COMPLETION CALL
-            _response = openai_text_completions.completion(
-                model=model,
-                messages=messages,
-                model_response=model_response,
-                print_verbose=print_verbose,
-                api_key=api_key,
-                api_base=api_base,
-                acompletion=acompletion,
-                client=client,  # pass AsyncOpenAI, OpenAI client
-                logging_obj=logging,
-                optional_params=optional_params,
-                litellm_params=litellm_params,
-                logger_fn=logger_fn,
-                timeout=timeout,  # type: ignore
-            )
+            if custom_llm_provider == "together_ai":
+                _response = together_ai_text_completions.completion(
+                    model=model,
+                    messages=messages,
+                    model_response=model_response,
+                    print_verbose=print_verbose,
+                    api_key=api_key,
+                    api_base=api_base,
+                    acompletion=acompletion,
+                    client=client,  # pass AsyncOpenAI, OpenAI client
+                    logging_obj=logging,
+                    optional_params=optional_params,
+                    litellm_params=litellm_params,
+                    logger_fn=logger_fn,
+                    timeout=timeout,  # type: ignore
+                )
+            else:
+                _response = openai_text_completions.completion(
+                    model=model,
+                    messages=messages,
+                    model_response=model_response,
+                    print_verbose=print_verbose,
+                    api_key=api_key,
+                    api_base=api_base,
+                    acompletion=acompletion,
+                    client=client,  # pass AsyncOpenAI, OpenAI client
+                    logging_obj=logging,
+                    optional_params=optional_params,
+                    litellm_params=litellm_params,
+                    logger_fn=logger_fn,
+                    timeout=timeout,  # type: ignore
+                )

             if (
                 optional_params.get("stream", False) is False
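With this routing in place, Together AI text-completion calls go through the new handler. A minimal usage sketch (the model name is illustrative and a TOGETHERAI_API_KEY is assumed in the environment):

    import litellm

    response = litellm.text_completion(
        model="together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1",
        prompt="What is the capital of France?",
        max_tokens=10,
    )
    print(response.choices[0].text)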
@@ -1,61 +1,7 @@
 model_list:
-  - model_name: fake-claude-endpoint
+  - model_name: whisper
     litellm_params:
-      model: anthropic.claude-3-sonnet-20240229-v1:0
-      api_base: https://exampleopenaiendpoint-production.up.railway.app
-      aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
-      aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
-  - model_name: gemini-vision
-    litellm_params:
-      model: vertex_ai/gemini-1.0-pro-vision-001
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.0-pro-vision-001
-      vertex_project: "adroit-crow-413218"
-      vertex_location: "us-central1"
-  - model_name: fake-azure-endpoint
-    litellm_params:
-      model: openai/429
-      api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app
-  - model_name: fake-openai-endpoint
-    litellm_params:
-      model: gpt-3.5-turbo
-      api_base: https://exampleopenaiendpoint-production.up.railway.app
-  - model_name: o1-preview
-    litellm_params:
-      model: o1-preview
-  - model_name: rerank-english-v3.0
-    litellm_params:
-      model: cohere/rerank-english-v3.0
-      api_key: os.environ/COHERE_API_KEY
-  - model_name: azure-rerank-english-v3.0
-    litellm_params:
-      model: azure_ai/rerank-english-v3.0
-      api_base: os.environ/AZURE_AI_COHERE_API_BASE
-      api_key: os.environ/AZURE_AI_COHERE_API_KEY
-  - model_name: "databricks/*"
-    litellm_params:
-      model: "databricks/*"
-      api_key: os.environ/DATABRICKS_API_KEY
-      api_base: os.environ/DATABRICKS_API_BASE
-  - model_name: "anthropic/*"
-    litellm_params:
-      model: "anthropic/*"
-  - model_name: "*"
-    litellm_params:
-      model: "openai/*"
-  - model_name: "fireworks_ai/*"
-    litellm_params:
-      model: "fireworks_ai/*"
-      configurable_clientside_auth_params: ["api_base"]
-  - model_name: "gemini-flash-experimental"
-    litellm_params:
-      model: "vertex_ai/gemini-flash-experimental"
-
-litellm_settings:
-  json_logs: true
-  cache: true
-  cache_params:
-    type: "redis"
-    # namespace: "litellm_caching"
-    ttl: 900
-  callbacks: ["batch_redis_requests"]
+      model: whisper-1
+      api_key: os.environ/OPENAI_API_KEY
+    model_info:
+      mode: audio_transcription
@@ -673,6 +673,9 @@ def run_server(

         import litellm

+        # DO NOT DELETE - enables global variables to work across files
+        from litellm.proxy.proxy_server import app  # noqa
+
         if run_gunicorn is False and run_hypercorn is False:
             if ssl_certfile_path is not None and ssl_keyfile_path is not None:
                 print(  # noqa
@@ -347,12 +347,20 @@ OpenAIMessageContent = Union[
     str, Iterable[Union[ChatCompletionTextObject, ChatCompletionImageObject]]
 ]

+# The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays.
+AllPromptValues = Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None]
+
+
 class OpenAIChatCompletionUserMessage(TypedDict):
     role: Literal["user"]
     content: OpenAIMessageContent


+class OpenAITextCompletionUserMessage(TypedDict):
+    role: Literal["user"]
+    content: AllPromptValues
+
+
 class ChatCompletionUserMessage(OpenAIChatCompletionUserMessage, total=False):
     cache_control: ChatCompletionCachedContent

@@ -80,6 +80,7 @@ from litellm.types.llms.openai import (
     AllMessageValues,
     ChatCompletionNamedToolChoiceParam,
     ChatCompletionToolParam,
     ChatCompletionToolParamFunctionChunk,
 )
 from litellm.types.utils import FileTypes  # type: ignore
 from litellm.types.utils import (
@@ -3360,7 +3361,8 @@ def get_optional_params(
         supported_params = get_supported_openai_params(
             model=model, custom_llm_provider=custom_llm_provider
         )
-        if model in litellm.BEDROCK_CONVERSE_MODELS:
+        base_model = litellm.AmazonConverseConfig()._get_base_model(model)
+        if base_model in litellm.BEDROCK_CONVERSE_MODELS:
             _check_valid_arg(supported_params=supported_params)
             optional_params = litellm.AmazonConverseConfig().map_openai_params(
                 model=model,
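Why the base-model lookup matters: cross-region inference profiles such as the us.* models are not listed in litellm.BEDROCK_CONVERSE_MODELS themselves, but their base models are. A sketch, assuming _get_base_model strips the region prefix (which is what the new unit test below relies on):

    import litellm

    base_model = litellm.AmazonConverseConfig()._get_base_model(
        "us.anthropic.claude-3-haiku-20240307-v1:0"
    )
    # -> "anthropic.claude-3-haiku-20240307-v1:0", which is in
    # litellm.BEDROCK_CONVERSE_MODELS, so the Converse param mapping
    # (maxTokens, temperature, ...) is applied instead of being skipped.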
@@ -9255,3 +9257,24 @@ def process_response_headers(response_headers: Union[httpx.Headers, dict]) -> di
         **additional_headers,
     }
     return additional_headers
+
+
+def add_dummy_tool(custom_llm_provider: str) -> List[ChatCompletionToolParam]:
+    """
+    Prevent Anthropic from raising error when tool_use block exists but no tools are provided.
+
+    Relevent Issues: https://github.com/BerriAI/litellm/issues/5388, https://github.com/BerriAI/litellm/issues/5747
+    """
+    return [
+        ChatCompletionToolParam(
+            type="function",
+            function=ChatCompletionToolParamFunctionChunk(
+                name="dummy-tool",
+                description="This is a dummy tool call",  # provided to satisfy bedrock constraint.
+                parameters={
+                    "type": "object",
+                    "properties": {},
+                },
+            ),
+        )
+    ]
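The injected tool is inert by design; calling the helper returns a single no-op tool definition:

    add_dummy_tool(custom_llm_provider="bedrock_converse")
    # -> [{"type": "function",
    #      "function": {"name": "dummy-tool",
    #                   "description": "This is a dummy tool call",
    #                   "parameters": {"type": "object", "properties": {}}}}]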
@@ -75,6 +75,24 @@ def test_bedrock_optional_params_embeddings():
     assert len(optional_params) == 0


+@pytest.mark.parametrize(
+    "model",
+    [
+        "us.anthropic.claude-3-haiku-20240307-v1:0",
+        "us.meta.llama3-2-11b-instruct-v1:0",
+        "anthropic.claude-3-haiku-20240307-v1:0",
+    ],
+)
+def test_bedrock_optional_params_completions(model):
+    litellm.drop_params = True
+    optional_params = get_optional_params(
+        model=model, max_tokens=10, temperature=0.1, custom_llm_provider="bedrock"
+    )
+    print(f"optional_params: {optional_params}")
+    assert len(optional_params) == 3
+    assert optional_params == {"maxTokens": 10, "stream": False, "temperature": 0.1}
+
+
 @pytest.mark.parametrize(
     "model, expected_dimensions, dimensions_kwarg",
     [
tests/local_testing/log.txt (new file, 104 lines)

@@ -0,0 +1,104 @@
============================= test session starts ==============================
platform darwin -- Python 3.11.4, pytest-8.3.2, pluggy-1.5.0 -- /Users/krrishdholakia/Documents/litellm/myenv/bin/python3.11
cachedir: .pytest_cache
rootdir: /Users/krrishdholakia/Documents/litellm
configfile: pyproject.toml
plugins: asyncio-0.23.8, respx-0.21.1, anyio-4.6.0
asyncio: mode=Mode.STRICT
collecting ... collected 1 item

test_function_calling.py::test_aaparallel_function_call[claude-3-haiku-20240307] <module 'litellm' from '/Users/krrishdholakia/Documents/litellm/litellm/__init__.py'>


Request to litellm:
litellm.completion(model='claude-3-haiku-20240307', messages=[{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}], tools=[{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], tool_choice='auto')


SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], 'tool_choice': {'type': 'auto'}}
optional_params: {'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], 'tool_choice': {'type': 'auto'}}
SENT optional_params: {'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], 'tool_choice': {'type': 'auto'}, 'max_tokens': 4096}
tool: {'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}


POST Request Sent from LiteLLM:
curl -X POST \
https://api.anthropic.com/v1/messages \
-H 'accept: *****' -H 'anthropic-version: *****' -H 'content-type: *****' -H 'x-api-key: sk-ant-api03-bJf1M8qp-JDptRcZRE5ve5efAfSIaL5u-SZ9vItIkvuFcV5cUsd********************************************' -H 'anthropic-beta: *****' \
-d '{'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}]}], 'tools': [{'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'input_schema': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}], 'tool_choice': {'type': 'auto'}, 'max_tokens': 4096, 'model': 'claude-3-haiku-20240307'}'


_is_function_call: False
RAW RESPONSE:
{"id":"msg_01HRugqzL4WmcxMmbvDheTph","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"Okay, let's check the current weather in those three cities:"},{"type":"tool_use","id":"toolu_016U6G3kpxjHSiJLwVCrrScz","name":"get_current_weather","input":{"location":"San Francisco","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":379,"output_tokens":87}}


raw model_response: {"id":"msg_01HRugqzL4WmcxMmbvDheTph","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"Okay, let's check the current weather in those three cities:"},{"type":"tool_use","id":"toolu_016U6G3kpxjHSiJLwVCrrScz","name":"get_current_weather","input":{"location":"San Francisco","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":379,"output_tokens":87}}
Logging Details LiteLLM-Success Call: Cache_hit=None
Looking up model=claude-3-haiku-20240307 in model_cost_map
Looking up model=claude-3-haiku-20240307 in model_cost_map
Response
ModelResponse(id='chatcmpl-7222f6c2-962a-4776-8639-576723466cb7', choices=[Choices(finish_reason='tool_calls', index=0, message=Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None))], created=1727897483, model='claude-3-haiku-20240307', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=87, prompt_tokens=379, total_tokens=466, completion_tokens_details=None))
length of tool calls 1
Expecting there to be 3 tool calls
tool_calls: [ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')]
Response message
Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None)
messages: [{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}, Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None), {'tool_call_id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'role': 'tool', 'name': 'get_current_weather', 'content': '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}'}]


Request to litellm:
litellm.completion(model='claude-3-haiku-20240307', messages=[{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}, Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None), {'tool_call_id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'role': 'tool', 'name': 'get_current_weather', 'content': '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}'}], temperature=0.2, seed=22, drop_params=True)


SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'temperature': 0.2, 'tools': [{'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}]}
optional_params: {'temperature': 0.2, 'tools': [{'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}]}
SENT optional_params: {'temperature': 0.2, 'tools': [{'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}], 'max_tokens': 4096}
tool: {'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}


POST Request Sent from LiteLLM:
curl -X POST \
https://api.anthropic.com/v1/messages \
-H 'accept: *****' -H 'anthropic-version: *****' -H 'content-type: *****' -H 'x-api-key: sk-ant-api03-bJf1M8qp-JDptRcZRE5ve5efAfSIaL5u-SZ9vItIkvuFcV5cUsd********************************************' -H 'anthropic-beta: *****' \
-d '{'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}]}, {'role': 'assistant', 'content': [{'type': 'tool_use', 'id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'name': 'get_current_weather', 'input': {'location': 'San Francisco', 'unit': 'celsius'}}]}, {'role': 'user', 'content': [{'type': 'tool_result', 'tool_use_id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'content': '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}'}]}], 'temperature': 0.2, 'tools': [{'name': 'dummy-tool', 'description': '', 'input_schema': {'type': 'object', 'properties': {}}}], 'max_tokens': 4096, 'model': 'claude-3-haiku-20240307'}'


_is_function_call: False
RAW RESPONSE:
{"id":"msg_01Wp8NVScugz6yAGsmB5trpZ","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"The current weather in San Francisco is 72°F (22°C)."},{"type":"tool_use","id":"toolu_01HTXEYDX4MspM76STtJqs1n","name":"get_current_weather","input":{"location":"Tokyo","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":426,"output_tokens":90}}


raw model_response: {"id":"msg_01Wp8NVScugz6yAGsmB5trpZ","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"The current weather in San Francisco is 72°F (22°C)."},{"type":"tool_use","id":"toolu_01HTXEYDX4MspM76STtJqs1n","name":"get_current_weather","input":{"location":"Tokyo","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":426,"output_tokens":90}}
Logging Details LiteLLM-Success Call: Cache_hit=None
Looking up model=claude-3-haiku-20240307 in model_cost_map
Looking up model=claude-3-haiku-20240307 in model_cost_map
second response
ModelResponse(id='chatcmpl-c4ed5c25-ba7c-49e5-a6be-5720ab25fff0', choices=[Choices(finish_reason='tool_calls', index=0, message=Message(content='The current weather in San Francisco is 72°F (22°C).', role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "Tokyo", "unit": "celsius"}', name='get_current_weather'), id='toolu_01HTXEYDX4MspM76STtJqs1n', type='function')], function_call=None))], created=1727897484, model='claude-3-haiku-20240307', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=90, prompt_tokens=426, total_tokens=516, completion_tokens_details=None))
PASSED

=============================== warnings summary ===============================
../../myenv/lib/python3.11/site-packages/pydantic/_internal/_config.py:284
  /Users/krrishdholakia/Documents/litellm/myenv/lib/python3.11/site-packages/pydantic/_internal/_config.py:284: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
    warnings.warn(DEPRECATION_MESSAGE, DeprecationWarning)

../../litellm/utils.py:17
  /Users/krrishdholakia/Documents/litellm/litellm/utils.py:17: DeprecationWarning: 'imghdr' is deprecated and slated for removal in Python 3.13
    import imghdr

../../litellm/utils.py:124
  /Users/krrishdholakia/Documents/litellm/litellm/utils.py:124: DeprecationWarning: open_text is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice.
    with resources.open_text("litellm.llms.tokenizers", "anthropic_tokenizer.json") as f:

test_function_calling.py:56
  /Users/krrishdholakia/Documents/litellm/tests/local_testing/test_function_calling.py:56: PytestUnknownMarkWarning: Unknown pytest.mark.flaky - is this a typo? You can register custom marks to avoid this warning - for details, see https://docs.pytest.org/en/stable/how-to/mark.html
    @pytest.mark.flaky(retries=3, delay=1)

tests/local_testing/test_function_calling.py::test_aaparallel_function_call[claude-3-haiku-20240307]
tests/local_testing/test_function_calling.py::test_aaparallel_function_call[claude-3-haiku-20240307]
  /Users/krrishdholakia/Documents/litellm/myenv/lib/python3.11/site-packages/httpx/_content.py:202: DeprecationWarning: Use 'content=<...>' to upload raw bytes/text content.
    warnings.warn(message, DeprecationWarning)

-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
======================== 1 passed, 6 warnings in 1.89s =========================
@@ -47,16 +47,17 @@ def get_current_weather(location, unit="fahrenheit"):
     [
         "gpt-3.5-turbo-1106",
         # "mistral/mistral-large-latest",
-        # "claude-3-haiku-20240307",
-        # "gemini/gemini-1.5-pro",
+        "claude-3-haiku-20240307",
+        "gemini/gemini-1.5-pro",
         "anthropic.claude-3-sonnet-20240229-v1:0",
-        "groq/llama3-8b-8192",
+        # "groq/llama3-8b-8192",
     ],
 )
 @pytest.mark.flaky(retries=3, delay=1)
 def test_aaparallel_function_call(model):
     try:
         litellm.set_verbose = True
+        litellm.modify_params = True
         # Step 1: send the conversation and available functions to the model
         messages = [
             {
@@ -97,7 +98,6 @@ def test_aaparallel_function_call(model):
         response_message = response.choices[0].message
         tool_calls = response_message.tool_calls

         print("length of tool calls", len(tool_calls))
         print("Expecting there to be 3 tool calls")
         assert (
             len(tool_calls) > 0
@@ -141,7 +141,7 @@ def test_aaparallel_function_call(model):
             messages=messages,
             temperature=0.2,
             seed=22,
-            tools=tools,
+            # tools=tools,
             drop_params=True,
         )  # get a new response from the model where it can see the function response
         print("second response\n", second_response)
@@ -445,3 +445,29 @@ def test_groq_parallel_function_call():
         print("second response\n", second_response)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        "anthropic.claude-3-sonnet-20240229-v1:0",
+        "claude-3-haiku-20240307",
+    ],
+)
+def test_anthropic_function_call_with_no_schema(model):
+    """
+    Relevant Issue: https://github.com/BerriAI/litellm/issues/6012
+    """
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_current_weather",
+                "description": "Get the current weather in New York",
+            },
+        }
+    ]
+    messages = [
+        {"role": "user", "content": "What is the current temperature in New York?"}
+    ]
+    completion(model=model, messages=messages, tools=tools, tool_choice="auto")
@@ -4019,7 +4019,7 @@ def test_async_text_completion():
     asyncio.run(test_get_response())


-@pytest.mark.skip(reason="Skip flaky tgai test")
+@pytest.mark.flaky(retries=6, delay=1)
 def test_async_text_completion_together_ai():
     litellm.set_verbose = True
     print("test_async_text_completion")

@@ -4032,6 +4032,8 @@ def test_async_text_completion_together_ai():
             max_tokens=10,
         )
         print(f"response: {response}")
+    except litellm.RateLimitError as e:
+        print(e)
     except litellm.Timeout as e:
         print(e)
     except Exception as e: