LiteLLM Minor Fixes & Improvements (10/02/2024) (#6023)

* feat(together_ai/completion): handle together ai completion calls

* fix: handle list of int / list of list of int for text completion calls

* fix(utils.py): check if base model in bedrock converse model list

Fixes https://github.com/BerriAI/litellm/issues/6003

* test(test_optional_params.py): add unit tests for bedrock optional param mapping

Fixes https://github.com/BerriAI/litellm/issues/6003

* feat(utils.py): enable passing dummy tool call for anthropic/bedrock calls if tool_use blocks exist

Fixes https://github.com/BerriAI/litellm/issues/5388

* fixed an issue with tool use of Claude models with Anthropic and Bedrock (#6013)

* fix(utils.py): handle empty schema for anthropic/bedrock

Fixes https://github.com/BerriAI/litellm/issues/6012

* fix: fix linting errors

* fix: fix linting errors

* fix: fix linting errors

* fix(proxy_cli.py): fix import route for app + health checks path (#6026)

* (testing): Enable testing us.anthropic.claude-3-haiku-20240307-v1:0. (#6018)

* fix(proxy_cli.py): fix import route for app + health checks gettysburg.wav path

Fixes https://github.com/BerriAI/litellm/issues/5999

---------

Co-authored-by: David Manouchehri <david.manouchehri@ai.moda>

---------

Co-authored-by: Ved Patwardhan <54766411+vedpatwardhan@users.noreply.github.com>
Co-authored-by: David Manouchehri <david.manouchehri@ai.moda>
commit 14165d3648 (parent 8995ff49ae)
Krish Dholakia, 2024-10-02 22:00:28 -04:00, committed by GitHub
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)
20 changed files with 443 additions and 125 deletions

@@ -0,0 +1,15 @@
from collections.abc import Iterable
from typing import List


def is_tokens_or_list_of_tokens(value: List):
    # Check if it's a list of integers (tokens)
    if isinstance(value, list) and all(isinstance(item, int) for item in value):
        return True
    # Check if it's a list of lists of integers (list of tokens)
    if isinstance(value, list) and all(
        isinstance(item, list) and all(isinstance(i, int) for i in item)
        for item in value
    ):
        return True
    return False
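
A quick usage sketch of the new helper (not part of the diff; the import path matches the one used by the TogetherAI transformation further down):

from litellm.llms.OpenAI.completion.utils import is_tokens_or_list_of_tokens

assert is_tokens_or_list_of_tokens([1, 2, 3])              # a single token array
assert is_tokens_or_list_of_tokens([[1, 2], [3, 4]])       # a list of token arrays
assert not is_tokens_or_list_of_tokens(["a text prompt"])  # strings are not tokens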

@@ -4,7 +4,7 @@ import os
import time
import traceback
import types
-from typing import Any, Callable, Coroutine, Iterable, Literal, Optional, Union
+from typing import Any, Callable, Coroutine, Iterable, Literal, Optional, Union, cast

import httpx
import openai
@@ -30,8 +30,10 @@ from litellm.utils import (

from ...types.llms.openai import *
from ..base import BaseLLM
+from ..prompt_templates.common_utils import convert_content_list_to_str
from ..prompt_templates.factory import custom_prompt, prompt_factory
from .common_utils import drop_params_from_unprocessable_entity_error
+from .completion.utils import is_tokens_or_list_of_tokens


class OpenAIError(Exception):
@@ -420,6 +422,35 @@ class OpenAITextCompletionConfig:
            and v is not None
        }

+    def _transform_prompt(
+        self,
+        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
+    ) -> AllPromptValues:
+        if len(messages) == 1:  # base case
+            message_content = messages[0].get("content")
+            if (
+                message_content
+                and isinstance(message_content, list)
+                and is_tokens_or_list_of_tokens(message_content)
+            ):
+                openai_prompt: AllPromptValues = cast(AllPromptValues, message_content)
+            else:
+                openai_prompt = ""
+                content = convert_content_list_to_str(
+                    cast(AllMessageValues, messages[0])
+                )
+                openai_prompt += content
+        else:
+            prompt_str_list: List[str] = []
+            for m in messages:
+                try:  # expect list of int / list of list of int to be a 1 message array only.
+                    content = convert_content_list_to_str(cast(AllMessageValues, m))
+                    prompt_str_list.append(content)
+                except Exception as e:
+                    raise e
+            openai_prompt = prompt_str_list
+        return openai_prompt
+
    def convert_to_chat_model_response_object(
        self,
        response_object: Optional[TextCompletionResponse] = None,
@@ -459,6 +490,7 @@ class OpenAITextCompletionConfig:
+

class OpenAIChatCompletion(BaseLLM):
    def __init__(self) -> None:
        super().__init__()
@@ -1466,7 +1498,9 @@ class OpenAIChatCompletion(BaseLLM):
        elif mode == "audio_transcription":
            # Get the current directory of the file being run
            pwd = os.path.dirname(os.path.realpath(__file__))
-            file_path = os.path.join(pwd, "../tests/gettysburg.wav")
+            file_path = os.path.join(
+                pwd, "../../../tests/gettysburg.wav"
+            )  # proxy address
            audio_file = open(file_path, "rb")
            completion = await client.audio.transcriptions.with_raw_response.create(
                file=audio_file,
@@ -1502,6 +1536,8 @@ class OpenAIChatCompletion(BaseLLM):

class OpenAITextCompletion(BaseLLM):
+    openai_text_completion_global_config = OpenAITextCompletionConfig()
+
    def __init__(self) -> None:
        super().__init__()
@@ -1518,7 +1554,7 @@ class OpenAITextCompletion(BaseLLM):
        model_response: ModelResponse,
        api_key: str,
        model: str,
-        messages: list,
+        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
        timeout: float,
        logging_obj: LiteLLMLoggingObj,
        optional_params: dict,
@@ -1531,24 +1567,18 @@ class OpenAITextCompletion(BaseLLM):
        organization: Optional[str] = None,
        headers: Optional[dict] = None,
    ):
-        super().completion()
        try:
            if headers is None:
                headers = self.validate_environment(api_key=api_key)
            if model is None or messages is None:
                raise OpenAIError(status_code=422, message="Missing model or messages")
-            if (
-                len(messages) > 0
-                and "content" in messages[0]
-                and isinstance(messages[0]["content"], list)
-            ):
-                prompt = messages[0]["content"]
-            else:
-                prompt = [message["content"] for message in messages]  # type: ignore
            # don't send max retries to the api, if set
+            prompt = self.openai_text_completion_global_config._transform_prompt(
+                messages
+            )
            data = {"model": model, "prompt": prompt, **optional_params}
            max_retries = data.pop("max_retries", 2)

            ## LOGGING
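
A hedged sketch of what the new `_transform_prompt` returns for the message shapes it accepts (behaviour read off the method body above; not part of the diff):

from litellm.llms.OpenAI.openai import OpenAITextCompletionConfig

config = OpenAITextCompletionConfig()

# a single message whose content is already tokens is passed through untouched
assert config._transform_prompt([{"role": "user", "content": [1, 2, 3]}]) == [1, 2, 3]

# a single text message is flattened to a plain string prompt
assert config._transform_prompt([{"role": "user", "content": "good morning"}]) == "good morning"

# multiple messages become a list of string prompts
assert config._transform_prompt(
    [{"role": "user", "content": "prompt one"}, {"role": "user", "content": "prompt two"}]
) == ["prompt one", "prompt two"]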

@@ -551,6 +551,8 @@ class AnthropicChatCompletion(BaseLLM):
            error_response = getattr(e, "response", None)
            if error_headers is None and error_response:
                error_headers = getattr(error_response, "headers", None)
+            if error_response and hasattr(error_response, "text"):
+                error_text = getattr(error_response, "text", error_text)
            raise AnthropicError(
                message=error_text,
                status_code=status_code,

@@ -6,11 +6,17 @@ from litellm.llms.prompt_templates.factory import anthropic_messages_pt
from litellm.types.llms.anthropic import (
    AnthropicMessageRequestBase,
    AnthropicMessagesRequest,
+    AnthropicMessagesTool,
    AnthropicMessagesToolChoice,
    AnthropicSystemMessageContent,
)
-from litellm.types.llms.openai import AllMessageValues, ChatCompletionSystemMessage
-from litellm.utils import has_tool_call_blocks
+from litellm.types.llms.openai import (
+    AllMessageValues,
+    ChatCompletionSystemMessage,
+    ChatCompletionToolParam,
+    ChatCompletionToolParamFunctionChunk,
+)
+from litellm.utils import add_dummy_tool, has_tool_call_blocks

from ..common_utils import AnthropicError
@@ -146,8 +152,13 @@ class AnthropicConfig:
            and messages is not None
            and has_tool_call_blocks(messages)
        ):
-            raise litellm.UnsupportedParamsError(
-                message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.",
-                model="",
-                llm_provider="anthropic",
-            )
+            if litellm.modify_params:
+                optional_params["tools"] = add_dummy_tool(
+                    custom_llm_provider="bedrock_converse"
+                )
+            else:
+                raise litellm.UnsupportedParamsError(
+                    message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param OR set `litellm.modify_params = True` // `litellm_settings::modify_params: True` to add dummy tool to the request.",
+                    model="",
+                    llm_provider="anthropic",
+                )
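
A hedged usage sketch of the new `modify_params` behaviour (message shapes mirror the test log included later in this commit; the tool-call id is made up): with it enabled, a follow-up call that carries tool-call / tool-result blocks but no `tools=` param gets a dummy tool injected instead of raising `UnsupportedParamsError`.

import litellm
from litellm import completion

litellm.modify_params = True  # proxy equivalent: litellm_settings::modify_params: True

messages = [
    {"role": "user", "content": "What's the weather like in San Francisco?"},
    {  # assistant turn containing a tool_use block
        "role": "assistant",
        "content": "Let me check the weather.",
        "tool_calls": [
            {
                "id": "toolu_123",  # made-up id, for illustration only
                "type": "function",
                "function": {"name": "get_current_weather", "arguments": '{"location": "San Francisco"}'},
            }
        ],
    },
    {
        "role": "tool",
        "tool_call_id": "toolu_123",
        "name": "get_current_weather",
        "content": '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}',
    },
]

# no tools= here; previously this raised UnsupportedParamsError on Anthropic/Bedrock
response = completion(model="claude-3-haiku-20240307", messages=messages)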
@@ -266,18 +277,23 @@ class AnthropicConfig:
        if "anthropic-beta" not in headers:
            # default to v1 of "anthropic-beta"
            headers["anthropic-beta"] = "tools-2024-05-16"

            anthropic_tools = []
            for tool in optional_params["tools"]:
                if "input_schema" in tool:  # assume in anthropic format
                    anthropic_tools.append(tool)
                else:  # assume openai tool call
                    new_tool = tool["function"]
-                    new_tool["input_schema"] = new_tool.pop("parameters")  # rename key
+                    parameters = new_tool.pop(
+                        "parameters",
+                        {
+                            "type": "object",
+                            "properties": {},
+                        },
+                    )
+                    new_tool["input_schema"] = parameters  # rename key
                    if "cache_control" in tool:
                        new_tool["cache_control"] = tool["cache_control"]
                    anthropic_tools.append(new_tool)

            optional_params["tools"] = anthropic_tools

        data = {
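
A minimal sketch of what the `parameters` default means for a tool defined without a schema (the tool shape is taken from the new test at the bottom of this commit):

# OpenAI-style tool with no "parameters" block; new_tool.pop("parameters") used to raise KeyError here
tool = {
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in New York",
    },
}

# after the transformation above, the Anthropic-format tool becomes roughly:
# {"name": "get_current_weather",
#  "description": "Get the current weather in New York",
#  "input_schema": {"type": "object", "properties": {}}}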

@@ -26,6 +26,7 @@ class AzureAIStudioConfig(OpenAIConfig):
    def _transform_messages(self, messages: List[AllMessageValues]) -> List:
        for message in messages:
-            message = convert_content_list_to_str(message=message)
+            texts = convert_content_list_to_str(message=message)
+            if texts:
+                message["content"] = texts
        return messages

@@ -22,7 +22,7 @@ from litellm.types.llms.openai import (
    ChatCompletionToolParamFunctionChunk,
)
from litellm.types.utils import ModelResponse, Usage
-from litellm.utils import CustomStreamWrapper, has_tool_call_blocks
+from litellm.utils import CustomStreamWrapper, add_dummy_tool, has_tool_call_blocks

from ...prompt_templates.factory import _bedrock_converse_messages_pt, _bedrock_tools_pt
from ..common_utils import BedrockError, get_bedrock_tool_name
@@ -213,10 +213,15 @@ class AmazonConverseConfig:
            and messages is not None
            and has_tool_call_blocks(messages)
        ):
-            raise litellm.UnsupportedParamsError(
-                message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.",
-                model="",
-                llm_provider="anthropic",
-            )
+            if litellm.modify_params:
+                optional_params["tools"] = add_dummy_tool(
+                    custom_llm_provider="bedrock_converse"
+                )
+            else:
+                raise litellm.UnsupportedParamsError(
+                    message="Bedrock doesn't support tool calling without `tools=` param specified. Pass `tools=` param OR set `litellm.modify_params = True` // `litellm_settings::modify_params: True` to add dummy tool to the request.",
+                    model="",
+                    llm_provider="bedrock",
+                )

        return optional_params

@@ -7,7 +7,7 @@ from typing import List

from litellm.types.llms.openai import AllMessageValues


-def convert_content_list_to_str(message: AllMessageValues) -> AllMessageValues:
+def convert_content_list_to_str(message: AllMessageValues) -> str:
    """
    - handles scenario where content is list and not string
    - content list is just text, and no images
@@ -26,7 +26,4 @@ def convert_content_list_to_str(message: AllMessageValues) -> AllMessageValues:
    elif message_content is not None and isinstance(message_content, str):
        texts = message_content

-    if texts:
-        message["content"] = texts
-
-    return message
+    return texts
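
For reference (not part of the diff), a hedged sketch of the helper's new contract: it now returns the flattened text instead of mutating and returning the message. The list-content case assumes the text-concatenation behaviour described in the docstring:

from litellm.llms.prompt_templates.common_utils import convert_content_list_to_str

# plain string content is returned as-is
assert convert_content_list_to_str({"role": "user", "content": "hi"}) == "hi"

# a content list of text parts is flattened to a single string (assumed from the docstring)
assert convert_content_list_to_str(
    {"role": "user", "content": [{"type": "text", "text": "hi"}]}
) == "hi"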

@@ -2554,7 +2554,10 @@ def _bedrock_tools_pt(tools: List) -> List[BedrockToolBlock]:
    """
    tool_block_list: List[BedrockToolBlock] = []
    for tool in tools:
-        parameters = tool.get("function", {}).get("parameters", None)
+        parameters = tool.get("function", {}).get("parameters", {
+            "type": "object",
+            "properties": {}
+        })
        name = tool.get("function", {}).get("name", "")

        # related issue: https://github.com/BerriAI/litellm/issues/5007

@@ -1,7 +0,0 @@
"""
Support for OpenAI's `/v1/completions` endpoint.

Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.

Docs: https://docs.together.ai/reference/completions-1
"""

@@ -0,0 +1,61 @@
"""
Support for OpenAI's `/v1/completions` endpoint.

Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.

Docs: https://docs.together.ai/reference/completions-1
"""

from typing import Any, Callable, List, Optional, Union

from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.types.llms.openai import AllMessageValues, OpenAITextCompletionUserMessage
from litellm.utils import ModelResponse

from ...OpenAI.openai import OpenAITextCompletion
from .transformation import TogetherAITextCompletionConfig

together_ai_text_completion_global_config = TogetherAITextCompletionConfig()


class TogetherAITextCompletion(OpenAITextCompletion):

    def completion(
        self,
        model_response: ModelResponse,
        api_key: str,
        model: str,
        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
        timeout: float,
        logging_obj: Logging,
        optional_params: dict,
        print_verbose: Optional[Callable[..., Any]] = None,
        api_base: Optional[str] = None,
        acompletion: bool = False,
        litellm_params=None,
        logger_fn=None,
        client=None,
        organization: Optional[str] = None,
        headers: Optional[dict] = None,
    ):
        prompt = together_ai_text_completion_global_config._transform_prompt(messages)

        message = OpenAITextCompletionUserMessage(role="user", content=prompt)
        new_messages = [message]

        return super().completion(
            model_response=model_response,
            api_key=api_key,
            model=model,
            messages=new_messages,
            timeout=timeout,
            logging_obj=logging_obj,
            optional_params=optional_params,
            print_verbose=print_verbose,
            api_base=api_base,
            acompletion=acompletion,
            litellm_params=litellm_params,
            logger_fn=logger_fn,
            client=client,
            organization=organization,
            headers=headers,
        )
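
A hedged end-to-end sketch (not part of the diff): with the routing change further down (the `custom_llm_provider == "together_ai"` branch), a Together AI text-completion request flows through this handler. The model name below is only illustrative.

import litellm

response = litellm.text_completion(
    model="together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1",  # illustrative model id
    prompt="good morning",
    max_tokens=10,
)
print(response)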

@@ -0,0 +1,46 @@
"""
Translates calls from OpenAI's `/v1/completions` endpoint to TogetherAI's `/v1/completions` endpoint.

Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.

Docs: https://docs.together.ai/reference/completions-1
"""

from typing import List, Union, cast

from litellm.llms.OpenAI.completion.utils import is_tokens_or_list_of_tokens
from litellm.types.llms.openai import (
    AllMessageValues,
    AllPromptValues,
    OpenAITextCompletionUserMessage,
)

from ...OpenAI.openai import OpenAITextCompletionConfig


class TogetherAITextCompletionConfig(OpenAITextCompletionConfig):
    def _transform_prompt(
        self,
        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
    ) -> AllPromptValues:
        """
        TogetherAI expects a string prompt.
        """
        initial_prompt: AllPromptValues = super()._transform_prompt(messages)
        ## TOGETHER AI SPECIFIC VALIDATION ##
        if isinstance(initial_prompt, list) and is_tokens_or_list_of_tokens(
            value=initial_prompt
        ):
            raise ValueError("TogetherAI does not support integers as input")
        if (
            isinstance(initial_prompt, list)
            and len(initial_prompt) == 1
            and isinstance(initial_prompt[0], str)
        ):
            together_prompt = initial_prompt[0]
        elif isinstance(initial_prompt, list):
            raise ValueError("TogetherAI does not support multiple prompts.")
        else:
            together_prompt = cast(str, initial_prompt)

        return together_prompt
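
A hedged sketch of the validation above (not part of the diff):

from litellm.llms.together_ai.completion.transformation import (
    TogetherAITextCompletionConfig,
)

config = TogetherAITextCompletionConfig()

# a single text message collapses to the plain string Together AI expects
assert config._transform_prompt([{"role": "user", "content": "good morning"}]) == "good morning"

# token prompts and multi-prompt batches are rejected:
# config._transform_prompt([{"role": "user", "content": [1, 2, 3]}])                               -> ValueError
# config._transform_prompt([{"role": "user", "content": "a"}, {"role": "user", "content": "b"}])   -> ValueError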

@@ -112,6 +112,7 @@ from .llms.prompt_templates.factory import (
)
from .llms.sagemaker.sagemaker import SagemakerLLM
from .llms.text_completion_codestral import CodestralTextCompletion
+from .llms.together_ai.completion.handler import TogetherAITextCompletion
from .llms.triton import TritonChatCompletion
from .llms.vertex_ai_and_google_ai_studio import (
    vertex_ai_anthropic,
@@ -168,6 +169,7 @@ openai_o1_chat_completions = OpenAIO1ChatCompletion()
openai_audio_transcriptions = OpenAIAudioTranscription()
databricks_chat_completions = DatabricksChatCompletion()
groq_chat_completions = GroqChatCompletion()
+together_ai_text_completions = TogetherAITextCompletion()
azure_ai_chat_completions = AzureAIChatCompletion()
azure_ai_embedding = AzureAIEmbedding()
anthropic_chat_completions = AnthropicChatCompletion()
@@ -1285,6 +1287,23 @@ def completion(
            prompt = " ".join([message["content"] for message in messages])  # type: ignore

        ## COMPLETION CALL
+        if custom_llm_provider == "together_ai":
+            _response = together_ai_text_completions.completion(
+                model=model,
+                messages=messages,
+                model_response=model_response,
+                print_verbose=print_verbose,
+                api_key=api_key,
+                api_base=api_base,
+                acompletion=acompletion,
+                client=client,  # pass AsyncOpenAI, OpenAI client
+                logging_obj=logging,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
+                logger_fn=logger_fn,
+                timeout=timeout,  # type: ignore
+            )
+        else:
            _response = openai_text_completions.completion(
                model=model,
                messages=messages,

@@ -1,61 +1,7 @@
model_list:
-  - model_name: fake-claude-endpoint
-    litellm_params:
-      model: anthropic.claude-3-sonnet-20240229-v1:0
-      api_base: https://exampleopenaiendpoint-production.up.railway.app
-      aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
-      aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
-  - model_name: gemini-vision
-    litellm_params:
-      model: vertex_ai/gemini-1.0-pro-vision-001
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.0-pro-vision-001
-      vertex_project: "adroit-crow-413218"
-      vertex_location: "us-central1"
-  - model_name: fake-azure-endpoint
-    litellm_params:
-      model: openai/429
-      api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app
-  - model_name: fake-openai-endpoint
-    litellm_params:
-      model: gpt-3.5-turbo
-      api_base: https://exampleopenaiendpoint-production.up.railway.app
-  - model_name: o1-preview
-    litellm_params:
-      model: o1-preview
-  - model_name: rerank-english-v3.0
-    litellm_params:
-      model: cohere/rerank-english-v3.0
-      api_key: os.environ/COHERE_API_KEY
-  - model_name: azure-rerank-english-v3.0
-    litellm_params:
-      model: azure_ai/rerank-english-v3.0
-      api_base: os.environ/AZURE_AI_COHERE_API_BASE
-      api_key: os.environ/AZURE_AI_COHERE_API_KEY
-  - model_name: "databricks/*"
-    litellm_params:
-      model: "databricks/*"
-      api_key: os.environ/DATABRICKS_API_KEY
-      api_base: os.environ/DATABRICKS_API_BASE
-  - model_name: "anthropic/*"
-    litellm_params:
-      model: "anthropic/*"
-  - model_name: "*"
-    litellm_params:
-      model: "openai/*"
-  - model_name: "fireworks_ai/*"
-    litellm_params:
-      model: "fireworks_ai/*"
-      configurable_clientside_auth_params: ["api_base"]
-  - model_name: "gemini-flash-experimental"
-    litellm_params:
-      model: "vertex_ai/gemini-flash-experimental"
-
-litellm_settings:
-  json_logs: true
-  cache: true
-  cache_params:
-    type: "redis"
-    # namespace: "litellm_caching"
-    ttl: 900
-  callbacks: ["batch_redis_requests"]
+  - model_name: whisper
+    litellm_params:
+      model: whisper-1
+      api_key: os.environ/OPENAI_API_KEY
+    model_info:
+      mode: audio_transcription

@@ -673,6 +673,9 @@ def run_server(
        import litellm

+        # DO NOT DELETE - enables global variables to work across files
+        from litellm.proxy.proxy_server import app  # noqa
+
        if run_gunicorn is False and run_hypercorn is False:
            if ssl_certfile_path is not None and ssl_keyfile_path is not None:
                print(  # noqa

@@ -347,12 +347,20 @@ OpenAIMessageContent = Union[
    str, Iterable[Union[ChatCompletionTextObject, ChatCompletionImageObject]]
]

+# The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays.
+AllPromptValues = Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None]
+

class OpenAIChatCompletionUserMessage(TypedDict):
    role: Literal["user"]
    content: OpenAIMessageContent


+class OpenAITextCompletionUserMessage(TypedDict):
+    role: Literal["user"]
+    content: AllPromptValues
+
+
class ChatCompletionUserMessage(OpenAIChatCompletionUserMessage, total=False):
    cache_control: ChatCompletionCachedContent
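
For reference (not part of the diff), the prompt shapes the new `AllPromptValues` alias admits, per the comment above:

from litellm.types.llms.openai import AllPromptValues, OpenAITextCompletionUserMessage

p1: AllPromptValues = "a single string prompt"
p2: AllPromptValues = ["prompt one", "prompt two"]   # array of strings
p3: AllPromptValues = [1, 2, 3]                      # array of tokens
p4: AllPromptValues = [[1, 2], [3, 4]]               # array of token arrays

msg = OpenAITextCompletionUserMessage(role="user", content=p1)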

@@ -80,6 +80,7 @@ from litellm.types.llms.openai import (
    AllMessageValues,
    ChatCompletionNamedToolChoiceParam,
    ChatCompletionToolParam,
+    ChatCompletionToolParamFunctionChunk,
)
from litellm.types.utils import FileTypes  # type: ignore
from litellm.types.utils import (
@@ -3360,7 +3361,8 @@ def get_optional_params(
        supported_params = get_supported_openai_params(
            model=model, custom_llm_provider=custom_llm_provider
        )
-        if model in litellm.BEDROCK_CONVERSE_MODELS:
+        base_model = litellm.AmazonConverseConfig()._get_base_model(model)
+        if base_model in litellm.BEDROCK_CONVERSE_MODELS:
            _check_valid_arg(supported_params=supported_params)
            optional_params = litellm.AmazonConverseConfig().map_openai_params(
                model=model,
@@ -9255,3 +9257,24 @@ def process_response_headers(response_headers: Union[httpx.Headers, dict]) -> dict:
        **additional_headers,
    }
    return additional_headers
+
+
+def add_dummy_tool(custom_llm_provider: str) -> List[ChatCompletionToolParam]:
+    """
+    Prevent Anthropic from raising error when tool_use block exists but no tools are provided.
+
+    Relevent Issues: https://github.com/BerriAI/litellm/issues/5388, https://github.com/BerriAI/litellm/issues/5747
+    """
+    return [
+        ChatCompletionToolParam(
+            type="function",
+            function=ChatCompletionToolParamFunctionChunk(
+                name="dummy-tool",
+                description="This is a dummy tool call",  # provided to satisfy bedrock constraint.
+                parameters={
+                    "type": "object",
+                    "properties": {},
+                },
+            ),
+        )
+    ]
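
For reference (not part of the diff), roughly what the helper returns; the Anthropic request in the log further down carries the same tool as {"name": "dummy-tool", ..., "input_schema": {"type": "object", "properties": {}}}:

from litellm.utils import add_dummy_tool

tools = add_dummy_tool(custom_llm_provider="bedrock_converse")
# roughly: [{"type": "function",
#            "function": {"name": "dummy-tool",
#                         "description": "This is a dummy tool call",
#                         "parameters": {"type": "object", "properties": {}}}}]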

@@ -75,6 +75,24 @@ def test_bedrock_optional_params_embeddings():
    assert len(optional_params) == 0


+@pytest.mark.parametrize(
+    "model",
+    [
+        "us.anthropic.claude-3-haiku-20240307-v1:0",
+        "us.meta.llama3-2-11b-instruct-v1:0",
+        "anthropic.claude-3-haiku-20240307-v1:0",
+    ],
+)
+def test_bedrock_optional_params_completions(model):
+    litellm.drop_params = True
+    optional_params = get_optional_params(
+        model=model, max_tokens=10, temperature=0.1, custom_llm_provider="bedrock"
+    )
+    print(f"optional_params: {optional_params}")
+    assert len(optional_params) == 3
+    assert optional_params == {"maxTokens": 10, "stream": False, "temperature": 0.1}
+
+
@pytest.mark.parametrize(
    "model, expected_dimensions, dimensions_kwarg",
    [

tests/local_testing/log.txt (new file, 104 lines)

@@ -0,0 +1,104 @@
============================= test session starts ==============================
platform darwin -- Python 3.11.4, pytest-8.3.2, pluggy-1.5.0 -- /Users/krrishdholakia/Documents/litellm/myenv/bin/python3.11
cachedir: .pytest_cache
rootdir: /Users/krrishdholakia/Documents/litellm
configfile: pyproject.toml
plugins: asyncio-0.23.8, respx-0.21.1, anyio-4.6.0
asyncio: mode=Mode.STRICT
collecting ... collected 1 item
test_function_calling.py::test_aaparallel_function_call[claude-3-haiku-20240307] <module 'litellm' from '/Users/krrishdholakia/Documents/litellm/litellm/__init__.py'>
Request to litellm:
litellm.completion(model='claude-3-haiku-20240307', messages=[{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}], tools=[{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], tool_choice='auto')
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], 'tool_choice': {'type': 'auto'}}
optional_params: {'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], 'tool_choice': {'type': 'auto'}}
SENT optional_params: {'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], 'tool_choice': {'type': 'auto'}, 'max_tokens': 4096}
tool: {'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}

POST Request Sent from LiteLLM:
curl -X POST \
https://api.anthropic.com/v1/messages \
-H 'accept: *****' -H 'anthropic-version: *****' -H 'content-type: *****' -H 'x-api-key: sk-ant-api03-bJf1M8qp-JDptRcZRE5ve5efAfSIaL5u-SZ9vItIkvuFcV5cUsd********************************************' -H 'anthropic-beta: *****' \
-d '{'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}]}], 'tools': [{'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'input_schema': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}], 'tool_choice': {'type': 'auto'}, 'max_tokens': 4096, 'model': 'claude-3-haiku-20240307'}'

_is_function_call: False
RAW RESPONSE:
{"id":"msg_01HRugqzL4WmcxMmbvDheTph","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"Okay, let's check the current weather in those three cities:"},{"type":"tool_use","id":"toolu_016U6G3kpxjHSiJLwVCrrScz","name":"get_current_weather","input":{"location":"San Francisco","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":379,"output_tokens":87}}
raw model_response: {"id":"msg_01HRugqzL4WmcxMmbvDheTph","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"Okay, let's check the current weather in those three cities:"},{"type":"tool_use","id":"toolu_016U6G3kpxjHSiJLwVCrrScz","name":"get_current_weather","input":{"location":"San Francisco","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":379,"output_tokens":87}}
Logging Details LiteLLM-Success Call: Cache_hit=None
Looking up model=claude-3-haiku-20240307 in model_cost_map
Looking up model=claude-3-haiku-20240307 in model_cost_map
Response
ModelResponse(id='chatcmpl-7222f6c2-962a-4776-8639-576723466cb7', choices=[Choices(finish_reason='tool_calls', index=0, message=Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None))], created=1727897483, model='claude-3-haiku-20240307', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=87, prompt_tokens=379, total_tokens=466, completion_tokens_details=None))
length of tool calls 1
Expecting there to be 3 tool calls
tool_calls: [ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')]
Response message
Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None)
messages: [{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}, Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None), {'tool_call_id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'role': 'tool', 'name': 'get_current_weather', 'content': '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}'}]
Request to litellm:
litellm.completion(model='claude-3-haiku-20240307', messages=[{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}, Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None), {'tool_call_id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'role': 'tool', 'name': 'get_current_weather', 'content': '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}'}], temperature=0.2, seed=22, drop_params=True)
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'temperature': 0.2, 'tools': [{'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}]}
optional_params: {'temperature': 0.2, 'tools': [{'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}]}
SENT optional_params: {'temperature': 0.2, 'tools': [{'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}], 'max_tokens': 4096}
tool: {'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}

POST Request Sent from LiteLLM:
curl -X POST \
https://api.anthropic.com/v1/messages \
-H 'accept: *****' -H 'anthropic-version: *****' -H 'content-type: *****' -H 'x-api-key: sk-ant-api03-bJf1M8qp-JDptRcZRE5ve5efAfSIaL5u-SZ9vItIkvuFcV5cUsd********************************************' -H 'anthropic-beta: *****' \
-d '{'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}]}, {'role': 'assistant', 'content': [{'type': 'tool_use', 'id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'name': 'get_current_weather', 'input': {'location': 'San Francisco', 'unit': 'celsius'}}]}, {'role': 'user', 'content': [{'type': 'tool_result', 'tool_use_id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'content': '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}'}]}], 'temperature': 0.2, 'tools': [{'name': 'dummy-tool', 'description': '', 'input_schema': {'type': 'object', 'properties': {}}}], 'max_tokens': 4096, 'model': 'claude-3-haiku-20240307'}'

_is_function_call: False
RAW RESPONSE:
{"id":"msg_01Wp8NVScugz6yAGsmB5trpZ","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"The current weather in San Francisco is 72°F (22°C)."},{"type":"tool_use","id":"toolu_01HTXEYDX4MspM76STtJqs1n","name":"get_current_weather","input":{"location":"Tokyo","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":426,"output_tokens":90}}
raw model_response: {"id":"msg_01Wp8NVScugz6yAGsmB5trpZ","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"The current weather in San Francisco is 72°F (22°C)."},{"type":"tool_use","id":"toolu_01HTXEYDX4MspM76STtJqs1n","name":"get_current_weather","input":{"location":"Tokyo","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":426,"output_tokens":90}}
Logging Details LiteLLM-Success Call: Cache_hit=None
Looking up model=claude-3-haiku-20240307 in model_cost_map
Looking up model=claude-3-haiku-20240307 in model_cost_map
second response
ModelResponse(id='chatcmpl-c4ed5c25-ba7c-49e5-a6be-5720ab25fff0', choices=[Choices(finish_reason='tool_calls', index=0, message=Message(content='The current weather in San Francisco is 72°F (22°C).', role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "Tokyo", "unit": "celsius"}', name='get_current_weather'), id='toolu_01HTXEYDX4MspM76STtJqs1n', type='function')], function_call=None))], created=1727897484, model='claude-3-haiku-20240307', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=90, prompt_tokens=426, total_tokens=516, completion_tokens_details=None))
PASSED
=============================== warnings summary ===============================
../../myenv/lib/python3.11/site-packages/pydantic/_internal/_config.py:284
/Users/krrishdholakia/Documents/litellm/myenv/lib/python3.11/site-packages/pydantic/_internal/_config.py:284: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
warnings.warn(DEPRECATION_MESSAGE, DeprecationWarning)
../../litellm/utils.py:17
/Users/krrishdholakia/Documents/litellm/litellm/utils.py:17: DeprecationWarning: 'imghdr' is deprecated and slated for removal in Python 3.13
import imghdr
../../litellm/utils.py:124
/Users/krrishdholakia/Documents/litellm/litellm/utils.py:124: DeprecationWarning: open_text is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice.
with resources.open_text("litellm.llms.tokenizers", "anthropic_tokenizer.json") as f:
test_function_calling.py:56
/Users/krrishdholakia/Documents/litellm/tests/local_testing/test_function_calling.py:56: PytestUnknownMarkWarning: Unknown pytest.mark.flaky - is this a typo? You can register custom marks to avoid this warning - for details, see https://docs.pytest.org/en/stable/how-to/mark.html
@pytest.mark.flaky(retries=3, delay=1)
tests/local_testing/test_function_calling.py::test_aaparallel_function_call[claude-3-haiku-20240307]
tests/local_testing/test_function_calling.py::test_aaparallel_function_call[claude-3-haiku-20240307]
/Users/krrishdholakia/Documents/litellm/myenv/lib/python3.11/site-packages/httpx/_content.py:202: DeprecationWarning: Use 'content=<...>' to upload raw bytes/text content.
warnings.warn(message, DeprecationWarning)
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
======================== 1 passed, 6 warnings in 1.89s =========================

@@ -47,16 +47,17 @@ def get_current_weather(location, unit="fahrenheit"):
    [
        "gpt-3.5-turbo-1106",
        # "mistral/mistral-large-latest",
-        # "claude-3-haiku-20240307",
-        # "gemini/gemini-1.5-pro",
+        "claude-3-haiku-20240307",
+        "gemini/gemini-1.5-pro",
        "anthropic.claude-3-sonnet-20240229-v1:0",
-        "groq/llama3-8b-8192",
+        # "groq/llama3-8b-8192",
    ],
)
@pytest.mark.flaky(retries=3, delay=1)
def test_aaparallel_function_call(model):
    try:
        litellm.set_verbose = True
+        litellm.modify_params = True
        # Step 1: send the conversation and available functions to the model
        messages = [
            {
@@ -97,7 +98,6 @@ def test_aaparallel_function_call(model):
        response_message = response.choices[0].message
        tool_calls = response_message.tool_calls

-        print("length of tool calls", len(tool_calls))
        print("Expecting there to be 3 tool calls")
        assert (
            len(tool_calls) > 0
@@ -141,7 +141,7 @@ def test_aaparallel_function_call(model):
            messages=messages,
            temperature=0.2,
            seed=22,
-            tools=tools,
+            # tools=tools,
            drop_params=True,
        )  # get a new response from the model where it can see the function response
        print("second response\n", second_response)
@@ -445,3 +445,29 @@ def test_groq_parallel_function_call():
        print("second response\n", second_response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        "anthropic.claude-3-sonnet-20240229-v1:0",
+        "claude-3-haiku-20240307",
+    ],
+)
+def test_anthropic_function_call_with_no_schema(model):
+    """
+    Relevant Issue: https://github.com/BerriAI/litellm/issues/6012
+    """
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_current_weather",
+                "description": "Get the current weather in New York",
+            },
+        }
+    ]
+    messages = [
+        {"role": "user", "content": "What is the current temperature in New York?"}
+    ]
+    completion(model=model, messages=messages, tools=tools, tool_choice="auto")

@@ -4019,7 +4019,7 @@ def test_async_text_completion():
    asyncio.run(test_get_response())


-@pytest.mark.skip(reason="Skip flaky tgai test")
+@pytest.mark.flaky(retries=6, delay=1)
def test_async_text_completion_together_ai():
    litellm.set_verbose = True
    print("test_async_text_completion")
@@ -4032,6 +4032,8 @@ def test_async_text_completion_together_ai():
            max_tokens=10,
        )
        print(f"response: {response}")
+    except litellm.RateLimitError as e:
+        print(e)
    except litellm.Timeout as e:
        print(e)
    except Exception as e: