From 4657a40ef1238f6301e38a1faa9973c263207c0a Mon Sep 17 00:00:00 2001
From: Krish Dholakia
Date: Thu, 12 Sep 2024 23:04:06 -0700
Subject: [PATCH] LiteLLM Minor Fixes and Improvements (09/12/2024) (#5658)

* fix(factory.py): handle tool call content as list

Fixes https://github.com/BerriAI/litellm/issues/5652

* fix(factory.py): enforce stronger typing

* fix(router.py): return model alias in /v1/model/info and /v1/model_group/info

* fix(user_api_key_auth.py): move noisy warning message to debug

cleanup logs

* fix(types.py): cleanup pydantic v2 deprecated param

Fixes https://github.com/BerriAI/litellm/issues/5649

* docs(gemini.md): show how to pass inline data to gemini api

Fixes https://github.com/BerriAI/litellm/issues/5674
---
 .pre-commit-config.yaml                  |  12 +-
 docs/my-website/docs/providers/gemini.md | 125 ++++++++++++++++++
 litellm/llms/prompt_templates/factory.py |  66 ++++++---
 .../vertex_ai_non_gemini.py              |   6 +-
 litellm/proxy/auth/auth_checks.py        |   2 +-
 litellm/proxy/auth/user_api_key_auth.py  |   4 +-
 litellm/proxy/proxy_server.py            |   2 +-
 litellm/rerank_api/types.py              |   3 -
 litellm/router.py                        |   3 +-
 litellm/tests/test_bedrock_completion.py |  55 ++++++++
 litellm/tests/test_function_calling.py   |   4 +-
 litellm/tests/test_proxy_server.py       |  69 ++++++++++
 litellm/types/completion.py              |   7 +-
 litellm/types/llms/openai.py             |   7 +-
 14 files changed, 324 insertions(+), 41 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d429bc6b8..a33473b72 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,12 +1,12 @@
 repos:
 - repo: local
   hooks:
-    # - id: mypy
-    #   name: mypy
-    #   entry: python3 -m mypy --ignore-missing-imports
-    #   language: system
-    #   types: [python]
-    #   files: ^litellm/
+    - id: mypy
+      name: mypy
+      entry: python3 -m mypy --ignore-missing-imports
+      language: system
+      types: [python]
+      files: ^litellm/
     - id: isort
       name: isort
       entry: isort
diff --git a/docs/my-website/docs/providers/gemini.md b/docs/my-website/docs/providers/gemini.md
index 4bc2235af..8a8d2a004 100644
--- a/docs/my-website/docs/providers/gemini.md
+++ b/docs/my-website/docs/providers/gemini.md
@@ -708,6 +708,131 @@ response = await client.chat.completions.create(
 
 
 
+## Usage - PDF / Videos / etc. Files
+
+### Inline Data (e.g. audio stream)
+
+LiteLLM follows the OpenAI format and accepts inline data sent as a base64-encoded string.
+
+The format to follow is:
+
+```python
+data:<mime_type>;base64,<encoded_data>
+```
+
+**LITELLM CALL**
+
+```python
+import litellm
+from pathlib import Path
+import base64
+import os
+
+os.environ["GEMINI_API_KEY"] = ""
+
+litellm.set_verbose = True # 👈 See Raw call
+
+audio_bytes = Path("speech_vertex.mp3").read_bytes()
+encoded_data = base64.b64encode(audio_bytes).decode("utf-8")
+print("Audio Bytes = {}".format(audio_bytes))
+model = "gemini/gemini-1.5-flash"
+response = litellm.completion(
+    model=model,
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "Please summarize the audio."},
+                {
+                    "type": "image_url",
+                    "image_url": "data:audio/mp3;base64,{}".format(encoded_data),  # 👈 SET MIME_TYPE + DATA
+                },
+            ],
+        }
+    ],
+)
+```
+
+**Equivalent GOOGLE API CALL**
+
+```python
+# Initialize a Gemini model appropriate for your use case.
+model = genai.GenerativeModel('models/gemini-1.5-flash')
+
+# Create the prompt.
+prompt = "Please summarize the audio."
+
+# Load the samplesmall.mp3 file into a Python Blob object containing the audio
+# file's bytes and then pass the prompt and the audio to Gemini.
+response = model.generate_content([
+  prompt,
+  {
+    "mime_type": "audio/mp3",
+    "data": pathlib.Path('samplesmall.mp3').read_bytes()
+  }
+])
+
+# Output Gemini's response to the prompt and the inline audio.
+print(response.text)
+```
+
+### https:// file
+
+```python
+import litellm
+import os
+
+os.environ["GEMINI_API_KEY"] = ""
+
+litellm.set_verbose = True # 👈 See Raw call
+
+model = "gemini/gemini-1.5-flash"
+response = litellm.completion(
+    model=model,
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "Please summarize the file."},
+                {
+                    "type": "image_url",
+                    "image_url": "https://storage..."  # 👈 SET THE IMG URL
+                },
+            ],
+        }
+    ],
+)
+```
+
+### gs:// file
+
+```python
+import litellm
+import os
+
+os.environ["GEMINI_API_KEY"] = ""
+
+litellm.set_verbose = True # 👈 See Raw call
+
+model = "gemini/gemini-1.5-flash"
+response = litellm.completion(
+    model=model,
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "Please summarize the file."},
+                {
+                    "type": "image_url",
+                    "image_url": "gs://..."  # 👈 SET THE cloud storage bucket url
+                },
+            ],
+        }
+    ],
+)
+```
+
+
 ## Chat Models
 
 :::tip
diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index d2b9db037..3fc654d25 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -1131,7 +1131,14 @@ def convert_to_gemini_tool_call_result(
         "content": "function result goes here",
     }
     """
-    content = message.get("content", "")
+    content_str: str = ""
+    if isinstance(message["content"], str):
+        content_str = message["content"]
+    elif isinstance(message["content"], List):
+        content_list = message["content"]
+        for content in content_list:
+            if content["type"] == "text":
+                content_str += content["text"]
     name: Optional[str] = message.get("name", "")  # type: ignore
 
     # Recover name from last message with tool calls
@@ -1156,10 +1163,10 @@ def convert_to_gemini_tool_call_result(
 
     # We can't determine from openai message format whether it's a successful or
     # error call result so default to the successful result template
-    inferred_content_value = infer_protocol_value(value=content)
+    inferred_content_value = infer_protocol_value(value=content_str)
 
     _field = litellm.types.llms.vertex_ai.Field(
-        key="content", value={inferred_content_value: content}
+        key="content", value={inferred_content_value: content_str}
     )
 
     _function_call_args = litellm.types.llms.vertex_ai.FunctionCallArgs(fields=_field)
@@ -1174,7 +1181,7 @@ def convert_to_gemini_tool_call_result(
 
 
 def convert_to_anthropic_tool_result(
-    message: Union[dict, ChatCompletionToolMessage, ChatCompletionFunctionMessage]
+    message: Union[ChatCompletionToolMessage, ChatCompletionFunctionMessage]
 ) -> AnthropicMessagesToolResultParam:
     """
     OpenAI message with a tool result looks like:
@@ -1207,21 +1214,29 @@ def convert_to_anthropic_tool_result(
         ]
     }
     """
+    content_str: str = ""
+    if isinstance(message["content"], str):
+        content_str = message["content"]
+    elif isinstance(message["content"], List):
+        content_list = message["content"]
+        for content in content_list:
+            if content["type"] == "text":
+                content_str += content["text"]
     if message["role"] == "tool":
-        tool_call_id: str = message.get("tool_call_id")  # type: ignore
-        content: str = message.get("content")  # type: ignore
+        tool_message: ChatCompletionToolMessage = message
+        tool_call_id: str = tool_message["tool_call_id"]
 
         # We can't determine from openai message format whether it's a successful or
         # error call result so default to the successful result template
         anthropic_tool_result = AnthropicMessagesToolResultParam(
-            type="tool_result", tool_use_id=tool_call_id, content=content
+            type="tool_result", tool_use_id=tool_call_id, content=content_str
         )
 
         return anthropic_tool_result
     if message["role"] == "function":
-        content = message.get("content")  # type: ignore
-        tool_call_id = message.get("tool_call_id") or str(uuid.uuid4())  # type: ignore
+        function_message: ChatCompletionFunctionMessage = message
+        tool_call_id = function_message.get("tool_call_id") or str(uuid.uuid4())
         anthropic_tool_result = AnthropicMessagesToolResultParam(
-            type="tool_result", tool_use_id=tool_call_id, content=content
+            type="tool_result", tool_use_id=tool_call_id, content=content_str
        )
         return anthropic_tool_result
@@ -1624,7 +1639,8 @@ from litellm.types.llms.cohere import (
 
 
 def convert_openai_message_to_cohere_tool_result(
-    message, tool_calls: List
+    message: Union[ChatCompletionToolMessage, ChatCompletionFunctionMessage],
+    tool_calls: List,
 ) -> ToolResultObject:
     """
     OpenAI message with a tool result looks like:
@@ -1660,7 +1676,15 @@ def convert_openai_message_to_cohere_tool_result(
         ]
     },
     """
-    content_str: str = message.get("content", "")
+
+    content_str: str = ""
+    if isinstance(message["content"], str):
+        content_str = message["content"]
+    elif isinstance(message["content"], List):
+        content_list = message["content"]
+        for content in content_list:
+            if content["type"] == "text":
+                content_str += content["text"]
     if len(content_str) > 0:
         try:
             content = json.loads(content_str)
@@ -1687,7 +1711,8 @@ def convert_openai_message_to_cohere_tool_result(
         arguments = json.loads(arguments_str)
 
     if message["role"] == "function":
-        name = message.get("name")
+        function_message: ChatCompletionFunctionMessage = message
+        name = function_message["name"]
         cohere_tool_result: ToolResultObject = {
             "call": CallObject(name=name, parameters=arguments),
             "outputs": [content],
@@ -2292,7 +2317,7 @@ def _convert_to_bedrock_tool_call_invoke(
 
 
 def _convert_to_bedrock_tool_call_result(
-    message: dict,
+    message: Union[ChatCompletionToolMessage, ChatCompletionFunctionMessage]
 ) -> BedrockContentBlock:
     """
     OpenAI message with a tool result looks like:
@@ -2334,11 +2359,18 @@ def _convert_to_bedrock_tool_call_result(
 
     """
 
-    """
-    content = message.get("content", "")
+    content_str: str = ""
+    if isinstance(message["content"], str):
+        content_str = message["content"]
+    elif isinstance(message["content"], List):
+        content_list = message["content"]
+        for content in content_list:
+            if content["type"] == "text":
+                content_str += content["text"]
     name = message.get("name", "")
-    id = message.get("tool_call_id", str(uuid.uuid4()))
+    id = str(message.get("tool_call_id", str(uuid.uuid4())))
 
-    tool_result_content_block = BedrockToolResultContentBlock(text=content)
+    tool_result_content_block = BedrockToolResultContentBlock(text=content_str)
     tool_result = BedrockToolResultBlock(
         content=[tool_result_content_block],
         toolUseId=id,
diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_non_gemini.py b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_non_gemini.py
index b8e4ab130..971c313ec 100644
--- a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_non_gemini.py
+++ b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_non_gemini.py
@@ -160,7 +160,11 @@ def _gemini_convert_messages_with_history(
                     _part = PartType(text=element["text"])  # type: ignore
                     _parts.append(_part)
                 elif element["type"] == "image_url":
-                    image_url = element["image_url"]["url"]  # type: ignore
+                    img_element: ChatCompletionImageObject = element  # type: ignore
+                    if isinstance(img_element["image_url"], dict):
+                        image_url = img_element["image_url"]["url"]
+                    else:
+                        image_url = img_element["image_url"]
                     _part = _process_gemini_image(image_url=image_url)
                     _parts.append(_part)  # type: ignore
             user_content.extend(_parts)
diff --git a/litellm/proxy/auth/auth_checks.py b/litellm/proxy/auth/auth_checks.py
index 180fa309b..917885bc2 100644
--- a/litellm/proxy/auth/auth_checks.py
+++ b/litellm/proxy/auth/auth_checks.py
@@ -496,7 +496,7 @@ async def get_team_object(
 
     if check_cache_only:
         raise Exception(
-            f"Team doesn't exist in cache + check_cache_only=True. Team={team_id}. Create team via `/team/new` call."
+            f"Team doesn't exist in cache + check_cache_only=True. Team={team_id}."
         )
 
     # else, check db
diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py
index 114f27d44..ebe6853ac 100644
--- a/litellm/proxy/auth/user_api_key_auth.py
+++ b/litellm/proxy/auth/user_api_key_auth.py
@@ -568,7 +568,9 @@ async def user_api_key_auth(
                     if field_name in valid_token.__fields__:
                         setattr(valid_token, field_name, v)
             except Exception as e:
-                verbose_logger.warning(e)
+                verbose_logger.debug(
+                    e
+                )  # moving from .warning to .debug as it spams logs when team missing from cache.
 
     try:
         is_master_key_valid = secrets.compare_digest(api_key, master_key)  # type: ignore
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index c2907b61f..51df7de87 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -3021,7 +3021,7 @@ async def startup_event():
 @router.get(
     "/models", dependencies=[Depends(user_api_key_auth)], tags=["model management"]
 )  # if project requires model list
-def model_list(
+async def model_list(
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):
     """
diff --git a/litellm/rerank_api/types.py b/litellm/rerank_api/types.py
index 00cb32c18..dae79f875 100644
--- a/litellm/rerank_api/types.py
+++ b/litellm/rerank_api/types.py
@@ -25,9 +25,6 @@ class RerankResponse(BaseModel):
     meta: dict  # Contains api_version and billed_units
     _hidden_params: dict = {}
 
-    class Config:
-        underscore_attrs_are_private = True
-
     def __getitem__(self, key):
         return self.__dict__[key]
 
diff --git a/litellm/router.py b/litellm/router.py
index 8f603c561..1628a633a 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -4648,13 +4648,12 @@ class Router:
 
         Used for accurate 'get_model_list'.
""" - returned_models: List[DeploymentTypedDict] = [] for model in self.model_list: if model["model_name"] == model_name: if model_alias is not None: alias_model = copy.deepcopy(model) - alias_model["model_name"] = model_name + alias_model["model_name"] = model_alias returned_models.append(alias_model) else: returned_models.append(model) diff --git a/litellm/tests/test_bedrock_completion.py b/litellm/tests/test_bedrock_completion.py index bc27c5118..e949d1ee7 100644 --- a/litellm/tests/test_bedrock_completion.py +++ b/litellm/tests/test_bedrock_completion.py @@ -5,6 +5,8 @@ import traceback from dotenv import load_dotenv +import litellm.types + load_dotenv() import io import os @@ -1232,3 +1234,56 @@ def test_bedrock_cross_region_inference(): max_tokens=10, temperature=0.1, ) + + +from litellm.llms.prompt_templates.factory import _bedrock_converse_messages_pt + + +def test_bedrock_converse_translation_tool_message(): + from litellm.types.utils import ChatCompletionMessageToolCall, Function + + litellm.set_verbose = True + + messages = [ + { + "role": "user", + "content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses", + }, + { + "tool_call_id": "tooluse_DnqEmD5qR6y2-aJ-Xd05xw", + "role": "tool", + "name": "get_current_weather", + "content": [ + { + "text": '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}', + "type": "text", + } + ], + }, + ] + + translated_msg = _bedrock_converse_messages_pt( + messages=messages, model="", llm_provider="" + ) + + print(translated_msg) + assert translated_msg == [ + { + "role": "user", + "content": [ + { + "text": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses" + }, + { + "toolResult": { + "content": [ + { + "text": '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}' + } + ], + "toolUseId": "tooluse_DnqEmD5qR6y2-aJ-Xd05xw", + } + }, + ], + } + ] diff --git a/litellm/tests/test_function_calling.py b/litellm/tests/test_function_calling.py index f30f713ea..67d4fe6c9 100644 --- a/litellm/tests/test_function_calling.py +++ b/litellm/tests/test_function_calling.py @@ -48,8 +48,8 @@ def get_current_weather(location, unit="fahrenheit"): # "gpt-3.5-turbo-1106", # "mistral/mistral-large-latest", # "claude-3-haiku-20240307", - "gemini/gemini-1.5-pro", - # "anthropic.claude-3-sonnet-20240229-v1:0", + # "gemini/gemini-1.5-pro", + "anthropic.claude-3-sonnet-20240229-v1:0", ], ) @pytest.mark.flaky(retries=3, delay=1) diff --git a/litellm/tests/test_proxy_server.py b/litellm/tests/test_proxy_server.py index 102c126d1..8dc82a595 100644 --- a/litellm/tests/test_proxy_server.py +++ b/litellm/tests/test_proxy_server.py @@ -1432,3 +1432,72 @@ async def test_gemini_pass_through_endpoint(): ) print(resp.body) + + +@pytest.mark.asyncio +async def test_proxy_model_group_alias_checks(prisma_client): + """ + Check if model group alias is returned on + + `/v1/models` + `/v1/model/info` + `/v1/model_group/info` + """ + import json + + from fastapi import HTTPException, Request, Response + from starlette.datastructures import URL + + from litellm.proxy.proxy_server import model_group_info, model_info_v1, model_list + + setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + await litellm.proxy.proxy_server.prisma_client.connect() + + proxy_config = getattr(litellm.proxy.proxy_server, "proxy_config") + + _model_list = [ + { + "model_name": "gpt-3.5-turbo", + "litellm_params": {"model": 
"gpt-3.5-turbo"}, + } + ] + model_alias = "gpt-4" + router = litellm.Router( + model_list=_model_list, + model_group_alias={model_alias: "gpt-3.5-turbo"}, + ) + setattr(litellm.proxy.proxy_server, "llm_router", router) + setattr(litellm.proxy.proxy_server, "llm_model_list", _model_list) + + request = Request(scope={"type": "http", "method": "POST", "headers": {}}) + request._url = URL(url="/v1/models") + + resp = await model_list( + user_api_key_dict=UserAPIKeyAuth(models=[]), + ) + + assert len(resp) == 2 + print(resp) + + resp = await model_info_v1( + user_api_key_dict=UserAPIKeyAuth(models=[]), + ) + models = resp["data"] + is_model_alias_in_list = False + for item in models: + if model_alias == item["model_name"]: + is_model_alias_in_list = True + + assert is_model_alias_in_list + + resp = await model_group_info( + user_api_key_dict=UserAPIKeyAuth(models=[]), + ) + models = resp["data"] + is_model_alias_in_list = False + for item in models: + if model_alias == item.model_group: + is_model_alias_in_list = True + + assert is_model_alias_in_list diff --git a/litellm/types/completion.py b/litellm/types/completion.py index c8ddc7449..7b5ed4e50 100644 --- a/litellm/types/completion.py +++ b/litellm/types/completion.py @@ -1,7 +1,6 @@ -from typing import List, Optional, Union, Iterable +from typing import Iterable, List, Optional, Union from pydantic import BaseModel, ConfigDict, validator - from typing_extensions import Literal, Required, TypedDict @@ -94,7 +93,7 @@ class Function(TypedDict, total=False): class ChatCompletionToolMessageParam(TypedDict, total=False): - content: Required[str] + content: Required[Union[str, Iterable[ChatCompletionContentPartParam]]] """The contents of the tool message.""" role: Required[Literal["tool"]] @@ -105,7 +104,7 @@ class ChatCompletionToolMessageParam(TypedDict, total=False): class ChatCompletionFunctionMessageParam(TypedDict, total=False): - content: Required[Optional[str]] + content: Required[Union[str, Iterable[ChatCompletionContentPartParam]]] """The contents of the function message.""" name: Required[str] diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py index 9d65fe87e..f2048cfea 100644 --- a/litellm/types/llms/openai.py +++ b/litellm/types/llms/openai.py @@ -340,7 +340,7 @@ class ChatCompletionImageUrlObject(TypedDict, total=False): class ChatCompletionImageObject(TypedDict): type: Literal["image_url"] - image_url: ChatCompletionImageUrlObject + image_url: Union[str, ChatCompletionImageUrlObject] class OpenAIChatCompletionUserMessage(TypedDict): @@ -368,14 +368,15 @@ class ChatCompletionAssistantMessage(OpenAIChatCompletionAssistantMessage, total class ChatCompletionToolMessage(TypedDict): role: Literal["tool"] - content: str + content: Union[str, Iterable[ChatCompletionTextObject]] tool_call_id: str class ChatCompletionFunctionMessage(TypedDict): role: Literal["function"] - content: Optional[str] + content: Optional[Union[str, Iterable[ChatCompletionTextObject]]] name: str + tool_call_id: Optional[str] class OpenAIChatCompletionSystemMessage(TypedDict, total=False):