From c910a32439dd1f251310d3c2ed94e10a736c45fb Mon Sep 17 00:00:00 2001
From: Krish Dholakia
Date: Thu, 5 Sep 2024 18:03:34 -0700
Subject: [PATCH] LiteLLM Minor Fixes and Improvements (#5537)

* fix(vertex_ai): fix issue where a multimodal message without text was failing Vertex calls

Fixes https://github.com/BerriAI/litellm/issues/5515

* fix(azure.py): move to using the HTTP handler for OIDC token calls

Fixes issue where SSL certificates weren't being picked up as expected

Closes https://github.com/BerriAI/litellm/issues/5522

* feat: allow admin to set a default_max_internal_user_budget in config, and allow more specific values to be set as env vars

* fix(proxy_server.py): fix read for max_internal_user_budget

* build(model_prices_and_context_window.json): add regional gpt-4o-2024-08-06 pricing

Closes https://github.com/BerriAI/litellm/issues/5540

* test: skip re-test
---
 litellm/__init__.py                           |  1 +
 litellm/llms/AzureOpenAI/azure.py             |  3 +-
 .../common_utils.py                           | 18 +++++++-
 .../vertex_ai_non_gemini.py                   | 15 +++++++
 ...odel_prices_and_context_window_backup.json | 12 +++++
 litellm/proxy/_new_secret_config.yaml         |  4 +-
 litellm/proxy/proxy_server.py                 |  8 ++++
 .../tests/test_amazing_vertex_completion.py   |  5 ++-
 litellm/tests/test_prompt_factory.py          | 44 +++++++++++++++++++
 model_prices_and_context_window.json          | 12 +++++
 10 files changed, 117 insertions(+), 5 deletions(-)

diff --git a/litellm/__init__.py b/litellm/__init__.py
index ce753b1109..25cae83282 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -257,6 +257,7 @@ upperbound_key_generate_params: Optional[LiteLLM_UpperboundKeyGenerateParams] =
 default_user_params: Optional[Dict] = None
 default_team_settings: Optional[List] = None
 max_user_budget: Optional[float] = None
+default_max_internal_user_budget: Optional[float] = None
 max_internal_user_budget: Optional[float] = None
 internal_user_budget_duration: Optional[str] = None
 max_end_user_budget: Optional[float] = None
diff --git a/litellm/llms/AzureOpenAI/azure.py b/litellm/llms/AzureOpenAI/azure.py
index 098086c8b9..70f13375d2 100644
--- a/litellm/llms/AzureOpenAI/azure.py
+++ b/litellm/llms/AzureOpenAI/azure.py
@@ -402,7 +402,8 @@ def get_azure_ad_token_from_oidc(azure_ad_token: str):
     if azure_ad_token_access_token is not None:
         return azure_ad_token_access_token
 
-    req_token = httpx.post(
+    client = litellm.module_level_client
+    req_token = client.post(
         f"{azure_authority_host}/{azure_tenant_id}/oauth2/v2.0/token",
         data={
             "client_id": azure_client_id,
diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/common_utils.py b/litellm/llms/vertex_ai_and_google_ai_studio/common_utils.py
index 2fef2233c0..8604d03836 100644
--- a/litellm/llms/vertex_ai_and_google_ai_studio/common_utils.py
+++ b/litellm/llms/vertex_ai_and_google_ai_studio/common_utils.py
@@ -1,8 +1,9 @@
-from typing import Literal, Tuple
+from typing import List, Literal, Tuple
 
 import httpx
 
 from litellm import supports_system_messages, verbose_logger
+from litellm.types.llms.vertex_ai import PartType
 
 
 class VertexAIError(Exception):
@@ -108,3 +109,18 @@ def _get_gemini_url(
         )
 
     return url, endpoint
+
+
+def _check_text_in_content(parts: List[PartType]) -> bool:
+    """
+    check that user_content has 'text' parameter.
+        - Known Vertex Error: Unable to submit request because it must have a text parameter.
+        - 'text' param needs to be len > 0
+        - Relevant Issue: https://github.com/BerriAI/litellm/issues/5515
+    """
+    has_text_param = False
+    for part in parts:
+        if "text" in part and part.get("text"):
+            has_text_param = True
+
+    return has_text_param
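
A minimal sketch of what this helper enables (illustrative only, not part of the patch; the import paths and the PartType TypedDict are taken from the diff above):

    # An image-only user turn has no non-empty "text" part, so the helper
    # returns False and the caller pads the parts list before the request
    # reaches Gemini (see https://github.com/BerriAI/litellm/issues/5515).
    from litellm.llms.vertex_ai_and_google_ai_studio.common_utils import (
        _check_text_in_content,
    )
    from litellm.types.llms.vertex_ai import PartType

    parts = [PartType(inline_data={"mime_type": "image/jpeg", "data": "/9j/..."})]
    assert _check_text_in_content(parts) is False

    # Mirrors the guard added to _gemini_convert_messages_with_history below:
    if not _check_text_in_content(parts):
        parts.append(PartType(text=" "))  # blank-but-non-empty text satisfies Vertex
    assert _check_text_in_content(parts) is True
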
diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_non_gemini.py b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_non_gemini.py
index 44367c5f09..b8e4ab1309 100644
--- a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_non_gemini.py
+++ b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_non_gemini.py
@@ -29,6 +29,8 @@ from litellm.types.llms.openai import AllMessageValues, ChatCompletionAssistantM
 from litellm.types.llms.vertex_ai import *
 from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
 
+from .common_utils import _check_text_in_content
+
 
 class VertexAIError(Exception):
     def __init__(self, status_code, message):
@@ -173,6 +175,19 @@ def _gemini_convert_messages_with_history(
             msg_i += 1
 
         if user_content:
+            """
+            check that user_content has 'text' parameter.
+            - Known Vertex Error: Unable to submit request because it must have a text parameter.
+            - Relevant Issue: https://github.com/BerriAI/litellm/issues/5515
+            """
+            has_text_in_content = _check_text_in_content(user_content)
+            if has_text_in_content is False:
+                verbose_logger.warning(
+                    "No text in user content. Adding a blank text to user content, to ensure Gemini doesn't fail the request. Relevant Issue - https://github.com/BerriAI/litellm/issues/5515"
+                )
+                user_content.append(
+                    PartType(text=" ")
+                )  # add a blank text, to ensure Gemini doesn't fail the request.
             contents.append(ContentType(role="user", parts=user_content))
 
         assistant_content = []
         ## MERGE CONSECUTIVE ASSISTANT CONTENT ##
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index fd5d8feadc..b58725d5f9 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -535,6 +535,18 @@
         "supports_vision": true
     },
     "azure/gpt-4o-2024-08-06": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 0.00000275,
+        "output_cost_per_token": 0.000011,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
+    "azure/global-standard/gpt-4o-2024-08-06": {
         "max_tokens": 16384,
         "max_input_tokens": 128000,
         "max_output_tokens": 16384,
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 515de17988..51a995285c 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,5 +1,7 @@
-
 model_list:
   - model_name: "*"
     litellm_params:
       model: openai/*
+
+litellm_settings:
+  default_max_internal_user_budget: 2
\ No newline at end of file
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index b0eab1ba8d..8d7c524a41 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1645,6 +1645,14 @@ class ProxyConfig:
                     verbose_proxy_logger.debug(
                         f"litellm.post_call_rules: {litellm.post_call_rules}"
                     )
+                elif key == "max_internal_user_budget":
+                    litellm.max_internal_user_budget = float(value)  # type: ignore
+                elif key == "default_max_internal_user_budget":
+                    litellm.default_max_internal_user_budget = float(value)
+                    if litellm.max_internal_user_budget is None:
+                        litellm.max_internal_user_budget = (
+                            litellm.default_max_internal_user_budget
+                        )
                 elif key == "custom_provider_map":
                     from litellm.utils import custom_llm_setup
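
A sketch of the precedence these two branches encode (illustrative only, not part of the patch; the helper function below is hypothetical): an explicitly configured max_internal_user_budget always wins, and default_max_internal_user_budget only backfills it when nothing was set.

    import litellm

    def apply_budget_setting(key: str, value: str) -> None:
        # Hypothetical helper mirroring the new elif branches in ProxyConfig;
        # values arrive as strings from the YAML config loader.
        if key == "max_internal_user_budget":
            litellm.max_internal_user_budget = float(value)
        elif key == "default_max_internal_user_budget":
            litellm.default_max_internal_user_budget = float(value)
            # Backfill only when no explicit budget was configured.
            if litellm.max_internal_user_budget is None:
                litellm.max_internal_user_budget = (
                    litellm.default_max_internal_user_budget
                )

    apply_budget_setting("default_max_internal_user_budget", "2")
    assert litellm.max_internal_user_budget == 2.0  # assumes it started as None
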
diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py
index 7d956854e7..9b9eee2111 100644
--- a/litellm/tests/test_amazing_vertex_completion.py
+++ b/litellm/tests/test_amazing_vertex_completion.py
@@ -655,12 +655,11 @@ def test_gemini_pro_vision_base64():
     try:
         load_vertex_ai_credentials()
         litellm.set_verbose = True
-        litellm.num_retries = 3
         image_path = "../proxy/cached_logo.jpg"
         # Getting the base64 string
         base64_image = encode_image(image_path)
         resp = litellm.completion(
-            model="vertex_ai/gemini-pro-vision",
+            model="vertex_ai/gemini-1.5-pro",
             messages=[
                 {
                     "role": "user",
@@ -679,6 +678,8 @@ def test_gemini_pro_vision_base64():
         print(resp)
 
         prompt_tokens = resp.usage.prompt_tokens
+    except litellm.InternalServerError:
+        pass
     except litellm.RateLimitError as e:
         pass
     except Exception as e:
diff --git a/litellm/tests/test_prompt_factory.py b/litellm/tests/test_prompt_factory.py
index 81339e8318..4c99efb3eb 100644
--- a/litellm/tests/test_prompt_factory.py
+++ b/litellm/tests/test_prompt_factory.py
@@ -22,6 +22,9 @@ from litellm.llms.prompt_templates.factory import (
     llama_2_chat_pt,
     prompt_factory,
 )
+from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_non_gemini import (
+    _gemini_convert_messages_with_history,
+)
 
 
 def test_llama_3_prompt():
@@ -388,3 +391,44 @@ def test_bedrock_parallel_tool_calling_pt(provider):
         translated_messages[number_of_messages - 1]["role"]
         != translated_messages[number_of_messages - 2]["role"]
     )
+
+
+def test_vertex_only_image_user_message():
+    base64_image = "/9j/2wCEAAgGBgcGBQ"
+
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
+                },
+            ],
+        },
+    ]
+
+    response = _gemini_convert_messages_with_history(messages=messages)
+
+    expected_response = [
+        {
+            "role": "user",
+            "parts": [
+                {
+                    "inline_data": {
+                        "data": "/9j/2wCEAAgGBgcGBQ",
+                        "mime_type": "image/jpeg",
+                    }
+                },
+                {"text": " "},
+            ],
+        }
+    ]
+
+    assert len(response) == len(expected_response)
+    for idx, content in enumerate(response):
+        assert (
+            content == expected_response[idx]
+        ), "Invalid gemini input. Got={}, Expected={}".format(
+            content, expected_response[idx]
+        )
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index fd5d8feadc..b58725d5f9 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -535,6 +535,18 @@
         "supports_vision": true
     },
     "azure/gpt-4o-2024-08-06": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 0.00000275,
+        "output_cost_per_token": 0.000011,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
+    "azure/global-standard/gpt-4o-2024-08-06": {
         "max_tokens": 16384,
         "max_input_tokens": 128000,
         "max_output_tokens": 16384,
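
A quick sanity check of the new pricing entries (illustrative only, not part of the patch; the token counts are made up), using plain arithmetic on the JSON fields above:

    # azure/gpt-4o-2024-08-06: $2.75 per 1M input tokens, $11 per 1M output tokens.
    input_cost_per_token = 0.00000275
    output_cost_per_token = 0.000011

    prompt_tokens, completion_tokens = 1_000, 500  # hypothetical request
    total = (
        prompt_tokens * input_cost_per_token
        + completion_tokens * output_cost_per_token
    )
    print(f"${total:.6f}")  # -> $0.008250
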