LiteLLM Minor Fixes and Improvements (#5537)

* fix(vertex_ai): Fixes issue where a multimodal message without text was failing Vertex calls

Fixes https://github.com/BerriAI/litellm/issues/5515
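
For illustration only (not part of this commit), a request shaped like the one below, where the user message carries an image part and no text part, is what used to trigger Vertex's "must have a text parameter" error; the converter now pads such messages with a blank text part. The model name and image data here are assumptions:

    import litellm

    # Image-only user message; previously rejected by Vertex AI / Gemini.
    response = litellm.completion(
        model="vertex_ai/gemini-1.5-pro",  # illustrative model
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": "data:image/jpeg;base64,/9j/2wCE..."},
                    }
                ],
            }
        ],
    )
    print(response.choices[0].message.content)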

* fix(azure.py): move to using HTTPHandler for OIDC token calls

Fixes an issue where SSL certificates weren't being picked up as expected

Closes https://github.com/BerriAI/litellm/issues/5522
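
A minimal sketch of the practical effect (the path and env-var approach are assumptions about the deployment, not part of the commit): because the token exchange now goes through LiteLLM's shared HTTP handler instead of a bare httpx.post, certificate configuration that the handler's SSL context already honors also applies to the OIDC call.

    import os

    # Illustrative: point Python's default SSL context (which httpx uses) at a
    # corporate CA bundle before litellm is imported, so the shared HTTP handler
    # used for the OIDC token exchange trusts it as well.
    os.environ["SSL_CERT_FILE"] = "/etc/ssl/certs/corp-ca-bundle.pem"

    import litellm  # imported after the env var is set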

* feat: allow admin to set a default_max_internal_user_budget in config, and allow setting more specific values as env vars

* fix(proxy_server.py): fix read for max_internal_user_budget
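
Sketch of the intended precedence between the two settings (values are illustrative; the fallback mirrors the ProxyConfig logic in this commit):

    import litellm

    # Admin-level default for internal users; illustrative value.
    litellm.default_max_internal_user_budget = 2.0

    # The more specific setting wins when present; the default only fills the gap.
    if litellm.max_internal_user_budget is None:
        litellm.max_internal_user_budget = litellm.default_max_internal_user_budget

    print(litellm.max_internal_user_budget)  # 2.0 when no specific budget was configured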

* build(model_prices_and_context_window.json): add regional gpt-4o-2024-08-06 pricing

Closes https://github.com/BerriAI/litellm/issues/5540
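
As a rough sanity check on the new entry (a back-of-the-envelope calculation, not part of the commit): at 0.00000275 USD per input token and 0.000011 USD per output token, a call with 1,000 prompt tokens and 500 completion tokens costs about 0.00825 USD.

    # Back-of-the-envelope cost for azure/gpt-4o-2024-08-06 regional pricing;
    # token counts are illustrative.
    input_cost_per_token = 0.00000275
    output_cost_per_token = 0.000011
    prompt_tokens, completion_tokens = 1_000, 500

    total = prompt_tokens * input_cost_per_token + completion_tokens * output_cost_per_token
    print(f"${total:.5f}")  # $0.00825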

* test: skip re-test
Krish Dholakia 2024-09-05 18:03:34 -07:00 committed by Ishaan Jaff
parent bc20879c23
commit ce67858ceb
10 changed files with 117 additions and 5 deletions

View file

@@ -257,6 +257,7 @@ upperbound_key_generate_params: Optional[LiteLLM_UpperboundKeyGenerateParams] =
default_user_params: Optional[Dict] = None
default_team_settings: Optional[List] = None
max_user_budget: Optional[float] = None
default_max_internal_user_budget: Optional[float] = None
max_internal_user_budget: Optional[float] = None
internal_user_budget_duration: Optional[str] = None
max_end_user_budget: Optional[float] = None

View file

@@ -402,7 +402,8 @@ def get_azure_ad_token_from_oidc(azure_ad_token: str):
    if azure_ad_token_access_token is not None:
        return azure_ad_token_access_token

    req_token = httpx.post(
    client = litellm.module_level_client
    req_token = client.post(
        f"{azure_authority_host}/{azure_tenant_id}/oauth2/v2.0/token",
        data={
            "client_id": azure_client_id,

View file

@@ -1,8 +1,9 @@
from typing import Literal, Tuple
from typing import List, Literal, Tuple
import httpx
from litellm import supports_system_messages, verbose_logger
from litellm.types.llms.vertex_ai import PartType
class VertexAIError(Exception):
@@ -108,3 +109,18 @@ def _get_gemini_url(
        )
    return url, endpoint


def _check_text_in_content(parts: List[PartType]) -> bool:
    """
    check that user_content has 'text' parameter.
    - Known Vertex Error: Unable to submit request because it must have a text parameter.
    - 'text' param needs to be len > 0
    - Relevant Issue: https://github.com/BerriAI/litellm/issues/5515
    """
    has_text_param = False
    for part in parts:
        if "text" in part and part.get("text"):
            has_text_param = True
    return has_text_param
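
For illustration only (not part of the diff), the helper treats empty-string text the same as missing text; these hypothetical calls show the expected behavior:

    _check_text_in_content([{"text": "hello"}])   # True
    _check_text_in_content([{"text": ""}])        # False: empty text doesn't count
    _check_text_in_content(
        [{"inline_data": {"data": "...", "mime_type": "image/jpeg"}}]
    )                                              # False: image-only parts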

View file

@@ -29,6 +29,8 @@ from litellm.types.llms.openai import AllMessageValues, ChatCompletionAssistantM
from litellm.types.llms.vertex_ai import *
from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
from .common_utils import _check_text_in_content
class VertexAIError(Exception):
    def __init__(self, status_code, message):
@@ -173,6 +175,19 @@ def _gemini_convert_messages_with_history(
            msg_i += 1

        if user_content:
            """
            check that user_content has 'text' parameter.
            - Known Vertex Error: Unable to submit request because it must have a text parameter.
            - Relevant Issue: https://github.com/BerriAI/litellm/issues/5515
            """
            has_text_in_content = _check_text_in_content(user_content)
            if has_text_in_content is False:
                verbose_logger.warning(
                    "No text in user content. Adding a blank text to user content, to ensure Gemini doesn't fail the request. Relevant Issue - https://github.com/BerriAI/litellm/issues/5515"
                )
                user_content.append(
                    PartType(text=" ")
                )  # add a blank text, to ensure Gemini doesn't fail the request.
            contents.append(ContentType(role="user", parts=user_content))

        assistant_content = []
        ## MERGE CONSECUTIVE ASSISTANT CONTENT ##

View file

@@ -535,6 +535,18 @@
        "supports_vision": true
    },
    "azure/gpt-4o-2024-08-06": {
        "max_tokens": 16384,
        "max_input_tokens": 128000,
        "max_output_tokens": 16384,
        "input_cost_per_token": 0.00000275,
        "output_cost_per_token": 0.000011,
        "litellm_provider": "azure",
        "mode": "chat",
        "supports_function_calling": true,
        "supports_parallel_function_calling": true,
        "supports_vision": true
    },
    "azure/global-standard/gpt-4o-2024-08-06": {
        "max_tokens": 16384,
        "max_input_tokens": 128000,
        "max_output_tokens": 16384,

View file

@@ -1,5 +1,7 @@
model_list:
  - model_name: "*"
    litellm_params:
      model: openai/*

litellm_settings:
  default_max_internal_user_budget: 2

View file

@@ -1645,6 +1645,14 @@ class ProxyConfig:
                verbose_proxy_logger.debug(
                    f"litellm.post_call_rules: {litellm.post_call_rules}"
                )
            elif key == "max_internal_user_budget":
                litellm.max_internal_user_budget = float(value)  # type: ignore
            elif key == "default_max_internal_user_budget":
                litellm.default_max_internal_user_budget = float(value)
                if litellm.max_internal_user_budget is None:
                    litellm.max_internal_user_budget = (
                        litellm.default_max_internal_user_budget
                    )
            elif key == "custom_provider_map":
                from litellm.utils import custom_llm_setup

View file

@@ -655,12 +655,11 @@ def test_gemini_pro_vision_base64():
    try:
        load_vertex_ai_credentials()
        litellm.set_verbose = True
        litellm.num_retries = 3
        image_path = "../proxy/cached_logo.jpg"
        # Getting the base64 string
        base64_image = encode_image(image_path)
        resp = litellm.completion(
            model="vertex_ai/gemini-pro-vision",
            model="vertex_ai/gemini-1.5-pro",
            messages=[
                {
                    "role": "user",
@@ -679,6 +678,8 @@ def test_gemini_pro_vision_base64():
        print(resp)
        prompt_tokens = resp.usage.prompt_tokens
    except litellm.InternalServerError:
        pass
    except litellm.RateLimitError as e:
        pass
    except Exception as e:

View file

@@ -22,6 +22,9 @@ from litellm.llms.prompt_templates.factory import (
    llama_2_chat_pt,
    prompt_factory,
)
from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_non_gemini import (
    _gemini_convert_messages_with_history,
)
def test_llama_3_prompt():
@@ -388,3 +391,44 @@ def test_bedrock_parallel_tool_calling_pt(provider):
        translated_messages[number_of_messages - 1]["role"]
        != translated_messages[number_of_messages - 2]["role"]
    )


def test_vertex_only_image_user_message():
    base64_image = "/9j/2wCEAAgGBgcGBQ"
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                },
            ],
        },
    ]
    response = _gemini_convert_messages_with_history(messages=messages)
    expected_response = [
        {
            "role": "user",
            "parts": [
                {
                    "inline_data": {
                        "data": "/9j/2wCEAAgGBgcGBQ",
                        "mime_type": "image/jpeg",
                    }
                },
                {"text": " "},
            ],
        }
    ]
    assert len(response) == len(expected_response)
    for idx, content in enumerate(response):
        assert (
            content == expected_response[idx]
        ), "Invalid gemini input. Got={}, Expected={}".format(
            content, expected_response[idx]
        )

View file

@@ -535,6 +535,18 @@
        "supports_vision": true
    },
    "azure/gpt-4o-2024-08-06": {
        "max_tokens": 16384,
        "max_input_tokens": 128000,
        "max_output_tokens": 16384,
        "input_cost_per_token": 0.00000275,
        "output_cost_per_token": 0.000011,
        "litellm_provider": "azure",
        "mode": "chat",
        "supports_function_calling": true,
        "supports_parallel_function_calling": true,
        "supports_vision": true
    },
    "azure/global-standard/gpt-4o-2024-08-06": {
        "max_tokens": 16384,
        "max_input_tokens": 128000,
        "max_output_tokens": 16384,