From c910a32439dd1f251310d3c2ed94e10a736c45fb Mon Sep 17 00:00:00 2001
From: Krish Dholakia
Date: Thu, 5 Sep 2024 18:03:34 -0700
Subject: [PATCH] LiteLLM Minor Fixes and Improvements (#5537)

* fix(vertex_ai): fix issue where a multimodal message without text was failing Vertex calls

Fixes https://github.com/BerriAI/litellm/issues/5515

* fix(azure.py): move to using the HTTP handler for OIDC token calls

Fixes issue where SSL certificates weren't being picked up as expected

Closes https://github.com/BerriAI/litellm/issues/5522

* feat: allow admin to set a default_max_internal_user_budget in config, and allow more specific values to be set as env vars

* fix(proxy_server.py): fix read for max_internal_user_budget

* build(model_prices_and_context_window.json): add regional gpt-4o-2024-08-06 pricing

Closes https://github.com/BerriAI/litellm/issues/5540

* test: skip re-test
---
 litellm/__init__.py                           |  1 +
 litellm/llms/AzureOpenAI/azure.py             |  3 +-
 .../common_utils.py                           | 18 +++++++-
 .../vertex_ai_non_gemini.py                   | 15 +++++++
 ...odel_prices_and_context_window_backup.json | 12 +++++
 litellm/proxy/_new_secret_config.yaml         |  4 +-
 litellm/proxy/proxy_server.py                 |  8 ++++
 .../tests/test_amazing_vertex_completion.py   |  5 ++-
 litellm/tests/test_prompt_factory.py          | 44 +++++++++++++++++++
 model_prices_and_context_window.json          | 12 +++++
 10 files changed, 117 insertions(+), 5 deletions(-)

diff --git a/litellm/__init__.py b/litellm/__init__.py
index ce753b1109..25cae83282 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -257,6 +257,7 @@ upperbound_key_generate_params: Optional[LiteLLM_UpperboundKeyGenerateParams] =
 default_user_params: Optional[Dict] = None
 default_team_settings: Optional[List] = None
 max_user_budget: Optional[float] = None
+default_max_internal_user_budget: Optional[float] = None
 max_internal_user_budget: Optional[float] = None
 internal_user_budget_duration: Optional[str] = None
 max_end_user_budget: Optional[float] = None
diff --git a/litellm/llms/AzureOpenAI/azure.py b/litellm/llms/AzureOpenAI/azure.py
index 098086c8b9..70f13375d2 100644
--- a/litellm/llms/AzureOpenAI/azure.py
+++ b/litellm/llms/AzureOpenAI/azure.py
@@ -402,7 +402,8 @@ def get_azure_ad_token_from_oidc(azure_ad_token: str):
     if azure_ad_token_access_token is not None:
         return azure_ad_token_access_token
 
-    req_token = httpx.post(
+    client = litellm.module_level_client
+    req_token = client.post(
         f"{azure_authority_host}/{azure_tenant_id}/oauth2/v2.0/token",
         data={
             "client_id": azure_client_id,
diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/common_utils.py b/litellm/llms/vertex_ai_and_google_ai_studio/common_utils.py
index 2fef2233c0..8604d03836 100644
--- a/litellm/llms/vertex_ai_and_google_ai_studio/common_utils.py
+++ b/litellm/llms/vertex_ai_and_google_ai_studio/common_utils.py
@@ -1,8 +1,9 @@
-from typing import Literal, Tuple
+from typing import List, Literal, Tuple
 
 import httpx
 
 from litellm import supports_system_messages, verbose_logger
+from litellm.types.llms.vertex_ai import PartType
 
 
 class VertexAIError(Exception):
@@ -108,3 +109,18 @@ def _get_gemini_url(
         )
 
     return url, endpoint
+
+
+def _check_text_in_content(parts: List[PartType]) -> bool:
+    """
+    check that user_content has 'text' parameter.
+        - Known Vertex Error: Unable to submit request because it must have a text parameter.
+        - 'text' param needs to be len > 0
+        - Relevant Issue: https://github.com/BerriAI/litellm/issues/5515
+    """
+    has_text_param = False
+    for part in parts:
+        if "text" in part and part.get("text"):
+            has_text_param = True
+
+    return has_text_param
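
A minimal sketch of what this helper enables (illustrative only, not part of the patch; the import paths and the PartType TypedDict are taken from the diff above):

    # An image-only user turn has no non-empty "text" part, so the helper
    # returns False and the caller pads the parts list before the request
    # reaches Gemini (see https://github.com/BerriAI/litellm/issues/5515).
    from litellm.llms.vertex_ai_and_google_ai_studio.common_utils import (
        _check_text_in_content,
    )
    from litellm.types.llms.vertex_ai import PartType

    parts = [PartType(inline_data={"mime_type": "image/jpeg", "data": "/9j/..."})]
    assert _check_text_in_content(parts) is False

    # Mirrors the guard added to _gemini_convert_messages_with_history below:
    if not _check_text_in_content(parts):
        parts.append(PartType(text=" "))  # blank-but-non-empty text satisfies Vertex
    assert _check_text_in_content(parts) is True
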
diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_non_gemini.py b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_non_gemini.py
index 44367c5f09..b8e4ab1309 100644
--- a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_non_gemini.py
+++ b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_non_gemini.py
@@ -29,6 +29,8 @@ from litellm.types.llms.openai import AllMessageValues, ChatCompletionAssistantM
 from litellm.types.llms.vertex_ai import *
 from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
 
+from .common_utils import _check_text_in_content
+
 
 class VertexAIError(Exception):
     def __init__(self, status_code, message):
@@ -173,6 +175,19 @@ def _gemini_convert_messages_with_history(
             msg_i += 1
 
         if user_content:
+            """
+            check that user_content has 'text' parameter.
+            - Known Vertex Error: Unable to submit request because it must have a text parameter.
+            - Relevant Issue: https://github.com/BerriAI/litellm/issues/5515
+            """
+            has_text_in_content = _check_text_in_content(user_content)
+            if has_text_in_content is False:
+                verbose_logger.warning(
+                    "No text in user content. Adding a blank text to user content, to ensure Gemini doesn't fail the request. Relevant Issue - https://github.com/BerriAI/litellm/issues/5515"
+                )
+                user_content.append(
+                    PartType(text=" ")
+                )  # add a blank text, to ensure Gemini doesn't fail the request.
             contents.append(ContentType(role="user", parts=user_content))
 
         assistant_content = []
         ## MERGE CONSECUTIVE ASSISTANT CONTENT ##
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index fd5d8feadc..b58725d5f9 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -535,6 +535,18 @@
         "supports_vision": true
     },
     "azure/gpt-4o-2024-08-06": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 0.00000275,
+        "output_cost_per_token": 0.000011,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
+    "azure/global-standard/gpt-4o-2024-08-06": {
         "max_tokens": 16384,
         "max_input_tokens": 128000,
         "max_output_tokens": 16384,
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 515de17988..51a995285c 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,5 +1,7 @@
-
 model_list:
   - model_name: "*"
     litellm_params:
       model: openai/*
+
+litellm_settings:
+  default_max_internal_user_budget: 2
\ No newline at end of file
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index b0eab1ba8d..8d7c524a41 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1645,6 +1645,14 @@ class ProxyConfig:
                     verbose_proxy_logger.debug(
                         f"litellm.post_call_rules: {litellm.post_call_rules}"
                     )
+                elif key == "max_internal_user_budget":
+                    litellm.max_internal_user_budget = float(value)  # type: ignore
+                elif key == "default_max_internal_user_budget":
+                    litellm.default_max_internal_user_budget = float(value)
+                    if litellm.max_internal_user_budget is None:
+                        litellm.max_internal_user_budget = (
+                            litellm.default_max_internal_user_budget
+                        )
                 elif key == "custom_provider_map":
                     from litellm.utils import custom_llm_setup
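
A sketch of the precedence these two branches encode (illustrative only, not part of the patch; the helper function below is hypothetical): an explicitly configured max_internal_user_budget always wins, and default_max_internal_user_budget only backfills it when nothing was set.

    import litellm

    def apply_budget_setting(key: str, value: str) -> None:
        # Hypothetical helper mirroring the new elif branches in ProxyConfig;
        # values arrive as strings from the YAML config loader.
        if key == "max_internal_user_budget":
            litellm.max_internal_user_budget = float(value)
        elif key == "default_max_internal_user_budget":
            litellm.default_max_internal_user_budget = float(value)
            # Backfill only when no explicit budget was configured.
            if litellm.max_internal_user_budget is None:
                litellm.max_internal_user_budget = (
                    litellm.default_max_internal_user_budget
                )

    apply_budget_setting("default_max_internal_user_budget", "2")
    assert litellm.max_internal_user_budget == 2.0  # assumes it started as None
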
diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py
index 7d956854e7..9b9eee2111 100644
--- a/litellm/tests/test_amazing_vertex_completion.py
+++ b/litellm/tests/test_amazing_vertex_completion.py
@@ -655,12 +655,11 @@ def test_gemini_pro_vision_base64():
     try:
         load_vertex_ai_credentials()
         litellm.set_verbose = True
-        litellm.num_retries = 3
         image_path = "../proxy/cached_logo.jpg"
         # Getting the base64 string
         base64_image = encode_image(image_path)
         resp = litellm.completion(
-            model="vertex_ai/gemini-pro-vision",
+            model="vertex_ai/gemini-1.5-pro",
             messages=[
                 {
                     "role": "user",
@@ -679,6 +678,8 @@ def test_gemini_pro_vision_base64():
         print(resp)
 
         prompt_tokens = resp.usage.prompt_tokens
+    except litellm.InternalServerError:
+        pass
     except litellm.RateLimitError as e:
         pass
     except Exception as e:
diff --git a/litellm/tests/test_prompt_factory.py b/litellm/tests/test_prompt_factory.py
index 81339e8318..4c99efb3eb 100644
--- a/litellm/tests/test_prompt_factory.py
+++ b/litellm/tests/test_prompt_factory.py
@@ -22,6 +22,9 @@ from litellm.llms.prompt_templates.factory import (
     llama_2_chat_pt,
     prompt_factory,
 )
+from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_non_gemini import (
+    _gemini_convert_messages_with_history,
+)
 
 
 def test_llama_3_prompt():
@@ -388,3 +391,44 @@ def test_bedrock_parallel_tool_calling_pt(provider):
         translated_messages[number_of_messages - 1]["role"]
         != translated_messages[number_of_messages - 2]["role"]
     )
+
+
+def test_vertex_only_image_user_message():
+    base64_image = "/9j/2wCEAAgGBgcGBQ"
+
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
+                },
+            ],
+        },
+    ]
+
+    response = _gemini_convert_messages_with_history(messages=messages)
+
+    expected_response = [
+        {
+            "role": "user",
+            "parts": [
+                {
+                    "inline_data": {
+                        "data": "/9j/2wCEAAgGBgcGBQ",
+                        "mime_type": "image/jpeg",
+                    }
+                },
+                {"text": " "},
+            ],
+        }
+    ]
+
+    assert len(response) == len(expected_response)
+    for idx, content in enumerate(response):
+        assert (
+            content == expected_response[idx]
+        ), "Invalid gemini input. Got={}, Expected={}".format(
+            content, expected_response[idx]
+        )
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index fd5d8feadc..b58725d5f9 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -535,6 +535,18 @@
         "supports_vision": true
     },
     "azure/gpt-4o-2024-08-06": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 0.00000275,
+        "output_cost_per_token": 0.000011,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
+    "azure/global-standard/gpt-4o-2024-08-06": {
         "max_tokens": 16384,
         "max_input_tokens": 128000,
         "max_output_tokens": 16384,
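
A quick sanity check of the new pricing entries (illustrative only, not part of the patch; the token counts are made up), using plain arithmetic on the JSON fields above:

    # azure/gpt-4o-2024-08-06: $2.75 per 1M input tokens, $11 per 1M output tokens.
    input_cost_per_token = 0.00000275
    output_cost_per_token = 0.000011

    prompt_tokens, completion_tokens = 1_000, 500  # hypothetical request
    total = (
        prompt_tokens * input_cost_per_token
        + completion_tokens * output_cost_per_token
    )
    print(f"${total:.6f}")  # -> $0.008250
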