Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 19:24:27 +00:00
LiteLLM Minor Fixes and Improvements (#5537)
* fix(vertex_ai): Fixes issue where multimodal message without text was failing vertex calls
  Fixes https://github.com/BerriAI/litellm/issues/5515
* fix(azure.py): move to using httphandler for oidc token calls
  Fixes issue where ssl certificates weren't being picked up as expected
  Closes https://github.com/BerriAI/litellm/issues/5522
* feat: Allows admin to set a default_max_internal_user_budget in config, and allow setting more specific values as env vars
* fix(proxy_server.py): fix read for max_internal_user_budget
* build(model_prices_and_context_window.json): add regional gpt-4o-2024-08-06 pricing
  Closes https://github.com/BerriAI/litellm/issues/5540
* test: skip re-test
Parent: 8b997e78a4
Commit: c910a32439
10 changed files with 117 additions and 5 deletions

@@ -257,6 +257,7 @@ upperbound_key_generate_params: Optional[LiteLLM_UpperboundKeyGenerateParams] =
 default_user_params: Optional[Dict] = None
 default_team_settings: Optional[List] = None
 max_user_budget: Optional[float] = None
+default_max_internal_user_budget: Optional[float] = None
 max_internal_user_budget: Optional[float] = None
 internal_user_budget_duration: Optional[str] = None
 max_end_user_budget: Optional[float] = None
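
The hunk above adds default_max_internal_user_budget alongside the existing max_internal_user_budget knob. A hypothetical sketch (not code from this commit) of the precedence the commit message describes, where a more specific value wins over the admin-configured default:

from typing import Optional

def resolve_internal_user_budget(
    explicit_budget: Optional[float],
    default_max_internal_user_budget: Optional[float],
) -> Optional[float]:
    # Illustrative only: a more specific budget (per-user / env var) takes
    # priority; otherwise fall back to the admin-configured default.
    if explicit_budget is not None:
        return explicit_budget
    return default_max_internal_user_budget

print(resolve_internal_user_budget(None, 10.0))  # 10.0
print(resolve_internal_user_budget(5.0, 10.0))   # 5.0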

@@ -402,7 +402,8 @@ def get_azure_ad_token_from_oidc(azure_ad_token: str):
     if azure_ad_token_access_token is not None:
         return azure_ad_token_access_token

-    req_token = httpx.post(
+    client = litellm.module_level_client
+    req_token = client.post(
         f"{azure_authority_host}/{azure_tenant_id}/oauth2/v2.0/token",
         data={
             "client_id": azure_client_id,
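
Replacing the bare httpx.post with litellm.module_level_client.post routes the OIDC token request through LiteLLM's shared HTTP handler, so it picks up the library-wide SSL configuration instead of httpx defaults. A rough sketch of that idea; the env-var names and client construction below are assumptions for illustration, not LiteLLM's actual handler code:

import os
import httpx

# Assumed for illustration: SSL settings are read once and applied to one shared client.
ssl_verify = os.environ.get("SSL_VERIFY", "true").lower() != "false"
ssl_cert = os.environ.get("SSL_CERTIFICATE")  # optional path to a client certificate

module_level_client = httpx.Client(verify=ssl_verify, cert=ssl_cert)

def fetch_oidc_token(token_url: str, data: dict) -> httpx.Response:
    # every caller posting through the shared client gets the same cert/verify behavior
    return module_level_client.post(token_url, data=data)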

@@ -1,8 +1,9 @@
-from typing import Literal, Tuple
+from typing import List, Literal, Tuple

 import httpx

 from litellm import supports_system_messages, verbose_logger
+from litellm.types.llms.vertex_ai import PartType


 class VertexAIError(Exception):

@@ -108,3 +109,18 @@ def _get_gemini_url(
     )

     return url, endpoint
+
+
+def _check_text_in_content(parts: List[PartType]) -> bool:
+    """
+    check that user_content has 'text' parameter.
+    - Known Vertex Error: Unable to submit request because it must have a text parameter.
+    - 'text' param needs to be len > 0
+    - Relevant Issue: https://github.com/BerriAI/litellm/issues/5515
+    """
+    has_text_param = False
+    for part in parts:
+        if "text" in part and part.get("text"):
+            has_text_param = True
+
+    return has_text_param
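
A minimal usage sketch of the new helper, with its body copied from the hunk above so the snippet runs on its own (in practice it is imported from the Vertex common_utils module):

from typing import List

def _check_text_in_content(parts: List[dict]) -> bool:
    # True only if some part carries a non-empty "text" value
    has_text_param = False
    for part in parts:
        if "text" in part and part.get("text"):
            has_text_param = True
    return has_text_param

image_only = [{"inline_data": {"mime_type": "image/jpeg", "data": "/9j/..."}}]
print(_check_text_in_content(image_only))        # False -> caller pads with {"text": " "}
print(_check_text_in_content([{"text": "hi"}]))  # True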

@@ -29,6 +29,8 @@ from litellm.types.llms.openai import AllMessageValues, ChatCompletionAssistantM
 from litellm.types.llms.vertex_ai import *
 from litellm.utils import CustomStreamWrapper, ModelResponse, Usage

+from .common_utils import _check_text_in_content
+

 class VertexAIError(Exception):
     def __init__(self, status_code, message):

@@ -173,6 +175,19 @@ def _gemini_convert_messages_with_history(
             msg_i += 1

         if user_content:
+            """
+            check that user_content has 'text' parameter.
+            - Known Vertex Error: Unable to submit request because it must have a text parameter.
+            - Relevant Issue: https://github.com/BerriAI/litellm/issues/5515
+            """
+            has_text_in_content = _check_text_in_content(user_content)
+            if has_text_in_content is False:
+                verbose_logger.warning(
+                    "No text in user content. Adding a blank text to user content, to ensure Gemini doesn't fail the request. Relevant Issue - https://github.com/BerriAI/litellm/issues/5515"
+                )
+                user_content.append(
+                    PartType(text=" ")
+                )  # add a blank text, to ensure Gemini doesn't fail the request.
             contents.append(ContentType(role="user", parts=user_content))
         assistant_content = []
         ## MERGE CONSECUTIVE ASSISTANT CONTENT ##

@@ -535,6 +535,18 @@
         "supports_vision": true
     },
     "azure/gpt-4o-2024-08-06": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 0.00000275,
+        "output_cost_per_token": 0.000011,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
+    "azure/global-standard/gpt-4o-2024-08-06": {
         "max_tokens": 16384,
         "max_input_tokens": 128000,
         "max_output_tokens": 16384,
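
A quick sanity check of the new regional entry (a sketch, not LiteLLM's cost-tracking code): cost is just rate times token count, so a 1,000-token prompt plus a 500-token completion on azure/gpt-4o-2024-08-06 comes to $0.00825:

input_cost_per_token = 0.00000275   # azure/gpt-4o-2024-08-06 (regional pricing)
output_cost_per_token = 0.000011

prompt_tokens, completion_tokens = 1_000, 500
cost = prompt_tokens * input_cost_per_token + completion_tokens * output_cost_per_token
print(f"${cost:.5f}")  # $0.00825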

@@ -1,5 +1,7 @@
 model_list:
   - model_name: "*"
     litellm_params:
       model: openai/*

+litellm_settings:
+  default_max_internal_user_budget: 2

@@ -1645,6 +1645,14 @@ class ProxyConfig:
                     verbose_proxy_logger.debug(
                         f"litellm.post_call_rules: {litellm.post_call_rules}"
                     )
+                elif key == "max_internal_user_budget":
+                    litellm.max_internal_user_budget = float(value)  # type: ignore
+                elif key == "default_max_internal_user_budget":
+                    litellm.default_max_internal_user_budget = float(value)
+                    if litellm.max_internal_user_budget is None:
+                        litellm.max_internal_user_budget = (
+                            litellm.default_max_internal_user_budget
+                        )
                 elif key == "custom_provider_map":
                     from litellm.utils import custom_llm_setup

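
The new branches follow the existing pattern in ProxyConfig: each key under litellm_settings is copied onto the litellm module, and default_max_internal_user_budget only fills max_internal_user_budget when the latter was not set explicitly. A stripped-down sketch of that behavior (not the proxy's actual loader):

import litellm

def apply_budget_settings(litellm_settings: dict) -> None:
    for key, value in litellm_settings.items():
        if key == "max_internal_user_budget":
            litellm.max_internal_user_budget = float(value)
        elif key == "default_max_internal_user_budget":
            litellm.default_max_internal_user_budget = float(value)
            if litellm.max_internal_user_budget is None:
                # default only applies when no explicit budget was configured
                litellm.max_internal_user_budget = litellm.default_max_internal_user_budget

apply_budget_settings({"default_max_internal_user_budget": 2})
print(litellm.max_internal_user_budget)  # 2.0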

@@ -655,12 +655,11 @@ def test_gemini_pro_vision_base64():
     try:
         load_vertex_ai_credentials()
         litellm.set_verbose = True
-        litellm.num_retries = 3
         image_path = "../proxy/cached_logo.jpg"
         # Getting the base64 string
         base64_image = encode_image(image_path)
         resp = litellm.completion(
-            model="vertex_ai/gemini-pro-vision",
+            model="vertex_ai/gemini-1.5-pro",
             messages=[
                 {
                     "role": "user",

@@ -679,6 +678,8 @@ def test_gemini_pro_vision_base64():
         print(resp)

         prompt_tokens = resp.usage.prompt_tokens
+    except litellm.InternalServerError:
+        pass
     except litellm.RateLimitError as e:
         pass
     except Exception as e:

@@ -22,6 +22,9 @@ from litellm.llms.prompt_templates.factory import (
     llama_2_chat_pt,
     prompt_factory,
 )
+from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_non_gemini import (
+    _gemini_convert_messages_with_history,
+)


 def test_llama_3_prompt():

@@ -388,3 +391,44 @@ def test_bedrock_parallel_tool_calling_pt(provider):
         translated_messages[number_of_messages - 1]["role"]
         != translated_messages[number_of_messages - 2]["role"]
     )
+
+
+def test_vertex_only_image_user_message():
+    base64_image = "/9j/2wCEAAgGBgcGBQ"
+
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
+                },
+            ],
+        },
+    ]
+
+    response = _gemini_convert_messages_with_history(messages=messages)
+
+    expected_response = [
+        {
+            "role": "user",
+            "parts": [
+                {
+                    "inline_data": {
+                        "data": "/9j/2wCEAAgGBgcGBQ",
+                        "mime_type": "image/jpeg",
+                    }
+                },
+                {"text": " "},
+            ],
+        }
+    ]
+
+    assert len(response) == len(expected_response)
+    for idx, content in enumerate(response):
+        assert (
+            content == expected_response[idx]
+        ), "Invalid gemini input. Got={}, Expected={}".format(
+            content, expected_response[idx]
+        )

@@ -535,6 +535,18 @@
         "supports_vision": true
     },
     "azure/gpt-4o-2024-08-06": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 0.00000275,
+        "output_cost_per_token": 0.000011,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
+    "azure/global-standard/gpt-4o-2024-08-06": {
         "max_tokens": 16384,
         "max_input_tokens": 128000,
         "max_output_tokens": 16384,