forked from phoenix/litellm-mirror
Merge pull request #4271 from BerriAI/litellm_vertex_httpx_fix
fix(vertex_httpx.py): Correctly handle Vertex content policy violation error
commit d96ffe8075
7 changed files with 309 additions and 37 deletions
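
Net effect for callers: a Vertex candidate whose finishReason is one of SAFETY, RECITATION, BLOCKLIST, PROHIBITED_CONTENT, or SPII now surfaces as litellm.ContentPolicyViolationError instead of a generic UnprocessableEntityError. A minimal caller-side sketch (model and prompt are illustrative):

    import litellm

    try:
        response = litellm.completion(
            model="vertex_ai_beta/gemini-1.5-flash",
            messages=[{"role": "user", "content": "List 5 popular cookie recipes."}],
        )
    except litellm.ContentPolicyViolationError as e:
        # Raised when Vertex blocks the response, e.g. finishReason == "RECITATION"
        print("blocked:", e)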
@@ -9,7 +9,7 @@ import types
 import uuid
 from enum import Enum
 from functools import partial
-from typing import Any, Callable, List, Literal, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union
 
 import httpx  # type: ignore
 import ijson
@@ -241,6 +241,20 @@ class VertexGeminiConfig:
         "europe-west9",
     ]
 
+    def get_flagged_finish_reasons(self) -> Dict[str, str]:
+        """
+        Return Dictionary of finish reasons which indicate response was flagged
+
+        and what it means
+        """
+        return {
+            "SAFETY": "The token generation was stopped as the response was flagged for safety reasons. NOTE: When streaming the Candidate.content will be empty if content filters blocked the output.",
+            "RECITATION": "The token generation was stopped as the response was flagged for unauthorized citations.",
+            "BLOCKLIST": "The token generation was stopped as the response was flagged for the terms which are included from the terminology blocklist.",
+            "PROHIBITED_CONTENT": "The token generation was stopped as the response was flagged for the prohibited contents.",
+            "SPII": "The token generation was stopped as the response was flagged for Sensitive Personally Identifiable Information (SPII) contents.",
+        }
+
+
 async def make_call(
     client: Optional[AsyncHTTPHandler],
@@ -362,6 +376,27 @@ class VertexLLM(BaseLLM):
                 status_code=422,
             )
 
+        ## CHECK IF RESPONSE FLAGGED
+        if len(completion_response["candidates"]) > 0:
+            content_policy_violations = (
+                VertexGeminiConfig().get_flagged_finish_reasons()
+            )
+            if (
+                "finishReason" in completion_response["candidates"][0]
+                and completion_response["candidates"][0]["finishReason"]
+                in content_policy_violations.keys()
+            ):
+                ## CONTENT POLICY VIOLATION ERROR
+                raise VertexAIError(
+                    status_code=400,
+                    message="The response was blocked. Reason={}. Raw Response={}".format(
+                        content_policy_violations[
+                            completion_response["candidates"][0]["finishReason"]
+                        ],
+                        completion_response,
+                    ),
+                )
+
         model_response.choices = []  # type: ignore
 
         ## GET MODEL ##
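
For reference, the shape this check walks: the flag sits on the first candidate's finishReason. A trimmed version of the mocked Vertex payload used in the new test further down (one safetyRating kept, citations elided):

    flagged_response = {
        "candidates": [
            {
                # A finishReason matching a key of get_flagged_finish_reasons()
                # triggers the 400 VertexAIError above
                "finishReason": "RECITATION",
                "safetyRatings": [
                    {
                        "category": "HARM_CATEGORY_HATE_SPEECH",
                        "probability": "NEGLIGIBLE",
                        "probabilityScore": 0.14965563,
                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
                        "severityScore": 0.13660839,
                    },
                ],
                "citationMetadata": {"citations": []},  # trimmed here
            }
        ],
        "usageMetadata": {"promptTokenCount": 336, "totalTokenCount": 336},
    }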
@@ -804,6 +839,7 @@ class VertexLLM(BaseLLM):
             client = HTTPHandler(**_params)  # type: ignore
         else:
             client = client
+
         try:
             response = client.post(url=url, headers=headers, json=data)  # type: ignore
             response.raise_for_status()
@@ -1928,6 +1928,7 @@ def completion(
                 acompletion=acompletion,
                 timeout=timeout,
                 custom_llm_provider=custom_llm_provider,
+                client=client,
             )
 
         elif custom_llm_provider == "vertex_ai":
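
This client passthrough is what lets the new test inject a mocked HTTP handler; the same hook can supply a preconfigured handler in normal use. A sketch, assuming the vertex_ai_beta path as exercised in the tests below:

    import litellm
    from litellm.llms.custom_httpx.http_handler import HTTPHandler

    client = HTTPHandler()  # construct with custom settings as needed
    response = litellm.completion(
        model="vertex_ai_beta/gemini-1.5-flash",
        messages=[{"role": "user", "content": "hi"}],
        client=client,
    )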
@@ -1185,6 +1185,33 @@
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "gemini-1.5-flash": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 0.0001315,
+        "input_cost_per_video_per_second": 0.0001315,
+        "input_cost_per_audio_per_second": 0.000125,
+        "input_cost_per_token": 0.00000003125,
+        "input_cost_per_token_above_128k_tokens": 0.0000000625,
+        "output_cost_per_token": 0.00000009375,
+        "output_cost_per_token_above_128k_tokens": 0.0000001875,
+        "output_cost_per_image": 0.000263,
+        "output_cost_per_video_per_second": 0.000263,
+        "output_cost_per_audio_per_second": 0.00025,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
     "gemini-1.5-flash-001": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
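
As a quick sanity check on the new pricing entry, the per-token figures above translate to a request cost directly; a sketch with illustrative token counts, below the 128k threshold so the base rates apply:

    # gemini-1.5-flash rates from the entry above (USD per token)
    input_cost_per_token = 0.00000003125
    output_cost_per_token = 0.00000009375

    prompt_tokens, completion_tokens = 10_000, 1_000  # illustrative
    cost = prompt_tokens * input_cost_per_token + completion_tokens * output_cost_per_token
    print(f"${cost:.6f}")  # $0.000406 for this illustrative call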
@@ -1207,6 +1234,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1233,6 +1261,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1253,6 +1282,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1273,6 +1303,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1293,6 +1324,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -15,6 +15,7 @@ import asyncio
 import json
 import os
 import tempfile
+from unittest.mock import MagicMock, patch
 
 import pytest
 
@@ -695,37 +696,161 @@ async def test_gemini_pro_function_calling_httpx(provider, sync_mode):
         pytest.fail("An unexpected exception occurred - {}".format(str(e)))
 
 
-@pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
+# @pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
+def vertex_httpx_mock_post(url, data=None, json=None, headers=None):
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.headers = {"Content-Type": "application/json"}
+    mock_response.json.return_value = {
+        "candidates": [
+            {
+                "finishReason": "RECITATION",
+                "safetyRatings": [
+                    {
+                        "category": "HARM_CATEGORY_HATE_SPEECH",
+                        "probability": "NEGLIGIBLE",
+                        "probabilityScore": 0.14965563,
+                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
+                        "severityScore": 0.13660839,
+                    },
+                    {
+                        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+                        "probability": "NEGLIGIBLE",
+                        "probabilityScore": 0.16344544,
+                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
+                        "severityScore": 0.10230471,
+                    },
+                    {
+                        "category": "HARM_CATEGORY_HARASSMENT",
+                        "probability": "NEGLIGIBLE",
+                        "probabilityScore": 0.1979091,
+                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
+                        "severityScore": 0.06052939,
+                    },
+                    {
+                        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+                        "probability": "NEGLIGIBLE",
+                        "probabilityScore": 0.1765296,
+                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
+                        "severityScore": 0.18417984,
+                    },
+                ],
+                "citationMetadata": {
+                    "citations": [
+                        {
+                            "startIndex": 251,
+                            "endIndex": 380,
+                            "uri": "https://chocolatecake2023.blogspot.com/2023/02/taste-deliciousness-of-perfectly-baked.html?m=1",
+                        },
+                        {
+                            "startIndex": 393,
+                            "endIndex": 535,
+                            "uri": "https://skinnymixes.co.uk/blogs/food-recipes/peanut-butter-cup-cookies",
+                        },
+                        {
+                            "startIndex": 439,
+                            "endIndex": 581,
+                            "uri": "https://mast-producing-trees.org/aldis-chocolate-chips-are-peanut-and-tree-nut-free/",
+                        },
+                        {
+                            "startIndex": 1117,
+                            "endIndex": 1265,
+                            "uri": "https://github.com/frdrck100/To_Do_Assignments",
+                        },
+                        {
+                            "startIndex": 1146,
+                            "endIndex": 1288,
+                            "uri": "https://skinnymixes.co.uk/blogs/food-recipes/peanut-butter-cup-cookies",
+                        },
+                        {
+                            "startIndex": 1166,
+                            "endIndex": 1299,
+                            "uri": "https://www.girlversusdough.com/brookies/",
+                        },
+                        {
+                            "startIndex": 1780,
+                            "endIndex": 1909,
+                            "uri": "https://chocolatecake2023.blogspot.com/2023/02/taste-deliciousness-of-perfectly-baked.html?m=1",
+                        },
+                        {
+                            "startIndex": 1834,
+                            "endIndex": 1964,
+                            "uri": "https://newsd.in/national-cream-cheese-brownie-day-2023-date-history-how-to-make-a-cream-cheese-brownie/",
+                        },
+                        {
+                            "startIndex": 1846,
+                            "endIndex": 1989,
+                            "uri": "https://github.com/frdrck100/To_Do_Assignments",
+                        },
+                        {
+                            "startIndex": 2121,
+                            "endIndex": 2261,
+                            "uri": "https://recipes.net/copycat/hardee/hardees-chocolate-chip-cookie-recipe/",
+                        },
+                        {
+                            "startIndex": 2505,
+                            "endIndex": 2671,
+                            "uri": "https://www.tfrecipes.com/Oranges%20with%20dried%20cherries/",
+                        },
+                        {
+                            "startIndex": 3390,
+                            "endIndex": 3529,
+                            "uri": "https://github.com/quantumcognition/Crud-palm",
+                        },
+                        {
+                            "startIndex": 3568,
+                            "endIndex": 3724,
+                            "uri": "https://recipes.net/dessert/cakes/ultimate-easy-gingerbread/",
+                        },
+                        {
+                            "startIndex": 3640,
+                            "endIndex": 3770,
+                            "uri": "https://recipes.net/dessert/cookies/soft-and-chewy-peanut-butter-cookies/",
+                        },
+                    ]
+                },
+            }
+        ],
+        "usageMetadata": {"promptTokenCount": 336, "totalTokenCount": 336},
+    }
+    return mock_response
+
+
 @pytest.mark.parametrize("provider", ["vertex_ai_beta"])  # "vertex_ai",
 @pytest.mark.asyncio
-async def test_gemini_pro_json_schema_httpx(provider):
+async def test_gemini_pro_json_schema_httpx_content_policy_error(provider):
     load_vertex_ai_credentials()
     litellm.set_verbose = True
     messages = [
         {
             "role": "user",
             "content": """
-        List 5 popular cookie recipes.
-
-        Using this JSON schema:
-
-        Recipe = {"recipe_name": str}
-
-        Return a `list[Recipe]`
+    List 5 popular cookie recipes.
+
+    Using this JSON schema:
+    ```json
+    {'$defs': {'Recipe': {'properties': {'recipe_name': {'examples': ['Chocolate Chip Cookies', 'Peanut Butter Cookies'], 'maxLength': 100, 'title': 'The recipe name', 'type': 'string'}, 'estimated_time': {'anyOf': [{'minimum': 0, 'type': 'integer'}, {'type': 'null'}], 'default': None, 'description': 'The estimated time to make the recipe in minutes', 'examples': [30, 45], 'title': 'The estimated time'}, 'ingredients': {'examples': [['flour', 'sugar', 'chocolate chips'], ['peanut butter', 'sugar', 'eggs']], 'items': {'type': 'string'}, 'maxItems': 10, 'title': 'The ingredients', 'type': 'array'}, 'instructions': {'examples': [['mix', 'bake'], ['mix', 'chill', 'bake']], 'items': {'type': 'string'}, 'maxItems': 10, 'title': 'The instructions', 'type': 'array'}}, 'required': ['recipe_name', 'ingredients', 'instructions'], 'title': 'Recipe', 'type': 'object'}}, 'properties': {'recipes': {'items': {'$ref': '#/$defs/Recipe'}, 'maxItems': 11, 'title': 'The recipes', 'type': 'array'}}, 'required': ['recipes'], 'title': 'MyRecipes', 'type': 'object'}
+    ```
             """,
         }
     ]
-    response = completion(
-        model="vertex_ai_beta/gemini-1.5-flash-preview-0514",
-        messages=messages,
-        response_format={"type": "json_object"},
-    )
-
-    assert response.choices[0].message.content is not None
-    response_json = json.loads(response.choices[0].message.content)
-
-    assert isinstance(response_json, dict) or isinstance(response_json, list)
+    from litellm.llms.custom_httpx.http_handler import HTTPHandler
+
+    client = HTTPHandler()
+
+    with patch.object(client, "post", side_effect=vertex_httpx_mock_post) as mock_call:
+        try:
+            response = completion(
+                model="vertex_ai_beta/gemini-1.5-flash",
+                messages=messages,
+                response_format={"type": "json_object"},
+                client=client,
+            )
+        except litellm.ContentPolicyViolationError as e:
+            pass
+
+        mock_call.assert_called_once()
 
 
 @pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
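
The test above inlines a large mocked payload, but the underlying pattern is compact and reusable. A sketch of the same idea; make_mock_response is a hypothetical helper, not part of the test suite:

    from unittest.mock import MagicMock, patch

    from litellm.llms.custom_httpx.http_handler import HTTPHandler

    def make_mock_response(payload, status_code=200):
        # Hypothetical helper: mimics the subset of the httpx response that
        # the Vertex path reads (status_code, headers, .json())
        resp = MagicMock()
        resp.status_code = status_code
        resp.headers = {"Content-Type": "application/json"}
        resp.json.return_value = payload
        return resp

    client = HTTPHandler()
    flagged = {"candidates": [{"finishReason": "SAFETY"}], "usageMetadata": {}}
    with patch.object(client, "post", return_value=make_mock_response(flagged)):
        pass  # pass client=client into completion(...), as in the test above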
@@ -1,26 +1,26 @@
-from openai import AuthenticationError, BadRequestError, RateLimitError, OpenAIError
+import asyncio
 import os
+import subprocess
 import sys
 import traceback
-import subprocess, asyncio
 from typing import Any
 
+from openai import AuthenticationError, BadRequestError, OpenAIError, RateLimitError
+
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-import litellm
-from litellm import (
-    embedding,
-    completion,
-    # AuthenticationError,
-    ContextWindowExceededError,
-    # RateLimitError,
-    # ServiceUnavailableError,
-    # OpenAIError,
-)
 from concurrent.futures import ThreadPoolExecutor
+from unittest.mock import MagicMock, patch
+
 import pytest
-from unittest.mock import patch, MagicMock
+
+import litellm
+from litellm import (  # AuthenticationError,; RateLimitError,; ServiceUnavailableError,; OpenAIError,
+    ContextWindowExceededError,
+    completion,
+    embedding,
+)
 
 litellm.vertex_project = "pathrise-convert-1606954137718"
 litellm.vertex_location = "us-central1"
@@ -252,6 +252,7 @@ def test_completion_azure_exception():
 async def asynctest_completion_azure_exception():
     try:
         import openai
+
         import litellm
 
         print("azure gpt-3.5 test\n\n")
@@ -283,8 +284,11 @@ async def asynctest_completion_azure_exception():
 
 def asynctest_completion_openai_exception_bad_model():
     try:
+        import asyncio
+
         import openai
-        import litellm, asyncio
+
+        import litellm
 
         print("azure exception bad model\n\n")
         litellm.set_verbose = True
@@ -311,8 +315,11 @@ def asynctest_completion_openai_exception_bad_model():
 
 def asynctest_completion_azure_exception_bad_model():
     try:
+        import asyncio
+
         import openai
-        import litellm, asyncio
+
+        import litellm
 
         print("azure exception bad model\n\n")
         litellm.set_verbose = True
@@ -663,7 +670,7 @@ def test_litellm_predibase_exception():
 # print(f"accuracy_score: {accuracy_score}")
 
 
-@pytest.mark.parametrize("provider", ["predibase"])
+@pytest.mark.parametrize("provider", ["predibase", "vertex_ai_beta"])
 def test_exception_mapping(provider):
     """
     For predibase, run through a set of mock exceptions
@@ -6240,7 +6240,11 @@ def exception_type(
                     llm_provider="sagemaker",
                     response=original_exception.response,
                 )
-        elif custom_llm_provider == "vertex_ai":
+        elif (
+            custom_llm_provider == "vertex_ai"
+            or custom_llm_provider == "vertex_ai_beta"
+            or custom_llm_provider == "gemini"
+        ):
             if (
                 "Vertex AI API has not been used in project" in error_str
                 or "Unable to find your project" in error_str
@@ -6259,6 +6263,13 @@ def exception_type(
                     ),
                     litellm_debug_info=extra_information,
                 )
+            if "400 Request payload size exceeds" in error_str:
+                exception_mapping_worked = True
+                raise ContextWindowExceededError(
+                    message=f"VertexException - {error_str}",
+                    model=model,
+                    llm_provider=custom_llm_provider,
+                )
             elif (
                 "None Unknown Error." in error_str
                 or "Content has no parts." in error_str
@@ -6292,13 +6303,13 @@ def exception_type(
                 )
             elif "The response was blocked." in error_str:
                 exception_mapping_worked = True
-                raise UnprocessableEntityError(
-                    message=f"VertexAIException UnprocessableEntityError - {error_str}",
+                raise ContentPolicyViolationError(
+                    message=f"VertexAIException ContentPolicyViolationError - {error_str}",
                     model=model,
                     llm_provider="vertex_ai",
                     litellm_debug_info=extra_information,
                     response=httpx.Response(
-                        status_code=422,
+                        status_code=400,
                         request=httpx.Request(
                             method="POST",
                             url=" https://cloud.google.com/vertex-ai/",
@@ -6350,6 +6361,27 @@ def exception_type(
                         ),
                     ),
                 )
+            if original_exception.status_code == 401:
+                exception_mapping_worked = True
+                raise AuthenticationError(
+                    message=f"VertexAIException - {original_exception.message}",
+                    llm_provider=custom_llm_provider,
+                    model=model,
+                )
+            if original_exception.status_code == 404:
+                exception_mapping_worked = True
+                raise NotFoundError(
+                    message=f"VertexAIException - {original_exception.message}",
+                    llm_provider=custom_llm_provider,
+                    model=model,
+                )
+            if original_exception.status_code == 408:
+                exception_mapping_worked = True
+                raise Timeout(
+                    message=f"VertexAIException - {original_exception.message}",
+                    llm_provider=custom_llm_provider,
+                    model=model,
+                )
 
             if original_exception.status_code == 429:
                 exception_mapping_worked = True
@@ -6379,6 +6411,13 @@ def exception_type(
                     request=httpx.Request(method="completion", url="https://github.com/BerriAI/litellm"),  # type: ignore
                 ),
             )
+            if original_exception.status_code == 503:
+                exception_mapping_worked = True
+                raise ServiceUnavailableError(
+                    message=f"VertexAIException - {original_exception.message}",
+                    llm_provider=custom_llm_provider,
+                    model=model,
+                )
         elif custom_llm_provider == "palm" or custom_llm_provider == "gemini":
             if "503 Getting metadata" in error_str:
                 # auth errors look like this
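
Taken together, these exception_type changes map Vertex HTTP errors onto litellm's standard exception hierarchy (401 → AuthenticationError, 404 → NotFoundError, 408 → Timeout, 503 → ServiceUnavailableError, plus the ContentPolicyViolationError mapping above), so callers can branch on exception type instead of parsing error strings. A sketch with an illustrative retry policy:

    import litellm

    messages = [{"role": "user", "content": "List 5 popular cookie recipes."}]
    try:
        response = litellm.completion(
            model="vertex_ai_beta/gemini-1.5-flash", messages=messages
        )
    except litellm.ContentPolicyViolationError:
        response = None  # blocked content; retrying the same prompt won't help
    except (litellm.Timeout, litellm.ServiceUnavailableError):
        response = None  # transient 408/503; a retry with backoff is reasonable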