fix(vertex_httpx.py): re-raise vertex content policy violation error

Fixes https://github.com/BerriAI/litellm/issues/4270
Krrish Dholakia 2024-06-18 19:00:35 -07:00
parent aef5cf3f22
commit f41c443abb
7 changed files with 309 additions and 37 deletions
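With this change, when Gemini on Vertex AI flags a response (SAFETY, RECITATION, BLOCKLIST, PROHIBITED_CONTENT, or SPII) and returns an empty candidate, litellm raises a typed error instead of handing back an unusable response. A minimal sketch of handling the new behavior — the model name and prompt mirror the commit's test and are illustrative only:

```python
import litellm

try:
    response = litellm.completion(
        model="vertex_ai_beta/gemini-1.5-flash",
        messages=[{"role": "user", "content": "List 5 popular cookie recipes."}],
    )
except litellm.ContentPolicyViolationError as e:
    # Raised when Vertex AI blocks the output; the message includes the
    # flagged finish reason and the raw response for debugging.
    print(f"Blocked by Vertex AI: {e}")
```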

View file

@@ -9,7 +9,7 @@ import types
import uuid
from enum import Enum
from functools import partial
-from typing import Any, Callable, List, Literal, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union
import httpx # type: ignore
import ijson
@@ -241,6 +241,20 @@ class VertexGeminiConfig:
"europe-west9",
]
def get_flagged_finish_reasons(self) -> Dict[str, str]:
"""
Return Dictionary of finish reasons which indicate response was flagged
and what it means
"""
return {
"SAFETY": "The token generation was stopped as the response was flagged for safety reasons. NOTE: When streaming the Candidate.content will be empty if content filters blocked the output.",
"RECITATION": "The token generation was stopped as the response was flagged for unauthorized citations.",
"BLOCKLIST": "The token generation was stopped as the response was flagged for the terms which are included from the terminology blocklist.",
"PROHIBITED_CONTENT": "The token generation was stopped as the response was flagged for the prohibited contents.",
"SPII": "The token generation was stopped as the response was flagged for Sensitive Personally Identifiable Information (SPII) contents.",
}
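These descriptions track Vertex AI's documented FinishReason values; centralizing them in one helper lets the response-parsing path below detect a flagged candidate and surface a readable error rather than an empty completion.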
async def make_call(
client: Optional[AsyncHTTPHandler],
@@ -362,6 +376,27 @@ class VertexLLM(BaseLLM):
status_code=422,
)
## CHECK IF RESPONSE FLAGGED
if len(completion_response["candidates"]) > 0:
content_policy_violations = (
VertexGeminiConfig().get_flagged_finish_reasons()
)
if (
"finishReason" in completion_response["candidates"][0]
and completion_response["candidates"][0]["finishReason"]
in content_policy_violations.keys()
):
## CONTENT POLICY VIOLATION ERROR
raise VertexAIError(
status_code=400,
message="The response was blocked. Reason={}. Raw Response={}".format(
content_policy_violations[
completion_response["candidates"][0]["finishReason"]
],
completion_response,
),
)
model_response.choices = [] # type: ignore
## GET MODEL ##
@@ -804,6 +839,7 @@ class VertexLLM(BaseLLM):
client = HTTPHandler(**_params) # type: ignore
else:
client = client
try:
response = client.post(url=url, headers=headers, json=data) # type: ignore
response.raise_for_status()
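Note: the content-policy branch above raises VertexAIError with status_code=400 and a message prefixed "The response was blocked." — exactly the status and string that the exception-mapping change in utils.py (further down in this commit) converts into litellm.ContentPolicyViolationError.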

View file

@@ -1928,6 +1928,7 @@ def completion(
acompletion=acompletion,
timeout=timeout,
custom_llm_provider=custom_llm_provider,
client=client,
)
elif custom_llm_provider == "vertex_ai":

View file

@@ -1185,6 +1185,33 @@
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
},
"gemini-1.5-flash": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0.0001315,
"input_cost_per_video_per_second": 0.0001315,
"input_cost_per_audio_per_second": 0.000125,
"input_cost_per_token": 0.00000003125,
"input_cost_per_token_above_128k_tokens": 0.0000000625,
"output_cost_per_token": 0.00000009375,
"output_cost_per_token_above_128k_tokens": 0.0000001875,
"output_cost_per_image": 0.000263,
"output_cost_per_video_per_second": 0.000263,
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
},
"gemini-1.5-flash-001": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
@@ -1207,6 +1234,7 @@
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1233,6 +1261,7 @@
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1253,6 +1282,7 @@
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1273,6 +1303,7 @@
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1293,6 +1324,7 @@
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"

View file

@@ -15,6 +15,7 @@ import asyncio
import json
import os
import tempfile
from unittest.mock import MagicMock, patch
import pytest
@@ -695,37 +696,161 @@ async def test_gemini_pro_function_calling_httpx(provider, sync_mode):
pytest.fail("An unexpected exception occurred - {}".format(str(e)))
-@pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
+# @pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
def vertex_httpx_mock_post(url, data=None, json=None, headers=None):
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.headers = {"Content-Type": "application/json"}
mock_response.json.return_value = {
"candidates": [
{
"finishReason": "RECITATION",
"safetyRatings": [
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.14965563,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.13660839,
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.16344544,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.10230471,
},
{
"category": "HARM_CATEGORY_HARASSMENT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.1979091,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.06052939,
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.1765296,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.18417984,
},
],
"citationMetadata": {
"citations": [
{
"startIndex": 251,
"endIndex": 380,
"uri": "https://chocolatecake2023.blogspot.com/2023/02/taste-deliciousness-of-perfectly-baked.html?m=1",
},
{
"startIndex": 393,
"endIndex": 535,
"uri": "https://skinnymixes.co.uk/blogs/food-recipes/peanut-butter-cup-cookies",
},
{
"startIndex": 439,
"endIndex": 581,
"uri": "https://mast-producing-trees.org/aldis-chocolate-chips-are-peanut-and-tree-nut-free/",
},
{
"startIndex": 1117,
"endIndex": 1265,
"uri": "https://github.com/frdrck100/To_Do_Assignments",
},
{
"startIndex": 1146,
"endIndex": 1288,
"uri": "https://skinnymixes.co.uk/blogs/food-recipes/peanut-butter-cup-cookies",
},
{
"startIndex": 1166,
"endIndex": 1299,
"uri": "https://www.girlversusdough.com/brookies/",
},
{
"startIndex": 1780,
"endIndex": 1909,
"uri": "https://chocolatecake2023.blogspot.com/2023/02/taste-deliciousness-of-perfectly-baked.html?m=1",
},
{
"startIndex": 1834,
"endIndex": 1964,
"uri": "https://newsd.in/national-cream-cheese-brownie-day-2023-date-history-how-to-make-a-cream-cheese-brownie/",
},
{
"startIndex": 1846,
"endIndex": 1989,
"uri": "https://github.com/frdrck100/To_Do_Assignments",
},
{
"startIndex": 2121,
"endIndex": 2261,
"uri": "https://recipes.net/copycat/hardee/hardees-chocolate-chip-cookie-recipe/",
},
{
"startIndex": 2505,
"endIndex": 2671,
"uri": "https://www.tfrecipes.com/Oranges%20with%20dried%20cherries/",
},
{
"startIndex": 3390,
"endIndex": 3529,
"uri": "https://github.com/quantumcognition/Crud-palm",
},
{
"startIndex": 3568,
"endIndex": 3724,
"uri": "https://recipes.net/dessert/cakes/ultimate-easy-gingerbread/",
},
{
"startIndex": 3640,
"endIndex": 3770,
"uri": "https://recipes.net/dessert/cookies/soft-and-chewy-peanut-butter-cookies/",
},
]
},
}
],
"usageMetadata": {"promptTokenCount": 336, "totalTokenCount": 336},
}
return mock_response
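The mocked payload reproduces the failure mode from the linked issue: a candidate flagged with finishReason "RECITATION", citation metadata, and no content parts — the exact shape that should trip the new flagged-response check and surface as ContentPolicyViolationError.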
@pytest.mark.parametrize("provider", ["vertex_ai_beta"]) # "vertex_ai",
@pytest.mark.asyncio
-async def test_gemini_pro_json_schema_httpx(provider):
+async def test_gemini_pro_json_schema_httpx_content_policy_error(provider):
load_vertex_ai_credentials()
litellm.set_verbose = True
messages = [
{
"role": "user",
"content": """
-    List 5 popular cookie recipes.
-    Using this JSON schema:
+List 5 popular cookie recipes.
 Recipe = {"recipe_name": str}
 Return a `list[Recipe]`
+Using this JSON schema:
+```json
+{'$defs': {'Recipe': {'properties': {'recipe_name': {'examples': ['Chocolate Chip Cookies', 'Peanut Butter Cookies'], 'maxLength': 100, 'title': 'The recipe name', 'type': 'string'}, 'estimated_time': {'anyOf': [{'minimum': 0, 'type': 'integer'}, {'type': 'null'}], 'default': None, 'description': 'The estimated time to make the recipe in minutes', 'examples': [30, 45], 'title': 'The estimated time'}, 'ingredients': {'examples': [['flour', 'sugar', 'chocolate chips'], ['peanut butter', 'sugar', 'eggs']], 'items': {'type': 'string'}, 'maxItems': 10, 'title': 'The ingredients', 'type': 'array'}, 'instructions': {'examples': [['mix', 'bake'], ['mix', 'chill', 'bake']], 'items': {'type': 'string'}, 'maxItems': 10, 'title': 'The instructions', 'type': 'array'}}, 'required': ['recipe_name', 'ingredients', 'instructions'], 'title': 'Recipe', 'type': 'object'}}, 'properties': {'recipes': {'items': {'$ref': '#/$defs/Recipe'}, 'maxItems': 11, 'title': 'The recipes', 'type': 'array'}}, 'required': ['recipes'], 'title': 'MyRecipes', 'type': 'object'}
+```
""",
}
]
+    from litellm.llms.custom_httpx.http_handler import HTTPHandler
-    response = completion(
-        model="vertex_ai_beta/gemini-1.5-flash-preview-0514",
-        messages=messages,
-        response_format={"type": "json_object"},
-    )
+    client = HTTPHandler()
-    assert response.choices[0].message.content is not None
-    response_json = json.loads(response.choices[0].message.content)
+    with patch.object(client, "post", side_effect=vertex_httpx_mock_post) as mock_call:
+        try:
+            response = completion(
+                model="vertex_ai_beta/gemini-1.5-flash",
+                messages=messages,
+                response_format={"type": "json_object"},
+                client=client,
+            )
+        except litellm.ContentPolicyViolationError as e:
+            pass
-    assert isinstance(response_json, dict) or isinstance(response_json, list)
+    mock_call.assert_called_once()
@pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")

View file

@@ -1,26 +1,26 @@
-from openai import AuthenticationError, BadRequestError, RateLimitError, OpenAIError
+import asyncio
 import os
+import subprocess
 import sys
 import traceback
-import subprocess, asyncio
 from typing import Any
+from openai import AuthenticationError, BadRequestError, OpenAIError, RateLimitError
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
-import litellm
-from litellm import (
-    embedding,
-    completion,
-    # AuthenticationError,
-    ContextWindowExceededError,
-    # RateLimitError,
-    # ServiceUnavailableError,
-    # OpenAIError,
-)
 from concurrent.futures import ThreadPoolExecutor
+from unittest.mock import MagicMock, patch
 import pytest
-from unittest.mock import patch, MagicMock
+import litellm
+from litellm import (  # AuthenticationError,; RateLimitError,; ServiceUnavailableError,; OpenAIError,
+    ContextWindowExceededError,
+    completion,
+    embedding,
+)
litellm.vertex_project = "pathrise-convert-1606954137718"
litellm.vertex_location = "us-central1"
@@ -252,6 +252,7 @@ def test_completion_azure_exception():
async def asynctest_completion_azure_exception():
try:
import openai
import litellm
print("azure gpt-3.5 test\n\n")
@@ -283,8 +284,11 @@ async def asynctest_completion_azure_exception():
def asynctest_completion_openai_exception_bad_model():
try:
+import asyncio
 import openai
-import litellm, asyncio
+import litellm
print("azure exception bad model\n\n")
litellm.set_verbose = True
@@ -311,8 +315,11 @@ def asynctest_completion_openai_exception_bad_model():
def asynctest_completion_azure_exception_bad_model():
try:
+import asyncio
 import openai
-import litellm, asyncio
+import litellm
print("azure exception bad model\n\n")
litellm.set_verbose = True
@@ -663,7 +670,7 @@ def test_litellm_predibase_exception():
# print(f"accuracy_score: {accuracy_score}")
@pytest.mark.parametrize("provider", ["predibase"])
@pytest.mark.parametrize("provider", ["predibase", "vertex_ai_beta"])
def test_exception_mapping(provider):
"""
For predibase, run through a set of mock exceptions

View file

@@ -6235,7 +6235,11 @@ def exception_type(
llm_provider="sagemaker",
response=original_exception.response,
)
elif custom_llm_provider == "vertex_ai":
elif (
custom_llm_provider == "vertex_ai"
or custom_llm_provider == "vertex_ai_beta"
or custom_llm_provider == "gemini"
):
if (
"Vertex AI API has not been used in project" in error_str
or "Unable to find your project" in error_str
@@ -6254,6 +6258,13 @@
),
litellm_debug_info=extra_information,
)
if "400 Request payload size exceeds" in error_str:
exception_mapping_worked = True
raise ContextWindowExceededError(
message=f"VertexException - {error_str}",
model=model,
llm_provider=custom_llm_provider,
)
elif (
"None Unknown Error." in error_str
or "Content has no parts." in error_str
@@ -6287,13 +6298,13 @@
)
elif "The response was blocked." in error_str:
exception_mapping_worked = True
-raise UnprocessableEntityError(
-    message=f"VertexAIException UnprocessableEntityError - {error_str}",
+raise ContentPolicyViolationError(
+    message=f"VertexAIException ContentPolicyViolationError - {error_str}",
model=model,
llm_provider="vertex_ai",
litellm_debug_info=extra_information,
response=httpx.Response(
-status_code=422,
+status_code=400,
request=httpx.Request(
method="POST",
url=" https://cloud.google.com/vertex-ai/",
@@ -6345,6 +6356,27 @@
),
),
)
if original_exception.status_code == 401:
exception_mapping_worked = True
raise AuthenticationError(
message=f"VertexAIException - {original_exception.message}",
llm_provider=custom_llm_provider,
model=model,
)
if original_exception.status_code == 404:
exception_mapping_worked = True
raise NotFoundError(
message=f"VertexAIException - {original_exception.message}",
llm_provider=custom_llm_provider,
model=model,
)
if original_exception.status_code == 408:
exception_mapping_worked = True
raise Timeout(
message=f"VertexAIException - {original_exception.message}",
llm_provider=custom_llm_provider,
model=model,
)
if original_exception.status_code == 429:
exception_mapping_worked = True
@@ -6374,6 +6406,13 @@
request=httpx.Request(method="completion", url="https://github.com/BerriAI/litellm"), # type: ignore
),
)
if original_exception.status_code == 503:
exception_mapping_worked = True
raise ServiceUnavailableError(
message=f"VertexAIException - {original_exception.message}",
llm_provider=custom_llm_provider,
model=model,
)
elif custom_llm_provider == "palm" or custom_llm_provider == "gemini":
if "503 Getting metadata" in error_str:
# auth errors look like this
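Taken together, the vertex_ai / vertex_ai_beta / gemini branches now map Vertex errors onto litellm's standard exception types. A sketch of what a caller can rely on after this commit — the prompt is illustrative, the except order is arbitrary, and each comment names the trigger handled by the branch above:

```python
import litellm

try:
    litellm.completion(
        model="vertex_ai/gemini-1.5-flash",
        messages=[{"role": "user", "content": "hi"}],
    )
except litellm.AuthenticationError:           # HTTP 401
    ...
except litellm.NotFoundError:                 # HTTP 404
    ...
except litellm.Timeout:                       # HTTP 408
    ...
except litellm.ContentPolicyViolationError:   # "The response was blocked."
    ...
except litellm.ContextWindowExceededError:    # "400 Request payload size exceeds"
    ...
except litellm.ServiceUnavailableError:       # HTTP 503
    ...
```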

View file

@@ -1185,6 +1185,33 @@
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
},
"gemini-1.5-flash": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0.0001315,
"input_cost_per_video_per_second": 0.0001315,
"input_cost_per_audio_per_second": 0.000125,
"input_cost_per_token": 0.00000003125,
"input_cost_per_token_above_128k_tokens": 0.0000000625,
"output_cost_per_token": 0.00000009375,
"output_cost_per_token_above_128k_tokens": 0.0000001875,
"output_cost_per_image": 0.000263,
"output_cost_per_video_per_second": 0.000263,
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
},
"gemini-1.5-flash-001": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
@@ -1207,6 +1234,7 @@
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1233,6 +1261,7 @@
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1253,6 +1282,7 @@
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1273,6 +1303,7 @@
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1293,6 +1324,7 @@
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"