fix(vertex_httpx.py): re-raise vertex content policy violation error

Fixes https://github.com/BerriAI/litellm/issues/4270
Krrish Dholakia 2024-06-18 19:00:35 -07:00
parent aef5cf3f22
commit f41c443abb
7 changed files with 309 additions and 37 deletions
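With this change, when Gemini on Vertex AI flags a response (SAFETY, RECITATION, BLOCKLIST, PROHIBITED_CONTENT, or SPII) and returns an empty candidate, litellm raises a typed error instead of handing back an unusable response. A minimal sketch of handling the new behavior — the model name and prompt mirror the commit's test and are illustrative only:

```python
import litellm

try:
    response = litellm.completion(
        model="vertex_ai_beta/gemini-1.5-flash",
        messages=[{"role": "user", "content": "List 5 popular cookie recipes."}],
    )
except litellm.ContentPolicyViolationError as e:
    # Raised when Vertex AI blocks the output; the message includes the
    # flagged finish reason and the raw response for debugging.
    print(f"Blocked by Vertex AI: {e}")
```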

View file

@@ -9,7 +9,7 @@ import types
import uuid
from enum import Enum
from functools import partial
-from typing import Any, Callable, List, Literal, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union
import httpx # type: ignore
import ijson
@@ -241,6 +241,20 @@ class VertexGeminiConfig:
"europe-west9",
]
def get_flagged_finish_reasons(self) -> Dict[str, str]:
"""
Return Dictionary of finish reasons which indicate response was flagged
and what it means
"""
return {
"SAFETY": "The token generation was stopped as the response was flagged for safety reasons. NOTE: When streaming the Candidate.content will be empty if content filters blocked the output.",
"RECITATION": "The token generation was stopped as the response was flagged for unauthorized citations.",
"BLOCKLIST": "The token generation was stopped as the response was flagged for the terms which are included from the terminology blocklist.",
"PROHIBITED_CONTENT": "The token generation was stopped as the response was flagged for the prohibited contents.",
"SPII": "The token generation was stopped as the response was flagged for Sensitive Personally Identifiable Information (SPII) contents.",
}
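These descriptions track Vertex AI's documented FinishReason values; centralizing them in one helper lets the response-parsing path below detect a flagged candidate and surface a readable error rather than an empty completion.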
async def make_call(
client: Optional[AsyncHTTPHandler],
@@ -362,6 +376,27 @@ class VertexLLM(BaseLLM):
status_code=422,
)
## CHECK IF RESPONSE FLAGGED
if len(completion_response["candidates"]) > 0:
content_policy_violations = (
VertexGeminiConfig().get_flagged_finish_reasons()
)
if (
"finishReason" in completion_response["candidates"][0]
and completion_response["candidates"][0]["finishReason"]
in content_policy_violations.keys()
):
## CONTENT POLICY VIOLATION ERROR
raise VertexAIError(
status_code=400,
message="The response was blocked. Reason={}. Raw Response={}".format(
content_policy_violations[
completion_response["candidates"][0]["finishReason"]
],
completion_response,
),
)
model_response.choices = [] # type: ignore
## GET MODEL ##
@@ -804,6 +839,7 @@ class VertexLLM(BaseLLM):
client = HTTPHandler(**_params) # type: ignore
else:
client = client
try:
response = client.post(url=url, headers=headers, json=data) # type: ignore
response.raise_for_status()
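Note: the content-policy branch above raises VertexAIError with status_code=400 and a message prefixed "The response was blocked." — exactly the status and string that the exception-mapping change in utils.py (further down in this commit) converts into litellm.ContentPolicyViolationError.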

View file

@@ -1928,6 +1928,7 @@ def completion(
acompletion=acompletion,
timeout=timeout,
custom_llm_provider=custom_llm_provider,
client=client,
)
elif custom_llm_provider == "vertex_ai":

View file

@@ -1185,6 +1185,33 @@
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
},
"gemini-1.5-flash": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0.0001315,
"input_cost_per_video_per_second": 0.0001315,
"input_cost_per_audio_per_second": 0.000125,
"input_cost_per_token": 0.00000003125,
"input_cost_per_token_above_128k_tokens": 0.0000000625,
"output_cost_per_token": 0.00000009375,
"output_cost_per_token_above_128k_tokens": 0.0000001875,
"output_cost_per_image": 0.000263,
"output_cost_per_video_per_second": 0.000263,
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
},
"gemini-1.5-flash-001": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
@@ -1207,6 +1234,7 @@
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1233,6 +1261,7 @@
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1253,6 +1282,7 @@
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1273,6 +1303,7 @@
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1293,6 +1324,7 @@
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"

View file

@@ -15,6 +15,7 @@ import asyncio
import json
import os
import tempfile
from unittest.mock import MagicMock, patch
import pytest
@@ -695,37 +696,161 @@ async def test_gemini_pro_function_calling_httpx(provider, sync_mode):
pytest.fail("An unexpected exception occurred - {}".format(str(e)))
-@pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
+# @pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
def vertex_httpx_mock_post(url, data=None, json=None, headers=None):
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.headers = {"Content-Type": "application/json"}
mock_response.json.return_value = {
"candidates": [
{
"finishReason": "RECITATION",
"safetyRatings": [
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.14965563,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.13660839,
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.16344544,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.10230471,
},
{
"category": "HARM_CATEGORY_HARASSMENT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.1979091,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.06052939,
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.1765296,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.18417984,
},
],
"citationMetadata": {
"citations": [
{
"startIndex": 251,
"endIndex": 380,
"uri": "https://chocolatecake2023.blogspot.com/2023/02/taste-deliciousness-of-perfectly-baked.html?m=1",
},
{
"startIndex": 393,
"endIndex": 535,
"uri": "https://skinnymixes.co.uk/blogs/food-recipes/peanut-butter-cup-cookies",
},
{
"startIndex": 439,
"endIndex": 581,
"uri": "https://mast-producing-trees.org/aldis-chocolate-chips-are-peanut-and-tree-nut-free/",
},
{
"startIndex": 1117,
"endIndex": 1265,
"uri": "https://github.com/frdrck100/To_Do_Assignments",
},
{
"startIndex": 1146,
"endIndex": 1288,
"uri": "https://skinnymixes.co.uk/blogs/food-recipes/peanut-butter-cup-cookies",
},
{
"startIndex": 1166,
"endIndex": 1299,
"uri": "https://www.girlversusdough.com/brookies/",
},
{
"startIndex": 1780,
"endIndex": 1909,
"uri": "https://chocolatecake2023.blogspot.com/2023/02/taste-deliciousness-of-perfectly-baked.html?m=1",
},
{
"startIndex": 1834,
"endIndex": 1964,
"uri": "https://newsd.in/national-cream-cheese-brownie-day-2023-date-history-how-to-make-a-cream-cheese-brownie/",
},
{
"startIndex": 1846,
"endIndex": 1989,
"uri": "https://github.com/frdrck100/To_Do_Assignments",
},
{
"startIndex": 2121,
"endIndex": 2261,
"uri": "https://recipes.net/copycat/hardee/hardees-chocolate-chip-cookie-recipe/",
},
{
"startIndex": 2505,
"endIndex": 2671,
"uri": "https://www.tfrecipes.com/Oranges%20with%20dried%20cherries/",
},
{
"startIndex": 3390,
"endIndex": 3529,
"uri": "https://github.com/quantumcognition/Crud-palm",
},
{
"startIndex": 3568,
"endIndex": 3724,
"uri": "https://recipes.net/dessert/cakes/ultimate-easy-gingerbread/",
},
{
"startIndex": 3640,
"endIndex": 3770,
"uri": "https://recipes.net/dessert/cookies/soft-and-chewy-peanut-butter-cookies/",
},
]
},
}
],
"usageMetadata": {"promptTokenCount": 336, "totalTokenCount": 336},
}
return mock_response
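The mocked payload reproduces the failure mode from the linked issue: a candidate flagged with finishReason "RECITATION", citation metadata, and no content parts — the exact shape that should trip the new flagged-response check and surface as ContentPolicyViolationError.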
@pytest.mark.parametrize("provider", ["vertex_ai_beta"]) # "vertex_ai",
@pytest.mark.asyncio
-async def test_gemini_pro_json_schema_httpx(provider):
+async def test_gemini_pro_json_schema_httpx_content_policy_error(provider):
load_vertex_ai_credentials()
litellm.set_verbose = True
messages = [
{
"role": "user",
"content": """
-    List 5 popular cookie recipes.
-    Using this JSON schema:
+List 5 popular cookie recipes.
 Recipe = {"recipe_name": str}
 Return a `list[Recipe]`
+Using this JSON schema:
+```json
+{'$defs': {'Recipe': {'properties': {'recipe_name': {'examples': ['Chocolate Chip Cookies', 'Peanut Butter Cookies'], 'maxLength': 100, 'title': 'The recipe name', 'type': 'string'}, 'estimated_time': {'anyOf': [{'minimum': 0, 'type': 'integer'}, {'type': 'null'}], 'default': None, 'description': 'The estimated time to make the recipe in minutes', 'examples': [30, 45], 'title': 'The estimated time'}, 'ingredients': {'examples': [['flour', 'sugar', 'chocolate chips'], ['peanut butter', 'sugar', 'eggs']], 'items': {'type': 'string'}, 'maxItems': 10, 'title': 'The ingredients', 'type': 'array'}, 'instructions': {'examples': [['mix', 'bake'], ['mix', 'chill', 'bake']], 'items': {'type': 'string'}, 'maxItems': 10, 'title': 'The instructions', 'type': 'array'}}, 'required': ['recipe_name', 'ingredients', 'instructions'], 'title': 'Recipe', 'type': 'object'}}, 'properties': {'recipes': {'items': {'$ref': '#/$defs/Recipe'}, 'maxItems': 11, 'title': 'The recipes', 'type': 'array'}}, 'required': ['recipes'], 'title': 'MyRecipes', 'type': 'object'}
+```
""",
}
]
+    from litellm.llms.custom_httpx.http_handler import HTTPHandler
-    response = completion(
-        model="vertex_ai_beta/gemini-1.5-flash-preview-0514",
-        messages=messages,
-        response_format={"type": "json_object"},
-    )
+    client = HTTPHandler()
-    assert response.choices[0].message.content is not None
-    response_json = json.loads(response.choices[0].message.content)
+    with patch.object(client, "post", side_effect=vertex_httpx_mock_post) as mock_call:
+        try:
+            response = completion(
+                model="vertex_ai_beta/gemini-1.5-flash",
+                messages=messages,
+                response_format={"type": "json_object"},
+                client=client,
+            )
+        except litellm.ContentPolicyViolationError as e:
+            pass
-    assert isinstance(response_json, dict) or isinstance(response_json, list)
+    mock_call.assert_called_once()
@pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")

View file

@@ -1,26 +1,26 @@
-from openai import AuthenticationError, BadRequestError, RateLimitError, OpenAIError
+import asyncio
 import os
+import subprocess
 import sys
 import traceback
-import subprocess, asyncio
 from typing import Any
+from openai import AuthenticationError, BadRequestError, OpenAIError, RateLimitError
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
-import litellm
-from litellm import (
-    embedding,
-    completion,
-    # AuthenticationError,
-    ContextWindowExceededError,
-    # RateLimitError,
-    # ServiceUnavailableError,
-    # OpenAIError,
-)
 from concurrent.futures import ThreadPoolExecutor
+from unittest.mock import MagicMock, patch
 import pytest
-from unittest.mock import patch, MagicMock
+import litellm
+from litellm import (  # AuthenticationError,; RateLimitError,; ServiceUnavailableError,; OpenAIError,
+    ContextWindowExceededError,
+    completion,
+    embedding,
+)
litellm.vertex_project = "pathrise-convert-1606954137718"
litellm.vertex_location = "us-central1"
@@ -252,6 +252,7 @@ def test_completion_azure_exception():
async def asynctest_completion_azure_exception():
try:
import openai
import litellm
print("azure gpt-3.5 test\n\n")
@@ -283,8 +284,11 @@ async def asynctest_completion_azure_exception():
def asynctest_completion_openai_exception_bad_model():
try:
+import asyncio
 import openai
-import litellm, asyncio
+import litellm
print("azure exception bad model\n\n")
litellm.set_verbose = True
@@ -311,8 +315,11 @@ def asynctest_completion_openai_exception_bad_model():
def asynctest_completion_azure_exception_bad_model():
try:
+import asyncio
 import openai
-import litellm, asyncio
+import litellm
print("azure exception bad model\n\n")
litellm.set_verbose = True
@@ -663,7 +670,7 @@ def test_litellm_predibase_exception():
# print(f"accuracy_score: {accuracy_score}")
@pytest.mark.parametrize("provider", ["predibase"])
@pytest.mark.parametrize("provider", ["predibase", "vertex_ai_beta"])
def test_exception_mapping(provider):
"""
For predibase, run through a set of mock exceptions

View file

@@ -6235,7 +6235,11 @@ def exception_type(
llm_provider="sagemaker",
response=original_exception.response,
)
elif custom_llm_provider == "vertex_ai":
elif (
custom_llm_provider == "vertex_ai"
or custom_llm_provider == "vertex_ai_beta"
or custom_llm_provider == "gemini"
):
if (
"Vertex AI API has not been used in project" in error_str
or "Unable to find your project" in error_str
@@ -6254,6 +6258,13 @@
),
litellm_debug_info=extra_information,
)
if "400 Request payload size exceeds" in error_str:
exception_mapping_worked = True
raise ContextWindowExceededError(
message=f"VertexException - {error_str}",
model=model,
llm_provider=custom_llm_provider,
)
elif (
"None Unknown Error." in error_str
or "Content has no parts." in error_str
@@ -6287,13 +6298,13 @@
)
elif "The response was blocked." in error_str:
exception_mapping_worked = True
-raise UnprocessableEntityError(
-    message=f"VertexAIException UnprocessableEntityError - {error_str}",
+raise ContentPolicyViolationError(
+    message=f"VertexAIException ContentPolicyViolationError - {error_str}",
model=model,
llm_provider="vertex_ai",
litellm_debug_info=extra_information,
response=httpx.Response(
-status_code=422,
+status_code=400,
request=httpx.Request(
method="POST",
url=" https://cloud.google.com/vertex-ai/",
@@ -6345,6 +6356,27 @@
),
),
)
if original_exception.status_code == 401:
exception_mapping_worked = True
raise AuthenticationError(
message=f"VertexAIException - {original_exception.message}",
llm_provider=custom_llm_provider,
model=model,
)
if original_exception.status_code == 404:
exception_mapping_worked = True
raise NotFoundError(
message=f"VertexAIException - {original_exception.message}",
llm_provider=custom_llm_provider,
model=model,
)
if original_exception.status_code == 408:
exception_mapping_worked = True
raise Timeout(
message=f"VertexAIException - {original_exception.message}",
llm_provider=custom_llm_provider,
model=model,
)
if original_exception.status_code == 429:
exception_mapping_worked = True
@@ -6374,6 +6406,13 @@
request=httpx.Request(method="completion", url="https://github.com/BerriAI/litellm"), # type: ignore
),
)
if original_exception.status_code == 503:
exception_mapping_worked = True
raise ServiceUnavailableError(
message=f"VertexAIException - {original_exception.message}",
llm_provider=custom_llm_provider,
model=model,
)
elif custom_llm_provider == "palm" or custom_llm_provider == "gemini":
if "503 Getting metadata" in error_str:
# auth errors look like this
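Taken together, the vertex_ai / vertex_ai_beta / gemini branches now map Vertex errors onto litellm's standard exception types. A sketch of what a caller can rely on after this commit — the prompt is illustrative, the except order is arbitrary, and each comment names the trigger handled by the branch above:

```python
import litellm

try:
    litellm.completion(
        model="vertex_ai/gemini-1.5-flash",
        messages=[{"role": "user", "content": "hi"}],
    )
except litellm.AuthenticationError:           # HTTP 401
    ...
except litellm.NotFoundError:                 # HTTP 404
    ...
except litellm.Timeout:                       # HTTP 408
    ...
except litellm.ContentPolicyViolationError:   # "The response was blocked."
    ...
except litellm.ContextWindowExceededError:    # "400 Request payload size exceeds"
    ...
except litellm.ServiceUnavailableError:       # HTTP 503
    ...
```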

View file

@@ -1185,6 +1185,33 @@
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
},
"gemini-1.5-flash": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0.0001315,
"input_cost_per_video_per_second": 0.0001315,
"input_cost_per_audio_per_second": 0.000125,
"input_cost_per_token": 0.00000003125,
"input_cost_per_token_above_128k_tokens": 0.0000000625,
"output_cost_per_token": 0.00000009375,
"output_cost_per_token_above_128k_tokens": 0.0000001875,
"output_cost_per_image": 0.000263,
"output_cost_per_video_per_second": 0.000263,
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
},
"gemini-1.5-flash-001": {
"max_tokens": 8192,
"max_input_tokens": 1000000,
@@ -1207,6 +1234,7 @@
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1233,6 +1261,7 @@
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1253,6 +1282,7 @@
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1273,6 +1303,7 @@
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1293,6 +1324,7 @@
"output_cost_per_audio_per_second": 0.00025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"