forked from phoenix/litellm-mirror
Merge pull request #4271 from BerriAI/litellm_vertex_httpx_fix
fix(vertex_httpx.py): Correctly handle Vertex content policy violation error
commit d96ffe8075
7 changed files with 309 additions and 37 deletions
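
Net effect for callers: a Vertex candidate whose finishReason is one of SAFETY, RECITATION, BLOCKLIST, PROHIBITED_CONTENT, or SPII now surfaces as litellm.ContentPolicyViolationError instead of a generic UnprocessableEntityError. A minimal caller-side sketch (model and prompt are illustrative):

    import litellm

    try:
        response = litellm.completion(
            model="vertex_ai_beta/gemini-1.5-flash",
            messages=[{"role": "user", "content": "List 5 popular cookie recipes."}],
        )
    except litellm.ContentPolicyViolationError as e:
        # Raised when Vertex blocks the response, e.g. finishReason == "RECITATION"
        print("blocked:", e)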
@@ -9,7 +9,7 @@ import types
 import uuid
 from enum import Enum
 from functools import partial
-from typing import Any, Callable, List, Literal, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union
 
 import httpx  # type: ignore
 import ijson
@@ -241,6 +241,20 @@ class VertexGeminiConfig:
         "europe-west9",
     ]
 
+    def get_flagged_finish_reasons(self) -> Dict[str, str]:
+        """
+        Return Dictionary of finish reasons which indicate response was flagged
+
+        and what it means
+        """
+        return {
+            "SAFETY": "The token generation was stopped as the response was flagged for safety reasons. NOTE: When streaming the Candidate.content will be empty if content filters blocked the output.",
+            "RECITATION": "The token generation was stopped as the response was flagged for unauthorized citations.",
+            "BLOCKLIST": "The token generation was stopped as the response was flagged for the terms which are included from the terminology blocklist.",
+            "PROHIBITED_CONTENT": "The token generation was stopped as the response was flagged for the prohibited contents.",
+            "SPII": "The token generation was stopped as the response was flagged for Sensitive Personally Identifiable Information (SPII) contents.",
+        }
+
+
 async def make_call(
     client: Optional[AsyncHTTPHandler],
@@ -362,6 +376,27 @@ class VertexLLM(BaseLLM):
                 status_code=422,
             )
 
+        ## CHECK IF RESPONSE FLAGGED
+        if len(completion_response["candidates"]) > 0:
+            content_policy_violations = (
+                VertexGeminiConfig().get_flagged_finish_reasons()
+            )
+            if (
+                "finishReason" in completion_response["candidates"][0]
+                and completion_response["candidates"][0]["finishReason"]
+                in content_policy_violations.keys()
+            ):
+                ## CONTENT POLICY VIOLATION ERROR
+                raise VertexAIError(
+                    status_code=400,
+                    message="The response was blocked. Reason={}. Raw Response={}".format(
+                        content_policy_violations[
+                            completion_response["candidates"][0]["finishReason"]
+                        ],
+                        completion_response,
+                    ),
+                )
+
         model_response.choices = []  # type: ignore
 
         ## GET MODEL ##
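
For reference, the shape this check walks: the flag sits on the first candidate's finishReason. A trimmed version of the mocked Vertex payload used in the new test further down (one safetyRating kept, citations elided):

    flagged_response = {
        "candidates": [
            {
                # A finishReason matching a key of get_flagged_finish_reasons()
                # triggers the 400 VertexAIError above
                "finishReason": "RECITATION",
                "safetyRatings": [
                    {
                        "category": "HARM_CATEGORY_HATE_SPEECH",
                        "probability": "NEGLIGIBLE",
                        "probabilityScore": 0.14965563,
                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
                        "severityScore": 0.13660839,
                    },
                ],
                "citationMetadata": {"citations": []},  # trimmed here
            }
        ],
        "usageMetadata": {"promptTokenCount": 336, "totalTokenCount": 336},
    }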
@@ -804,6 +839,7 @@ class VertexLLM(BaseLLM):
             client = HTTPHandler(**_params)  # type: ignore
         else:
             client = client
+
         try:
             response = client.post(url=url, headers=headers, json=data)  # type: ignore
             response.raise_for_status()
@@ -1928,6 +1928,7 @@ def completion(
                 acompletion=acompletion,
                 timeout=timeout,
                 custom_llm_provider=custom_llm_provider,
+                client=client,
             )
 
         elif custom_llm_provider == "vertex_ai":
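
This client passthrough is what lets the new test inject a mocked HTTP handler; the same hook can supply a preconfigured handler in normal use. A sketch, assuming the vertex_ai_beta path as exercised in the tests below:

    import litellm
    from litellm.llms.custom_httpx.http_handler import HTTPHandler

    client = HTTPHandler()  # construct with custom settings as needed
    response = litellm.completion(
        model="vertex_ai_beta/gemini-1.5-flash",
        messages=[{"role": "user", "content": "hi"}],
        client=client,
    )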
@@ -1185,6 +1185,33 @@
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "gemini-1.5-flash": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 0.0001315,
+        "input_cost_per_video_per_second": 0.0001315,
+        "input_cost_per_audio_per_second": 0.000125,
+        "input_cost_per_token": 0.00000003125,
+        "input_cost_per_token_above_128k_tokens": 0.0000000625,
+        "output_cost_per_token": 0.00000009375,
+        "output_cost_per_token_above_128k_tokens": 0.0000001875,
+        "output_cost_per_image": 0.000263,
+        "output_cost_per_video_per_second": 0.000263,
+        "output_cost_per_audio_per_second": 0.00025,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
     "gemini-1.5-flash-001": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
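
As a quick sanity check on the new pricing entry, the per-token figures above translate to a request cost directly; a sketch with illustrative token counts, below the 128k threshold so the base rates apply:

    # gemini-1.5-flash rates from the entry above (USD per token)
    input_cost_per_token = 0.00000003125
    output_cost_per_token = 0.00000009375

    prompt_tokens, completion_tokens = 10_000, 1_000  # illustrative
    cost = prompt_tokens * input_cost_per_token + completion_tokens * output_cost_per_token
    print(f"${cost:.6f}")  # $0.000406 for this illustrative call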
@@ -1207,6 +1234,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1233,6 +1261,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1253,6 +1282,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1273,6 +1303,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1293,6 +1324,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -15,6 +15,7 @@ import asyncio
 import json
 import os
 import tempfile
+from unittest.mock import MagicMock, patch
 
 import pytest
 
@@ -695,37 +696,161 @@ async def test_gemini_pro_function_calling_httpx(provider, sync_mode):
         pytest.fail("An unexpected exception occurred - {}".format(str(e)))
 
 
-@pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
+# @pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
+def vertex_httpx_mock_post(url, data=None, json=None, headers=None):
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.headers = {"Content-Type": "application/json"}
+    mock_response.json.return_value = {
+        "candidates": [
+            {
+                "finishReason": "RECITATION",
+                "safetyRatings": [
+                    {
+                        "category": "HARM_CATEGORY_HATE_SPEECH",
+                        "probability": "NEGLIGIBLE",
+                        "probabilityScore": 0.14965563,
+                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
+                        "severityScore": 0.13660839,
+                    },
+                    {
+                        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+                        "probability": "NEGLIGIBLE",
+                        "probabilityScore": 0.16344544,
+                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
+                        "severityScore": 0.10230471,
+                    },
+                    {
+                        "category": "HARM_CATEGORY_HARASSMENT",
+                        "probability": "NEGLIGIBLE",
+                        "probabilityScore": 0.1979091,
+                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
+                        "severityScore": 0.06052939,
+                    },
+                    {
+                        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+                        "probability": "NEGLIGIBLE",
+                        "probabilityScore": 0.1765296,
+                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
+                        "severityScore": 0.18417984,
+                    },
+                ],
+                "citationMetadata": {
+                    "citations": [
+                        {
+                            "startIndex": 251,
+                            "endIndex": 380,
+                            "uri": "https://chocolatecake2023.blogspot.com/2023/02/taste-deliciousness-of-perfectly-baked.html?m=1",
+                        },
+                        {
+                            "startIndex": 393,
+                            "endIndex": 535,
+                            "uri": "https://skinnymixes.co.uk/blogs/food-recipes/peanut-butter-cup-cookies",
+                        },
+                        {
+                            "startIndex": 439,
+                            "endIndex": 581,
+                            "uri": "https://mast-producing-trees.org/aldis-chocolate-chips-are-peanut-and-tree-nut-free/",
+                        },
+                        {
+                            "startIndex": 1117,
+                            "endIndex": 1265,
+                            "uri": "https://github.com/frdrck100/To_Do_Assignments",
+                        },
+                        {
+                            "startIndex": 1146,
+                            "endIndex": 1288,
+                            "uri": "https://skinnymixes.co.uk/blogs/food-recipes/peanut-butter-cup-cookies",
+                        },
+                        {
+                            "startIndex": 1166,
+                            "endIndex": 1299,
+                            "uri": "https://www.girlversusdough.com/brookies/",
+                        },
+                        {
+                            "startIndex": 1780,
+                            "endIndex": 1909,
+                            "uri": "https://chocolatecake2023.blogspot.com/2023/02/taste-deliciousness-of-perfectly-baked.html?m=1",
+                        },
+                        {
+                            "startIndex": 1834,
+                            "endIndex": 1964,
+                            "uri": "https://newsd.in/national-cream-cheese-brownie-day-2023-date-history-how-to-make-a-cream-cheese-brownie/",
+                        },
+                        {
+                            "startIndex": 1846,
+                            "endIndex": 1989,
+                            "uri": "https://github.com/frdrck100/To_Do_Assignments",
+                        },
+                        {
+                            "startIndex": 2121,
+                            "endIndex": 2261,
+                            "uri": "https://recipes.net/copycat/hardee/hardees-chocolate-chip-cookie-recipe/",
+                        },
+                        {
+                            "startIndex": 2505,
+                            "endIndex": 2671,
+                            "uri": "https://www.tfrecipes.com/Oranges%20with%20dried%20cherries/",
+                        },
+                        {
+                            "startIndex": 3390,
+                            "endIndex": 3529,
+                            "uri": "https://github.com/quantumcognition/Crud-palm",
+                        },
+                        {
+                            "startIndex": 3568,
+                            "endIndex": 3724,
+                            "uri": "https://recipes.net/dessert/cakes/ultimate-easy-gingerbread/",
+                        },
+                        {
+                            "startIndex": 3640,
+                            "endIndex": 3770,
+                            "uri": "https://recipes.net/dessert/cookies/soft-and-chewy-peanut-butter-cookies/",
+                        },
+                    ]
+                },
+            }
+        ],
+        "usageMetadata": {"promptTokenCount": 336, "totalTokenCount": 336},
+    }
+    return mock_response
+
+
 @pytest.mark.parametrize("provider", ["vertex_ai_beta"])  # "vertex_ai",
 @pytest.mark.asyncio
-async def test_gemini_pro_json_schema_httpx(provider):
+async def test_gemini_pro_json_schema_httpx_content_policy_error(provider):
     load_vertex_ai_credentials()
     litellm.set_verbose = True
     messages = [
         {
             "role": "user",
             "content": """
-        List 5 popular cookie recipes.
-
-        Using this JSON schema:
-
-        Recipe = {"recipe_name": str}
-
-        Return a `list[Recipe]`
+    List 5 popular cookie recipes.
+
+    Using this JSON schema:
+    ```json
+    {'$defs': {'Recipe': {'properties': {'recipe_name': {'examples': ['Chocolate Chip Cookies', 'Peanut Butter Cookies'], 'maxLength': 100, 'title': 'The recipe name', 'type': 'string'}, 'estimated_time': {'anyOf': [{'minimum': 0, 'type': 'integer'}, {'type': 'null'}], 'default': None, 'description': 'The estimated time to make the recipe in minutes', 'examples': [30, 45], 'title': 'The estimated time'}, 'ingredients': {'examples': [['flour', 'sugar', 'chocolate chips'], ['peanut butter', 'sugar', 'eggs']], 'items': {'type': 'string'}, 'maxItems': 10, 'title': 'The ingredients', 'type': 'array'}, 'instructions': {'examples': [['mix', 'bake'], ['mix', 'chill', 'bake']], 'items': {'type': 'string'}, 'maxItems': 10, 'title': 'The instructions', 'type': 'array'}}, 'required': ['recipe_name', 'ingredients', 'instructions'], 'title': 'Recipe', 'type': 'object'}}, 'properties': {'recipes': {'items': {'$ref': '#/$defs/Recipe'}, 'maxItems': 11, 'title': 'The recipes', 'type': 'array'}}, 'required': ['recipes'], 'title': 'MyRecipes', 'type': 'object'}
+    ```
             """,
         }
     ]
-    response = completion(
-        model="vertex_ai_beta/gemini-1.5-flash-preview-0514",
-        messages=messages,
-        response_format={"type": "json_object"},
-    )
-
-    assert response.choices[0].message.content is not None
-    response_json = json.loads(response.choices[0].message.content)
-
-    assert isinstance(response_json, dict) or isinstance(response_json, list)
+    from litellm.llms.custom_httpx.http_handler import HTTPHandler
+
+    client = HTTPHandler()
+
+    with patch.object(client, "post", side_effect=vertex_httpx_mock_post) as mock_call:
+        try:
+            response = completion(
+                model="vertex_ai_beta/gemini-1.5-flash",
+                messages=messages,
+                response_format={"type": "json_object"},
+                client=client,
+            )
+        except litellm.ContentPolicyViolationError as e:
+            pass
+
+        mock_call.assert_called_once()
 
 
 @pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
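
The test above inlines a large mocked payload, but the underlying pattern is compact and reusable. A sketch of the same idea; make_mock_response is a hypothetical helper, not part of the test suite:

    from unittest.mock import MagicMock, patch

    from litellm.llms.custom_httpx.http_handler import HTTPHandler

    def make_mock_response(payload, status_code=200):
        # Hypothetical helper: mimics the subset of the httpx response that
        # the Vertex path reads (status_code, headers, .json())
        resp = MagicMock()
        resp.status_code = status_code
        resp.headers = {"Content-Type": "application/json"}
        resp.json.return_value = payload
        return resp

    client = HTTPHandler()
    flagged = {"candidates": [{"finishReason": "SAFETY"}], "usageMetadata": {}}
    with patch.object(client, "post", return_value=make_mock_response(flagged)):
        pass  # pass client=client into completion(...), as in the test above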
@@ -1,26 +1,26 @@
-from openai import AuthenticationError, BadRequestError, RateLimitError, OpenAIError
+import asyncio
 import os
+import subprocess
 import sys
 import traceback
-import subprocess, asyncio
 from typing import Any
 
+from openai import AuthenticationError, BadRequestError, OpenAIError, RateLimitError
+
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-import litellm
-from litellm import (
-    embedding,
-    completion,
-    # AuthenticationError,
-    ContextWindowExceededError,
-    # RateLimitError,
-    # ServiceUnavailableError,
-    # OpenAIError,
-)
 from concurrent.futures import ThreadPoolExecutor
+from unittest.mock import MagicMock, patch
+
 import pytest
-from unittest.mock import patch, MagicMock
+
+import litellm
+from litellm import (  # AuthenticationError,; RateLimitError,; ServiceUnavailableError,; OpenAIError,
+    ContextWindowExceededError,
+    completion,
+    embedding,
+)
 
 litellm.vertex_project = "pathrise-convert-1606954137718"
 litellm.vertex_location = "us-central1"
@@ -252,6 +252,7 @@ def test_completion_azure_exception():
 async def asynctest_completion_azure_exception():
     try:
         import openai
+
         import litellm
 
         print("azure gpt-3.5 test\n\n")
@@ -283,8 +284,11 @@ async def asynctest_completion_azure_exception():
 
 def asynctest_completion_openai_exception_bad_model():
     try:
+        import asyncio
+
         import openai
-        import litellm, asyncio
+
+        import litellm
 
         print("azure exception bad model\n\n")
         litellm.set_verbose = True
@@ -311,8 +315,11 @@ def asynctest_completion_openai_exception_bad_model():
 
 def asynctest_completion_azure_exception_bad_model():
     try:
+        import asyncio
+
         import openai
-        import litellm, asyncio
+
+        import litellm
 
         print("azure exception bad model\n\n")
         litellm.set_verbose = True
@@ -663,7 +670,7 @@ def test_litellm_predibase_exception():
 # print(f"accuracy_score: {accuracy_score}")
 
 
-@pytest.mark.parametrize("provider", ["predibase"])
+@pytest.mark.parametrize("provider", ["predibase", "vertex_ai_beta"])
 def test_exception_mapping(provider):
     """
     For predibase, run through a set of mock exceptions
@@ -6240,7 +6240,11 @@ def exception_type(
                     llm_provider="sagemaker",
                     response=original_exception.response,
                 )
-        elif custom_llm_provider == "vertex_ai":
+        elif (
+            custom_llm_provider == "vertex_ai"
+            or custom_llm_provider == "vertex_ai_beta"
+            or custom_llm_provider == "gemini"
+        ):
             if (
                 "Vertex AI API has not been used in project" in error_str
                 or "Unable to find your project" in error_str
@@ -6259,6 +6263,13 @@ def exception_type(
                     ),
                     litellm_debug_info=extra_information,
                 )
+            if "400 Request payload size exceeds" in error_str:
+                exception_mapping_worked = True
+                raise ContextWindowExceededError(
+                    message=f"VertexException - {error_str}",
+                    model=model,
+                    llm_provider=custom_llm_provider,
+                )
             elif (
                 "None Unknown Error." in error_str
                 or "Content has no parts." in error_str
@@ -6292,13 +6303,13 @@ def exception_type(
                 )
             elif "The response was blocked." in error_str:
                 exception_mapping_worked = True
-                raise UnprocessableEntityError(
-                    message=f"VertexAIException UnprocessableEntityError - {error_str}",
+                raise ContentPolicyViolationError(
+                    message=f"VertexAIException ContentPolicyViolationError - {error_str}",
                     model=model,
                     llm_provider="vertex_ai",
                     litellm_debug_info=extra_information,
                     response=httpx.Response(
-                        status_code=422,
+                        status_code=400,
                         request=httpx.Request(
                             method="POST",
                             url=" https://cloud.google.com/vertex-ai/",
@@ -6350,6 +6361,27 @@ def exception_type(
                         ),
                     ),
                 )
+            if original_exception.status_code == 401:
+                exception_mapping_worked = True
+                raise AuthenticationError(
+                    message=f"VertexAIException - {original_exception.message}",
+                    llm_provider=custom_llm_provider,
+                    model=model,
+                )
+            if original_exception.status_code == 404:
+                exception_mapping_worked = True
+                raise NotFoundError(
+                    message=f"VertexAIException - {original_exception.message}",
+                    llm_provider=custom_llm_provider,
+                    model=model,
+                )
+            if original_exception.status_code == 408:
+                exception_mapping_worked = True
+                raise Timeout(
+                    message=f"VertexAIException - {original_exception.message}",
+                    llm_provider=custom_llm_provider,
+                    model=model,
+                )
 
             if original_exception.status_code == 429:
                 exception_mapping_worked = True
@@ -6379,6 +6411,13 @@ def exception_type(
                     request=httpx.Request(method="completion", url="https://github.com/BerriAI/litellm"),  # type: ignore
                 ),
             )
+            if original_exception.status_code == 503:
+                exception_mapping_worked = True
+                raise ServiceUnavailableError(
+                    message=f"VertexAIException - {original_exception.message}",
+                    llm_provider=custom_llm_provider,
+                    model=model,
+                )
         elif custom_llm_provider == "palm" or custom_llm_provider == "gemini":
             if "503 Getting metadata" in error_str:
                 # auth errors look like this
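
Taken together, these exception_type changes map Vertex HTTP errors onto litellm's standard exception hierarchy (401 → AuthenticationError, 404 → NotFoundError, 408 → Timeout, 503 → ServiceUnavailableError, plus the ContentPolicyViolationError mapping above), so callers can branch on exception type instead of parsing error strings. A sketch with an illustrative retry policy:

    import litellm

    messages = [{"role": "user", "content": "List 5 popular cookie recipes."}]
    try:
        response = litellm.completion(
            model="vertex_ai_beta/gemini-1.5-flash", messages=messages
        )
    except litellm.ContentPolicyViolationError:
        response = None  # blocked content; retrying the same prompt won't help
    except (litellm.Timeout, litellm.ServiceUnavailableError):
        response = None  # transient 408/503; a retry with backoff is reasonable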