diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py
index 5e09ad3de..2fcd74d5f 100644
--- a/litellm/llms/vertex_httpx.py
+++ b/litellm/llms/vertex_httpx.py
@@ -9,7 +9,7 @@ import types
 import uuid
 from enum import Enum
 from functools import partial
-from typing import Any, Callable, List, Literal, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union
 
 import httpx  # type: ignore
 import ijson
@@ -241,6 +241,20 @@ class VertexGeminiConfig:
         "europe-west9",
     ]
 
+    def get_flagged_finish_reasons(self) -> Dict[str, str]:
+        """
+        Return a dictionary of finish reasons which indicate the response was flagged,
+
+        and what each means.
+        """
+        return {
+            "SAFETY": "The token generation was stopped as the response was flagged for safety reasons. NOTE: When streaming the Candidate.content will be empty if content filters blocked the output.",
+            "RECITATION": "The token generation was stopped as the response was flagged for unauthorized citations.",
+            "BLOCKLIST": "The token generation was stopped as the response was flagged for the terms which are included from the terminology blocklist.",
+            "PROHIBITED_CONTENT": "The token generation was stopped as the response was flagged for the prohibited contents.",
+            "SPII": "The token generation was stopped as the response was flagged for Sensitive Personally Identifiable Information (SPII) contents.",
+        }
+
 
 async def make_call(
     client: Optional[AsyncHTTPHandler],
@@ -362,6 +376,27 @@ class VertexLLM(BaseLLM):
                 status_code=422,
             )
 
+        ## CHECK IF RESPONSE FLAGGED
+        if len(completion_response["candidates"]) > 0:
+            content_policy_violations = (
+                VertexGeminiConfig().get_flagged_finish_reasons()
+            )
+            if (
+                "finishReason" in completion_response["candidates"][0]
+                and completion_response["candidates"][0]["finishReason"]
+                in content_policy_violations.keys()
+            ):
+                ## CONTENT POLICY VIOLATION ERROR
+                raise VertexAIError(
+                    status_code=400,
+                    message="The response was blocked. Reason={}. Raw Response={}".format(
+                        content_policy_violations[
+                            completion_response["candidates"][0]["finishReason"]
+                        ],
+                        completion_response,
+                    ),
+                )
+
         model_response.choices = []  # type: ignore
 
         ## GET MODEL ##
@@ -804,6 +839,7 @@ class VertexLLM(BaseLLM):
             client = HTTPHandler(**_params)  # type: ignore
         else:
             client = client
+
         try:
             response = client.post(url=url, headers=headers, json=data)  # type: ignore
             response.raise_for_status()
diff --git a/litellm/main.py b/litellm/main.py
index f46a9578b..de611c66a 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1928,6 +1928,7 @@ def completion(
                 acompletion=acompletion,
                 timeout=timeout,
                 custom_llm_provider=custom_llm_provider,
+                client=client,
             )
 
         elif custom_llm_provider == "vertex_ai":
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 52ae8dae2..473f3d3fe 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -1185,6 +1185,33 @@
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "gemini-1.5-flash": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 0.0001315,
+        "input_cost_per_video_per_second": 0.0001315,
+        "input_cost_per_audio_per_second": 0.000125,
+        "input_cost_per_token": 0.00000003125,
+        "input_cost_per_token_above_128k_tokens": 0.0000000625,
+        "output_cost_per_token": 0.00000009375,
+        "output_cost_per_token_above_128k_tokens": 0.0000001875,
+        "output_cost_per_image": 0.000263,
+        "output_cost_per_video_per_second": 0.000263,
+        "output_cost_per_audio_per_second": 0.00025,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
     "gemini-1.5-flash-001": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
@@ -1207,6 +1234,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1233,6 +1261,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1253,6 +1282,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1273,6 +1303,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1293,6 +1324,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
"https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1293,6 +1324,7 @@ "output_cost_per_audio_per_second": 0.00025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", + "supports_system_messages": true, "supports_function_calling": true, "supports_tool_choice": true, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py index a08a0ba55..68bb32b4e 100644 --- a/litellm/tests/test_amazing_vertex_completion.py +++ b/litellm/tests/test_amazing_vertex_completion.py @@ -15,6 +15,7 @@ import asyncio import json import os import tempfile +from unittest.mock import MagicMock, patch import pytest @@ -695,37 +696,161 @@ async def test_gemini_pro_function_calling_httpx(provider, sync_mode): pytest.fail("An unexpected exception occurred - {}".format(str(e))) -@pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call") +# @pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call") +def vertex_httpx_mock_post(url, data=None, json=None, headers=None): + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"Content-Type": "application/json"} + mock_response.json.return_value = { + "candidates": [ + { + "finishReason": "RECITATION", + "safetyRatings": [ + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "probability": "NEGLIGIBLE", + "probabilityScore": 0.14965563, + "severity": "HARM_SEVERITY_NEGLIGIBLE", + "severityScore": 0.13660839, + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "probability": "NEGLIGIBLE", + "probabilityScore": 0.16344544, + "severity": "HARM_SEVERITY_NEGLIGIBLE", + "severityScore": 0.10230471, + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "probability": "NEGLIGIBLE", + "probabilityScore": 0.1979091, + "severity": "HARM_SEVERITY_NEGLIGIBLE", + "severityScore": 0.06052939, + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "probability": "NEGLIGIBLE", + "probabilityScore": 0.1765296, + "severity": "HARM_SEVERITY_NEGLIGIBLE", + "severityScore": 0.18417984, + }, + ], + "citationMetadata": { + "citations": [ + { + "startIndex": 251, + "endIndex": 380, + "uri": "https://chocolatecake2023.blogspot.com/2023/02/taste-deliciousness-of-perfectly-baked.html?m=1", + }, + { + "startIndex": 393, + "endIndex": 535, + "uri": "https://skinnymixes.co.uk/blogs/food-recipes/peanut-butter-cup-cookies", + }, + { + "startIndex": 439, + "endIndex": 581, + "uri": "https://mast-producing-trees.org/aldis-chocolate-chips-are-peanut-and-tree-nut-free/", + }, + { + "startIndex": 1117, + "endIndex": 1265, + "uri": "https://github.com/frdrck100/To_Do_Assignments", + }, + { + "startIndex": 1146, + "endIndex": 1288, + "uri": "https://skinnymixes.co.uk/blogs/food-recipes/peanut-butter-cup-cookies", + }, + { + "startIndex": 1166, + "endIndex": 1299, + "uri": "https://www.girlversusdough.com/brookies/", + }, + { + "startIndex": 1780, + "endIndex": 1909, + "uri": "https://chocolatecake2023.blogspot.com/2023/02/taste-deliciousness-of-perfectly-baked.html?m=1", + }, + { + "startIndex": 1834, + "endIndex": 1964, + "uri": "https://newsd.in/national-cream-cheese-brownie-day-2023-date-history-how-to-make-a-cream-cheese-brownie/", + }, + { + "startIndex": 1846, + "endIndex": 1989, + "uri": "https://github.com/frdrck100/To_Do_Assignments", + }, + { + "startIndex": 2121, + "endIndex": 2261, + "uri": 
"https://recipes.net/copycat/hardee/hardees-chocolate-chip-cookie-recipe/", + }, + { + "startIndex": 2505, + "endIndex": 2671, + "uri": "https://www.tfrecipes.com/Oranges%20with%20dried%20cherries/", + }, + { + "startIndex": 3390, + "endIndex": 3529, + "uri": "https://github.com/quantumcognition/Crud-palm", + }, + { + "startIndex": 3568, + "endIndex": 3724, + "uri": "https://recipes.net/dessert/cakes/ultimate-easy-gingerbread/", + }, + { + "startIndex": 3640, + "endIndex": 3770, + "uri": "https://recipes.net/dessert/cookies/soft-and-chewy-peanut-butter-cookies/", + }, + ] + }, + } + ], + "usageMetadata": {"promptTokenCount": 336, "totalTokenCount": 336}, + } + return mock_response + + @pytest.mark.parametrize("provider", ["vertex_ai_beta"]) # "vertex_ai", @pytest.mark.asyncio -async def test_gemini_pro_json_schema_httpx(provider): +async def test_gemini_pro_json_schema_httpx_content_policy_error(provider): load_vertex_ai_credentials() litellm.set_verbose = True messages = [ { "role": "user", "content": """ - List 5 popular cookie recipes. + +List 5 popular cookie recipes. - Using this JSON schema: - - Recipe = {"recipe_name": str} - - Return a `list[Recipe]` +Using this JSON schema: +```json +{'$defs': {'Recipe': {'properties': {'recipe_name': {'examples': ['Chocolate Chip Cookies', 'Peanut Butter Cookies'], 'maxLength': 100, 'title': 'The recipe name', 'type': 'string'}, 'estimated_time': {'anyOf': [{'minimum': 0, 'type': 'integer'}, {'type': 'null'}], 'default': None, 'description': 'The estimated time to make the recipe in minutes', 'examples': [30, 45], 'title': 'The estimated time'}, 'ingredients': {'examples': [['flour', 'sugar', 'chocolate chips'], ['peanut butter', 'sugar', 'eggs']], 'items': {'type': 'string'}, 'maxItems': 10, 'title': 'The ingredients', 'type': 'array'}, 'instructions': {'examples': [['mix', 'bake'], ['mix', 'chill', 'bake']], 'items': {'type': 'string'}, 'maxItems': 10, 'title': 'The instructions', 'type': 'array'}}, 'required': ['recipe_name', 'ingredients', 'instructions'], 'title': 'Recipe', 'type': 'object'}}, 'properties': {'recipes': {'items': {'$ref': '#/$defs/Recipe'}, 'maxItems': 11, 'title': 'The recipes', 'type': 'array'}}, 'required': ['recipes'], 'title': 'MyRecipes', 'type': 'object'} +``` """, } ] + from litellm.llms.custom_httpx.http_handler import HTTPHandler - response = completion( - model="vertex_ai_beta/gemini-1.5-flash-preview-0514", - messages=messages, - response_format={"type": "json_object"}, - ) + client = HTTPHandler() - assert response.choices[0].message.content is not None - response_json = json.loads(response.choices[0].message.content) + with patch.object(client, "post", side_effect=vertex_httpx_mock_post) as mock_call: + try: + response = completion( + model="vertex_ai_beta/gemini-1.5-flash", + messages=messages, + response_format={"type": "json_object"}, + client=client, + ) + except litellm.ContentPolicyViolationError as e: + pass - assert isinstance(response_json, dict) or isinstance(response_json, list) + mock_call.assert_called_once() @pytest.mark.skip(reason="exhausted vertex quota. 
need to refactor to mock the call") diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py index 4d20a39cf..28d742931 100644 --- a/litellm/tests/test_exceptions.py +++ b/litellm/tests/test_exceptions.py @@ -1,26 +1,26 @@ -from openai import AuthenticationError, BadRequestError, RateLimitError, OpenAIError +import asyncio import os +import subprocess import sys import traceback -import subprocess, asyncio from typing import Any +from openai import AuthenticationError, BadRequestError, OpenAIError, RateLimitError + sys.path.insert( 0, os.path.abspath("../..") ) # Adds the parent directory to the system path -import litellm -from litellm import ( - embedding, - completion, - # AuthenticationError, - ContextWindowExceededError, - # RateLimitError, - # ServiceUnavailableError, - # OpenAIError, -) from concurrent.futures import ThreadPoolExecutor +from unittest.mock import MagicMock, patch + import pytest -from unittest.mock import patch, MagicMock + +import litellm +from litellm import ( # AuthenticationError,; RateLimitError,; ServiceUnavailableError,; OpenAIError, + ContextWindowExceededError, + completion, + embedding, +) litellm.vertex_project = "pathrise-convert-1606954137718" litellm.vertex_location = "us-central1" @@ -252,6 +252,7 @@ def test_completion_azure_exception(): async def asynctest_completion_azure_exception(): try: import openai + import litellm print("azure gpt-3.5 test\n\n") @@ -283,8 +284,11 @@ async def asynctest_completion_azure_exception(): def asynctest_completion_openai_exception_bad_model(): try: + import asyncio + import openai - import litellm, asyncio + + import litellm print("azure exception bad model\n\n") litellm.set_verbose = True @@ -311,8 +315,11 @@ def asynctest_completion_openai_exception_bad_model(): def asynctest_completion_azure_exception_bad_model(): try: + import asyncio + import openai - import litellm, asyncio + + import litellm print("azure exception bad model\n\n") litellm.set_verbose = True @@ -663,7 +670,7 @@ def test_litellm_predibase_exception(): # print(f"accuracy_score: {accuracy_score}") -@pytest.mark.parametrize("provider", ["predibase"]) +@pytest.mark.parametrize("provider", ["predibase", "vertex_ai_beta"]) def test_exception_mapping(provider): """ For predibase, run through a set of mock exceptions diff --git a/litellm/utils.py b/litellm/utils.py index 795526a32..009c168b5 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -6240,7 +6240,11 @@ def exception_type( llm_provider="sagemaker", response=original_exception.response, ) - elif custom_llm_provider == "vertex_ai": + elif ( + custom_llm_provider == "vertex_ai" + or custom_llm_provider == "vertex_ai_beta" + or custom_llm_provider == "gemini" + ): if ( "Vertex AI API has not been used in project" in error_str or "Unable to find your project" in error_str @@ -6259,6 +6263,13 @@ def exception_type( ), litellm_debug_info=extra_information, ) + if "400 Request payload size exceeds" in error_str: + exception_mapping_worked = True + raise ContextWindowExceededError( + message=f"VertexException - {error_str}", + model=model, + llm_provider=custom_llm_provider, + ) elif ( "None Unknown Error." in error_str or "Content has no parts." in error_str @@ -6292,13 +6303,13 @@ def exception_type( ) elif "The response was blocked." 
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 52ae8dae2..473f3d3fe 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -1185,6 +1185,33 @@
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "gemini-1.5-flash": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 0.0001315,
+        "input_cost_per_video_per_second": 0.0001315,
+        "input_cost_per_audio_per_second": 0.000125,
+        "input_cost_per_token": 0.00000003125,
+        "input_cost_per_token_above_128k_tokens": 0.0000000625,
+        "output_cost_per_token": 0.00000009375,
+        "output_cost_per_token_above_128k_tokens": 0.0000001875,
+        "output_cost_per_image": 0.000263,
+        "output_cost_per_video_per_second": 0.000263,
+        "output_cost_per_audio_per_second": 0.00025,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
     "gemini-1.5-flash-001": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
@@ -1207,6 +1234,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1233,6 +1261,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1253,6 +1282,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1273,6 +1303,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1293,6 +1324,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"