diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py
index 5e09ad3de..2fcd74d5f 100644
--- a/litellm/llms/vertex_httpx.py
+++ b/litellm/llms/vertex_httpx.py
@@ -9,7 +9,7 @@ import types
 import uuid
 from enum import Enum
 from functools import partial
-from typing import Any, Callable, List, Literal, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union
 
 import httpx  # type: ignore
 import ijson
@@ -241,6 +241,20 @@ class VertexGeminiConfig:
         "europe-west9",
     ]
 
+    def get_flagged_finish_reasons(self) -> Dict[str, str]:
+        """
+        Return a dictionary of finish reasons which indicate the response was flagged,
+
+        and what each means.
+        """
+        return {
+            "SAFETY": "The token generation was stopped as the response was flagged for safety reasons. NOTE: When streaming the Candidate.content will be empty if content filters blocked the output.",
+            "RECITATION": "The token generation was stopped as the response was flagged for unauthorized citations.",
+            "BLOCKLIST": "The token generation was stopped as the response was flagged for the terms which are included from the terminology blocklist.",
+            "PROHIBITED_CONTENT": "The token generation was stopped as the response was flagged for the prohibited contents.",
+            "SPII": "The token generation was stopped as the response was flagged for Sensitive Personally Identifiable Information (SPII) contents.",
+        }
+
 
 async def make_call(
     client: Optional[AsyncHTTPHandler],
@@ -362,6 +376,27 @@ class VertexLLM(BaseLLM):
                 status_code=422,
             )
 
+        ## CHECK IF RESPONSE FLAGGED
+        if len(completion_response["candidates"]) > 0:
+            content_policy_violations = (
+                VertexGeminiConfig().get_flagged_finish_reasons()
+            )
+            if (
+                "finishReason" in completion_response["candidates"][0]
+                and completion_response["candidates"][0]["finishReason"]
+                in content_policy_violations.keys()
+            ):
+                ## CONTENT POLICY VIOLATION ERROR
+                raise VertexAIError(
+                    status_code=400,
+                    message="The response was blocked. Reason={}. Raw Response={}".format(
+                        content_policy_violations[
+                            completion_response["candidates"][0]["finishReason"]
+                        ],
+                        completion_response,
+                    ),
+                )
+
         model_response.choices = []  # type: ignore
 
         ## GET MODEL ##
@@ -804,6 +839,7 @@ class VertexLLM(BaseLLM):
             client = HTTPHandler(**_params)  # type: ignore
         else:
             client = client
+
         try:
             response = client.post(url=url, headers=headers, json=data)  # type: ignore
             response.raise_for_status()
diff --git a/litellm/main.py b/litellm/main.py
index f46a9578b..de611c66a 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1928,6 +1928,7 @@ def completion(
                 acompletion=acompletion,
                 timeout=timeout,
                 custom_llm_provider=custom_llm_provider,
+                client=client,
             )
 
         elif custom_llm_provider == "vertex_ai":
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 52ae8dae2..473f3d3fe 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -1185,6 +1185,33 @@
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "gemini-1.5-flash": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 0.0001315,
+        "input_cost_per_video_per_second": 0.0001315,
+        "input_cost_per_audio_per_second": 0.000125,
+        "input_cost_per_token": 0.00000003125,
+        "input_cost_per_token_above_128k_tokens": 0.0000000625,
+        "output_cost_per_token": 0.00000009375,
+        "output_cost_per_token_above_128k_tokens": 0.0000001875,
+        "output_cost_per_image": 0.000263,
+        "output_cost_per_video_per_second": 0.000263,
+        "output_cost_per_audio_per_second": 0.00025,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
     "gemini-1.5-flash-001": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
@@ -1207,6 +1234,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1233,6 +1261,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1253,6 +1282,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1273,6 +1303,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1293,6 +1324,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
"https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1293,6 +1324,7 @@ "output_cost_per_audio_per_second": 0.00025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", + "supports_system_messages": true, "supports_function_calling": true, "supports_tool_choice": true, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py index a08a0ba55..68bb32b4e 100644 --- a/litellm/tests/test_amazing_vertex_completion.py +++ b/litellm/tests/test_amazing_vertex_completion.py @@ -15,6 +15,7 @@ import asyncio import json import os import tempfile +from unittest.mock import MagicMock, patch import pytest @@ -695,37 +696,161 @@ async def test_gemini_pro_function_calling_httpx(provider, sync_mode): pytest.fail("An unexpected exception occurred - {}".format(str(e))) -@pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call") +# @pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call") +def vertex_httpx_mock_post(url, data=None, json=None, headers=None): + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"Content-Type": "application/json"} + mock_response.json.return_value = { + "candidates": [ + { + "finishReason": "RECITATION", + "safetyRatings": [ + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "probability": "NEGLIGIBLE", + "probabilityScore": 0.14965563, + "severity": "HARM_SEVERITY_NEGLIGIBLE", + "severityScore": 0.13660839, + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "probability": "NEGLIGIBLE", + "probabilityScore": 0.16344544, + "severity": "HARM_SEVERITY_NEGLIGIBLE", + "severityScore": 0.10230471, + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "probability": "NEGLIGIBLE", + "probabilityScore": 0.1979091, + "severity": "HARM_SEVERITY_NEGLIGIBLE", + "severityScore": 0.06052939, + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "probability": "NEGLIGIBLE", + "probabilityScore": 0.1765296, + "severity": "HARM_SEVERITY_NEGLIGIBLE", + "severityScore": 0.18417984, + }, + ], + "citationMetadata": { + "citations": [ + { + "startIndex": 251, + "endIndex": 380, + "uri": "https://chocolatecake2023.blogspot.com/2023/02/taste-deliciousness-of-perfectly-baked.html?m=1", + }, + { + "startIndex": 393, + "endIndex": 535, + "uri": "https://skinnymixes.co.uk/blogs/food-recipes/peanut-butter-cup-cookies", + }, + { + "startIndex": 439, + "endIndex": 581, + "uri": "https://mast-producing-trees.org/aldis-chocolate-chips-are-peanut-and-tree-nut-free/", + }, + { + "startIndex": 1117, + "endIndex": 1265, + "uri": "https://github.com/frdrck100/To_Do_Assignments", + }, + { + "startIndex": 1146, + "endIndex": 1288, + "uri": "https://skinnymixes.co.uk/blogs/food-recipes/peanut-butter-cup-cookies", + }, + { + "startIndex": 1166, + "endIndex": 1299, + "uri": "https://www.girlversusdough.com/brookies/", + }, + { + "startIndex": 1780, + "endIndex": 1909, + "uri": "https://chocolatecake2023.blogspot.com/2023/02/taste-deliciousness-of-perfectly-baked.html?m=1", + }, + { + "startIndex": 1834, + "endIndex": 1964, + "uri": "https://newsd.in/national-cream-cheese-brownie-day-2023-date-history-how-to-make-a-cream-cheese-brownie/", + }, + { + "startIndex": 1846, + "endIndex": 1989, + "uri": "https://github.com/frdrck100/To_Do_Assignments", + }, + { + "startIndex": 2121, + "endIndex": 2261, + "uri": 
"https://recipes.net/copycat/hardee/hardees-chocolate-chip-cookie-recipe/", + }, + { + "startIndex": 2505, + "endIndex": 2671, + "uri": "https://www.tfrecipes.com/Oranges%20with%20dried%20cherries/", + }, + { + "startIndex": 3390, + "endIndex": 3529, + "uri": "https://github.com/quantumcognition/Crud-palm", + }, + { + "startIndex": 3568, + "endIndex": 3724, + "uri": "https://recipes.net/dessert/cakes/ultimate-easy-gingerbread/", + }, + { + "startIndex": 3640, + "endIndex": 3770, + "uri": "https://recipes.net/dessert/cookies/soft-and-chewy-peanut-butter-cookies/", + }, + ] + }, + } + ], + "usageMetadata": {"promptTokenCount": 336, "totalTokenCount": 336}, + } + return mock_response + + @pytest.mark.parametrize("provider", ["vertex_ai_beta"]) # "vertex_ai", @pytest.mark.asyncio -async def test_gemini_pro_json_schema_httpx(provider): +async def test_gemini_pro_json_schema_httpx_content_policy_error(provider): load_vertex_ai_credentials() litellm.set_verbose = True messages = [ { "role": "user", "content": """ - List 5 popular cookie recipes. + +List 5 popular cookie recipes. - Using this JSON schema: - - Recipe = {"recipe_name": str} - - Return a `list[Recipe]` +Using this JSON schema: +```json +{'$defs': {'Recipe': {'properties': {'recipe_name': {'examples': ['Chocolate Chip Cookies', 'Peanut Butter Cookies'], 'maxLength': 100, 'title': 'The recipe name', 'type': 'string'}, 'estimated_time': {'anyOf': [{'minimum': 0, 'type': 'integer'}, {'type': 'null'}], 'default': None, 'description': 'The estimated time to make the recipe in minutes', 'examples': [30, 45], 'title': 'The estimated time'}, 'ingredients': {'examples': [['flour', 'sugar', 'chocolate chips'], ['peanut butter', 'sugar', 'eggs']], 'items': {'type': 'string'}, 'maxItems': 10, 'title': 'The ingredients', 'type': 'array'}, 'instructions': {'examples': [['mix', 'bake'], ['mix', 'chill', 'bake']], 'items': {'type': 'string'}, 'maxItems': 10, 'title': 'The instructions', 'type': 'array'}}, 'required': ['recipe_name', 'ingredients', 'instructions'], 'title': 'Recipe', 'type': 'object'}}, 'properties': {'recipes': {'items': {'$ref': '#/$defs/Recipe'}, 'maxItems': 11, 'title': 'The recipes', 'type': 'array'}}, 'required': ['recipes'], 'title': 'MyRecipes', 'type': 'object'} +``` """, } ] + from litellm.llms.custom_httpx.http_handler import HTTPHandler - response = completion( - model="vertex_ai_beta/gemini-1.5-flash-preview-0514", - messages=messages, - response_format={"type": "json_object"}, - ) + client = HTTPHandler() - assert response.choices[0].message.content is not None - response_json = json.loads(response.choices[0].message.content) + with patch.object(client, "post", side_effect=vertex_httpx_mock_post) as mock_call: + try: + response = completion( + model="vertex_ai_beta/gemini-1.5-flash", + messages=messages, + response_format={"type": "json_object"}, + client=client, + ) + except litellm.ContentPolicyViolationError as e: + pass - assert isinstance(response_json, dict) or isinstance(response_json, list) + mock_call.assert_called_once() @pytest.mark.skip(reason="exhausted vertex quota. 
need to refactor to mock the call") diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py index 4d20a39cf..28d742931 100644 --- a/litellm/tests/test_exceptions.py +++ b/litellm/tests/test_exceptions.py @@ -1,26 +1,26 @@ -from openai import AuthenticationError, BadRequestError, RateLimitError, OpenAIError +import asyncio import os +import subprocess import sys import traceback -import subprocess, asyncio from typing import Any +from openai import AuthenticationError, BadRequestError, OpenAIError, RateLimitError + sys.path.insert( 0, os.path.abspath("../..") ) # Adds the parent directory to the system path -import litellm -from litellm import ( - embedding, - completion, - # AuthenticationError, - ContextWindowExceededError, - # RateLimitError, - # ServiceUnavailableError, - # OpenAIError, -) from concurrent.futures import ThreadPoolExecutor +from unittest.mock import MagicMock, patch + import pytest -from unittest.mock import patch, MagicMock + +import litellm +from litellm import ( # AuthenticationError,; RateLimitError,; ServiceUnavailableError,; OpenAIError, + ContextWindowExceededError, + completion, + embedding, +) litellm.vertex_project = "pathrise-convert-1606954137718" litellm.vertex_location = "us-central1" @@ -252,6 +252,7 @@ def test_completion_azure_exception(): async def asynctest_completion_azure_exception(): try: import openai + import litellm print("azure gpt-3.5 test\n\n") @@ -283,8 +284,11 @@ async def asynctest_completion_azure_exception(): def asynctest_completion_openai_exception_bad_model(): try: + import asyncio + import openai - import litellm, asyncio + + import litellm print("azure exception bad model\n\n") litellm.set_verbose = True @@ -311,8 +315,11 @@ def asynctest_completion_openai_exception_bad_model(): def asynctest_completion_azure_exception_bad_model(): try: + import asyncio + import openai - import litellm, asyncio + + import litellm print("azure exception bad model\n\n") litellm.set_verbose = True @@ -663,7 +670,7 @@ def test_litellm_predibase_exception(): # print(f"accuracy_score: {accuracy_score}") -@pytest.mark.parametrize("provider", ["predibase"]) +@pytest.mark.parametrize("provider", ["predibase", "vertex_ai_beta"]) def test_exception_mapping(provider): """ For predibase, run through a set of mock exceptions diff --git a/litellm/utils.py b/litellm/utils.py index 795526a32..009c168b5 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -6240,7 +6240,11 @@ def exception_type( llm_provider="sagemaker", response=original_exception.response, ) - elif custom_llm_provider == "vertex_ai": + elif ( + custom_llm_provider == "vertex_ai" + or custom_llm_provider == "vertex_ai_beta" + or custom_llm_provider == "gemini" + ): if ( "Vertex AI API has not been used in project" in error_str or "Unable to find your project" in error_str @@ -6259,6 +6263,13 @@ def exception_type( ), litellm_debug_info=extra_information, ) + if "400 Request payload size exceeds" in error_str: + exception_mapping_worked = True + raise ContextWindowExceededError( + message=f"VertexException - {error_str}", + model=model, + llm_provider=custom_llm_provider, + ) elif ( "None Unknown Error." in error_str or "Content has no parts." in error_str @@ -6292,13 +6303,13 @@ def exception_type( ) elif "The response was blocked." 
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 52ae8dae2..473f3d3fe 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -1185,6 +1185,33 @@
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "gemini-1.5-flash": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 0.0001315,
+        "input_cost_per_video_per_second": 0.0001315,
+        "input_cost_per_audio_per_second": 0.000125,
+        "input_cost_per_token": 0.00000003125,
+        "input_cost_per_token_above_128k_tokens": 0.0000000625,
+        "output_cost_per_token": 0.00000009375,
+        "output_cost_per_token_above_128k_tokens": 0.0000001875,
+        "output_cost_per_image": 0.000263,
+        "output_cost_per_video_per_second": 0.000263,
+        "output_cost_per_audio_per_second": 0.00025,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
     "gemini-1.5-flash-001": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
@@ -1207,6 +1234,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1233,6 +1261,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1253,6 +1282,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1273,6 +1303,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1293,6 +1324,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"