diff --git a/litellm/proxy/hooks/parallel_request_limiter.py b/litellm/proxy/hooks/parallel_request_limiter.py
index 38b57c19ea..08baf78d4b 100644
--- a/litellm/proxy/hooks/parallel_request_limiter.py
+++ b/litellm/proxy/hooks/parallel_request_limiter.py
@@ -120,6 +120,8 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
         max_parallel_requests = user_api_key_dict.max_parallel_requests
         if max_parallel_requests is None:
             max_parallel_requests = sys.maxsize
+        if data is None:
+            data = {}
         global_max_parallel_requests = data.get("metadata", {}).get(
             "global_max_parallel_requests", None
         )
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 57609d29b5..168a522dbd 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -1,18 +1,15 @@
 model_list:
-  - model_name: fake-openai-endpoint
+  - model_name: multimodalembedding@001
     litellm_params:
-      model: openai/fake
-      api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      model: vertex_ai/multimodalembedding@001
+      vertex_project: "adroit-crow-413218"
+      vertex_location: "us-central1"
+      vertex_credentials: adroit-crow-413218-a956eef1a2a8.json
 
-guardrails:
-  - guardrail_name: "lakera-pre-guard"
-    litellm_params:
-      guardrail: lakera  # supported values: "aporia", "bedrock", "lakera"
-      mode: "during_call"
-      api_key: os.environ/LAKERA_API_KEY
-      api_base: os.environ/LAKERA_API_BASE
-      category_thresholds:
-        prompt_injection: 0.1
-        jailbreak: 0.1
-      
\ No newline at end of file
+default_vertex_config:
+  vertex_project: "adroit-crow-413218"
+  vertex_location: "us-central1"
+  vertex_credentials: adroit-crow-413218-a956eef1a2a8.json
+
+litellm_settings:
+  drop_params: True
\ No newline at end of file
diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py
index 1bfb1c2a09..53edbbcfd3 100644
--- a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py
+++ b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py
@@ -25,6 +25,9 @@ from litellm.batches.main import FileObject
 from litellm.fine_tuning.main import vertex_fine_tuning_apis_instance
 from litellm.proxy._types import *
 from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
+    create_pass_through_route,
+)
 
 router = APIRouter()
 default_vertex_config = None
@@ -70,10 +73,17 @@ def exception_handler(e: Exception):
     )
 
 
-async def execute_post_vertex_ai_request(
+@router.api_route(
+    "/vertex-ai/{endpoint:path}", methods=["GET", "POST", "PUT", "DELETE"]
+)
+async def vertex_proxy_route(
+    endpoint: str,
     request: Request,
-    route: str,
+    fastapi_response: Response,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):
+    encoded_endpoint = httpx.URL(endpoint).path
+
     from litellm.fine_tuning.main import vertex_fine_tuning_apis_instance
 
     if default_vertex_config is None:
@@ -83,250 +93,52 @@ async def execute_post_vertex_ai_request(
     vertex_project = default_vertex_config.get("vertex_project", None)
     vertex_location = default_vertex_config.get("vertex_location", None)
     vertex_credentials = default_vertex_config.get("vertex_credentials", None)
+    base_target_url = f"https://{vertex_location}-aiplatform.googleapis.com/"
 
-    request_data_json = {}
-    body = await request.body()
-    body_str = body.decode()
-    if len(body_str) > 0:
-        try:
-            request_data_json = ast.literal_eval(body_str)
-        except:
-            request_data_json = json.loads(body_str)
-
-    verbose_proxy_logger.debug(
-        "Request received by LiteLLM:\n{}".format(
-            json.dumps(request_data_json, indent=4)
-        ),
+    auth_header, _ = vertex_fine_tuning_apis_instance._get_token_and_url(
+        model="",
+        gemini_api_key=None,
+        vertex_credentials=vertex_credentials,
+        vertex_project=vertex_project,
+        vertex_location=vertex_location,
+        stream=False,
+        custom_llm_provider="vertex_ai_beta",
+        api_base="",
     )
-    response = (
-        await vertex_fine_tuning_apis_instance.pass_through_vertex_ai_POST_request(
-            request_data=request_data_json,
-            vertex_project=vertex_project,
-            vertex_location=vertex_location,
-            vertex_credentials=vertex_credentials,
-            request_route=route,
-        )
+    headers = {
+        "Authorization": f"Bearer {auth_header}",
+    }
+
+    request_route = encoded_endpoint
+    verbose_proxy_logger.debug("request_route %s", request_route)
+
+    # Ensure endpoint starts with '/' for proper URL construction
+    if not encoded_endpoint.startswith("/"):
+        encoded_endpoint = "/" + encoded_endpoint
+
+    # Construct the full target URL using httpx
+    base_url = httpx.URL(base_target_url)
+    updated_url = base_url.copy_with(path=encoded_endpoint)
+
+    verbose_proxy_logger.debug("updated url %s", updated_url)
+
+    ## check for streaming
+    is_streaming_request = False
+    if "stream" in str(updated_url):
+        is_streaming_request = True
+
+    ## CREATE PASS-THROUGH
+    endpoint_func = create_pass_through_route(
+        endpoint=endpoint,
+        target=str(updated_url),
+        custom_headers=headers,
+    )  # dynamically construct pass-through endpoint based on incoming path
+    received_value = await endpoint_func(
+        request,
+        fastapi_response,
+        user_api_key_dict,
+        stream=is_streaming_request,
    )
-    return response
-
-
-@router.post(
-    "/vertex-ai/publishers/google/models/{model_id:path}:generateContent",
-    dependencies=[Depends(user_api_key_auth)],
-    tags=["Vertex AI endpoints"],
-)
-async def vertex_generate_content(
-    request: Request,
-    fastapi_response: Response,
-    model_id: str,
-    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
-):
-    """
-    this is a pass through endpoint for the Vertex AI API. /generateContent endpoint
-
-    Example Curl:
-    ```
-    curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.5-flash-001:generateContent \
-      -H "Content-Type: application/json" \
-      -H "Authorization: Bearer sk-1234" \
-      -d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}'
-    ```
-
-    Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#rest
-    it uses the vertex ai credentials on the proxy and forwards to vertex ai api
-    """
-    try:
-        response = await execute_post_vertex_ai_request(
-            request=request,
-            route=f"/publishers/google/models/{model_id}:generateContent",
-        )
-        return response
-    except Exception as e:
-        raise exception_handler(e) from e
-
-
-@router.post(
-    "/vertex-ai/publishers/google/models/{model_id:path}:predict",
-    dependencies=[Depends(user_api_key_auth)],
-    tags=["Vertex AI endpoints"],
-)
-async def vertex_predict_endpoint(
-    request: Request,
-    fastapi_response: Response,
-    model_id: str,
-    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
-):
-    """
-    this is a pass through endpoint for the Vertex AI API. /predict endpoint
-    Use this for:
-    - Embeddings API - Text Embedding, Multi Modal Embedding
-    - Imagen API
-    - Code Completion API
-
-    Example Curl:
-    ```
-    curl http://localhost:4000/vertex-ai/publishers/google/models/textembedding-gecko@001:predict \
-      -H "Content-Type: application/json" \
-      -H "Authorization: Bearer sk-1234" \
-      -d '{"instances":[{"content": "gm"}]}'
-    ```
-
-    Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#generative-ai-get-text-embedding-drest
-    it uses the vertex ai credentials on the proxy and forwards to vertex ai api
-    """
-    try:
-        response = await execute_post_vertex_ai_request(
-            request=request,
-            route=f"/publishers/google/models/{model_id}:predict",
-        )
-        return response
-    except Exception as e:
-        raise exception_handler(e) from e
-
-
-@router.post(
-    "/vertex-ai/publishers/google/models/{model_id:path}:countTokens",
-    dependencies=[Depends(user_api_key_auth)],
-    tags=["Vertex AI endpoints"],
-)
-async def vertex_countTokens_endpoint(
-    request: Request,
-    fastapi_response: Response,
-    model_id: str,
-    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
-):
-    """
-    this is a pass through endpoint for the Vertex AI API. /countTokens endpoint
-    https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/count-tokens#curl
-
-
-    Example Curl:
-    ```
-    curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.5-flash-001:countTokens \
-      -H "Content-Type: application/json" \
-      -H "Authorization: Bearer sk-1234" \
-      -d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}'
-    ```
-
-    it uses the vertex ai credentials on the proxy and forwards to vertex ai api
-    """
-    try:
-        response = await execute_post_vertex_ai_request(
-            request=request,
-            route=f"/publishers/google/models/{model_id}:countTokens",
-        )
-        return response
-    except Exception as e:
-        raise exception_handler(e) from e
-
-
-@router.post(
-    "/vertex-ai/batchPredictionJobs",
-    dependencies=[Depends(user_api_key_auth)],
-    tags=["Vertex AI endpoints"],
-)
-async def vertex_create_batch_prediction_job(
-    request: Request,
-    fastapi_response: Response,
-    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
-):
-    """
-    this is a pass through endpoint for the Vertex AI API. /batchPredictionJobs endpoint
-
-    Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/batch-prediction-api#syntax
-
-    it uses the vertex ai credentials on the proxy and forwards to vertex ai api
-    """
-    try:
-        response = await execute_post_vertex_ai_request(
-            request=request,
-            route="/batchPredictionJobs",
-        )
-        return response
-    except Exception as e:
-        raise exception_handler(e) from e
-
-
-@router.post(
-    "/vertex-ai/tuningJobs",
-    dependencies=[Depends(user_api_key_auth)],
-    tags=["Vertex AI endpoints"],
-)
-async def vertex_create_fine_tuning_job(
-    request: Request,
-    fastapi_response: Response,
-    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
-):
-    """
-    this is a pass through endpoint for the Vertex AI API. /tuningJobs endpoint
-
-    Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/tuning
-
-    it uses the vertex ai credentials on the proxy and forwards to vertex ai api
-    """
-    try:
-        response = await execute_post_vertex_ai_request(
-            request=request,
-            route="/tuningJobs",
-        )
-        return response
-    except Exception as e:
-        raise exception_handler(e) from e
-
-
-@router.post(
-    "/vertex-ai/tuningJobs/{job_id:path}:cancel",
-    dependencies=[Depends(user_api_key_auth)],
-    tags=["Vertex AI endpoints"],
-)
-async def vertex_cancel_fine_tuning_job(
-    request: Request,
-    job_id: str,
-    fastapi_response: Response,
-    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
-):
-    """
-    this is a pass through endpoint for the Vertex AI API. tuningJobs/{job_id:path}:cancel
-
-    Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/tuning#cancel_a_tuning_job
-
-    it uses the vertex ai credentials on the proxy and forwards to vertex ai api
-    """
-    try:
-
-        response = await execute_post_vertex_ai_request(
-            request=request,
-            route=f"/tuningJobs/{job_id}:cancel",
-        )
-        return response
-    except Exception as e:
-        raise exception_handler(e) from e
-
-
-@router.post(
-    "/vertex-ai/cachedContents",
-    dependencies=[Depends(user_api_key_auth)],
-    tags=["Vertex AI endpoints"],
-)
-async def vertex_create_add_cached_content(
-    request: Request,
-    fastapi_response: Response,
-    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
-):
-    """
-    this is a pass through endpoint for the Vertex AI API. /cachedContents endpoint
-
-    Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create#create-context-cache-sample-drest
-
-    it uses the vertex ai credentials on the proxy and forwards to vertex ai api
-    """
-    try:
-        response = await execute_post_vertex_ai_request(
-            request=request,
-            route="/cachedContents",
-        )
-        return response
-    except Exception as e:
-        raise exception_handler(e) from e
+    return received_value
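
With the per-route handlers above collapsed into the single `vertex_proxy_route` pass-through, any Vertex AI path appended after `/vertex-ai/` is authenticated with the credentials from `default_vertex_config` and forwarded as-is. A minimal client-side sketch, reusing the `:predict` example from the removed docstrings; the proxy URL `http://localhost:4000` and the key `sk-1234` are placeholders:

```python
# Minimal sketch of calling the consolidated /vertex-ai/{endpoint:path} route.
# Assumptions: a LiteLLM proxy running at http://localhost:4000 with virtual
# key "sk-1234" (both placeholders), and Vertex credentials configured via
# default_vertex_config in proxy_config.yaml.
import httpx

response = httpx.post(
    "http://localhost:4000/vertex-ai/publishers/google/models/"
    "textembedding-gecko@001:predict",
    headers={
        "Authorization": "Bearer sk-1234",
        "Content-Type": "application/json",
    },
    # Same payload shape as the curl example in the removed :predict docstring.
    json={"instances": [{"content": "gm"}]},
)
print(response.status_code, response.json())
```

Note that `vertex_proxy_route` treats a request as streaming whenever the substring `stream` appears in the constructed target URL, so routes such as `:streamGenerateContent` are forwarded with `stream=True`.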