diff --git a/litellm/llms/fine_tuning_apis/vertex_ai.py b/litellm/llms/fine_tuning_apis/vertex_ai.py
index 2d3e8d1c38..f964e7bba2 100644
--- a/litellm/llms/fine_tuning_apis/vertex_ai.py
+++ b/litellm/llms/fine_tuning_apis/vertex_ai.py
@@ -272,7 +272,8 @@ class VertexFineTuningAPI(VertexLLM):
             url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/tuningJobs{request_route}"
         elif "generateContent" in request_route:
             url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
-
+        elif "predict" in request_route:
+            url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
         if self.async_handler is None:
             raise ValueError("VertexAI Fine Tuning - async_handler is not initialized")
 
diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py
index 7ef552508b..da63aa0250 100644
--- a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py
+++ b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py
@@ -143,6 +143,45 @@ async def vertex_generate_content(
         raise exception_handler(e) from e
 
 
+@router.post(
+    "/vertex-ai/publishers/google/models/{model_id:path}:predict",
+    dependencies=[Depends(user_api_key_auth)],
+    tags=["Vertex AI endpoints"],
+)
+async def vertex_predict_endpoint(
+    request: Request,
+    fastapi_response: Response,
+    model_id: str,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
+    """
+    Pass-through endpoint for the Vertex AI `:predict` route of Google publisher models.
+
+    Forwards the incoming request to the Vertex AI API through
+    `execute_post_vertex_ai_request`, using the vertex ai credentials on the proxy. Use this for:
+    - Embeddings API - Text Embedding, Multi Modal Embedding
+    - Imagen API
+    - Code Completion API
+
+    `model_id` comes from the URL path; errors are re-raised via `exception_handler`.
+
+    Example Curl:
+    ```
+    curl http://localhost:4000/vertex-ai/publishers/google/models/textembedding-gecko@001:predict \
+      -H "Content-Type: application/json" \
+      -H "Authorization: Bearer sk-1234" \
+      -d '{"instances":[{"content": "gm"}]}'
+    ```
+
+    Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#generative-ai-get-text-embedding-drest
+    """
+    upstream = f"/publishers/google/models/{model_id}:predict"
+    try:
+        return await execute_post_vertex_ai_request(request=request, route=upstream)
+    except Exception as err:
+        raise exception_handler(err) from err
+
+
 @router.post(
     "/vertex-ai/tuningJobs",
     dependencies=[Depends(user_api_key_auth)],