From dd00cf2a970a6363556cd9204662073c9808902e Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 21 Aug 2024 14:25:47 -0700
Subject: [PATCH 1/5] add VertexMultimodalEmbeddingRequest type

---
 litellm/types/llms/vertex_ai.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/litellm/types/llms/vertex_ai.py b/litellm/types/llms/vertex_ai.py
index 6758c356f..5586d4861 100644
--- a/litellm/types/llms/vertex_ai.py
+++ b/litellm/types/llms/vertex_ai.py
@@ -1,6 +1,6 @@
 import json
 from enum import Enum
-from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
+from typing import Any, Dict, List, Literal, Optional, Tuple, TypedDict, Union
 
 from typing_extensions import (
     Protocol,
@@ -305,3 +305,18 @@ class ResponseTuningJob(TypedDict):
     ]
     createTime: Optional[str]
     updateTime: Optional[str]
+
+
+class InstanceVideo(TypedDict, total=False):
+    gcsUri: str
+    videoSegmentConfig: Tuple[float, float, float]
+
+
+class Instance(TypedDict, total=False):
+    text: str
+    image: Dict[str, str]
+    video: InstanceVideo
+
+
+class VertexMultimodalEmbeddingRequest(TypedDict, total=False):
+    instances: List[Instance]

From 7e3dc83c0d2f96a2a660b2473f8c4bc66a82d0be Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 21 Aug 2024 14:29:05 -0700
Subject: [PATCH 2/5] add initial support for multimodal_embedding vertex

---
 litellm/llms/vertex_httpx.py | 250 +++++++++++++++++++++++++++++++++++
 litellm/main.py              |  42 ++++--
 2 files changed, 279 insertions(+), 13 deletions(-)

diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py
index 1b0ef52bc..fea30b887 100644
--- a/litellm/llms/vertex_httpx.py
+++ b/litellm/llms/vertex_httpx.py
@@ -38,12 +38,15 @@ from litellm.types.llms.vertex_ai import (
     FunctionDeclaration,
     GenerateContentResponseBody,
     GenerationConfig,
+    Instance,
+    InstanceVideo,
     PartType,
     RequestBody,
     SafetSettingsConfig,
     SystemInstructions,
     ToolConfig,
     Tools,
+    VertexMultimodalEmbeddingRequest,
 )
 from litellm.types.utils import GenericStreamingChunk
 from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
@@ -1537,6 +1540,253 @@ class VertexLLM(BaseLLM):
 
         return model_response
 
+    def multimodal_embedding(
+        self,
+        model: str,
+        input: Union[list, str],
+        print_verbose,
+        model_response: litellm.EmbeddingResponse,
+        optional_params: dict,
+        api_key: Optional[str] = None,
+        logging_obj=None,
+        encoding=None,
+        vertex_project=None,
+        vertex_location=None,
+        vertex_credentials=None,
+        aembedding=False,
+        timeout=300,
+        client=None,
+    ):
+        # if aembedding is True:
+        #     return self.aimage_generation(
+        #         prompt=prompt,
+        #         vertex_project=vertex_project,
+        #         vertex_location=vertex_location,
+        #         vertex_credentials=vertex_credentials,
+        #         model=model,
+        #         client=client,
+        #         optional_params=optional_params,
+        #         timeout=timeout,
+        #         logging_obj=logging_obj,
+        #         model_response=model_response,
+        #     )
+
+        if client is None:
+            _params = {}
+            if timeout is not None:
+                if isinstance(timeout, float) or isinstance(timeout, int):
+                    _httpx_timeout = httpx.Timeout(timeout)
+                    _params["timeout"] = _httpx_timeout
+            else:
+                _params["timeout"] = httpx.Timeout(timeout=600.0, connect=5.0)
+
+            sync_handler: HTTPHandler = HTTPHandler(**_params)  # type: ignore
+        else:
+            sync_handler = client  # type: ignore
+
+        url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:predict"
+
+        auth_header, _ = self._ensure_access_token(
+            credentials=vertex_credentials, project_id=vertex_project
+        )
+        optional_params = optional_params or {}
+
+        request_data = VertexMultimodalEmbeddingRequest()
+        vertex_request_instance = Instance(**optional_params)
+
+        # if "image" in optional_params:
+        #     vertex_request_instance["image"] = optional_params["image"]
+
+        # if "video" in optional_params:
+        #     vertex_request_instance["video"] = optional_params["video"]
+
+        # if "text" in optional_params:
+        #     vertex_request_instance["text"] = optional_params["text"]
+        if isinstance(input, str):
+            vertex_request_instance["text"] = input
+
+        request_data["instances"] = [vertex_request_instance]
+
+        request_str = f"\n curl -X POST \\\n -H \"Authorization: Bearer {auth_header[:10] + 'XXXXXXXXXX'}\" \\\n -H \"Content-Type: application/json; charset=utf-8\" \\\n -d {request_data} \\\n \"{url}\""
+        logging_obj.pre_call(
+            input=input,
+            api_key=None,
+            additional_args={
+                "complete_input_dict": optional_params,
+                "request_str": request_str,
+            },
+        )
+
+        logging_obj.pre_call(
+            input=input,
+            api_key=None,
+            additional_args={
+                "complete_input_dict": optional_params,
+                "request_str": request_str,
+            },
+        )
+
+        response = sync_handler.post(
+            url=url,
+            headers={
+                "Content-Type": "application/json; charset=utf-8",
+                "Authorization": f"Bearer {auth_header}",
+            },
+            data=json.dumps(request_data),
+        )
+
+        if response.status_code != 200:
+            raise Exception(f"Error: {response.status_code} {response.text}")
+        """
+        Vertex AI Image generation response example:
+        {
+            "predictions": [
+                {
+                    "bytesBase64Encoded": "BASE64_IMG_BYTES",
+                    "mimeType": "image/png"
+                },
+                {
+                    "mimeType": "image/png",
+                    "bytesBase64Encoded": "BASE64_IMG_BYTES"
+                }
+            ]
+        }
+        """
+
+        _json_response = response.json()
+        if "predictions" not in _json_response:
+            raise litellm.InternalServerError(
+                message=f"embedding response does not contain 'predictions', got {_json_response}",
+                llm_provider="vertex_ai",
+                model=model,
+            )
+        _predictions = _json_response["predictions"]
+
+        model_response.data = _predictions
+        model_response.model = model
+
+        return model_response
+
+    # async def aimage_generation(
+    #     self,
+    #     prompt: str,
+    #     vertex_project: Optional[str],
+    #     vertex_location: Optional[str],
+    #     vertex_credentials: Optional[str],
+    #     model_response: litellm.ImageResponse,
+    #     model: Optional[
+    #         str
+    #     ] = "imagegeneration",  # vertex ai uses imagegeneration as the default model
+    #     client: Optional[AsyncHTTPHandler] = None,
+    #     optional_params: Optional[dict] = None,
+    #     timeout: Optional[int] = None,
+    #     logging_obj=None,
+    # ):
+    #     response = None
+    #     if client is None:
+    #         _params = {}
+    #         if timeout is not None:
+    #             if isinstance(timeout, float) or isinstance(timeout, int):
+    #                 _httpx_timeout = httpx.Timeout(timeout)
+    #                 _params["timeout"] = _httpx_timeout
+    #         else:
+    #             _params["timeout"] = httpx.Timeout(timeout=600.0, connect=5.0)
+
+    #         self.async_handler = AsyncHTTPHandler(**_params)  # type: ignore
+    #     else:
+    #         self.async_handler = client  # type: ignore
+
+    #     # make POST request to
+    #     # https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict
+    #     url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:predict"
+
+    #     """
+    #     Docs link: https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/imagegeneration?project=adroit-crow-413218
+    #     curl -X POST \
+    #     -H "Authorization: Bearer $(gcloud auth print-access-token)" \
+    #     -H "Content-Type: application/json; charset=utf-8" \
+    #     -d {
+    #       "instances": [
+    #         {
+    #           "prompt": "a cat"
+    #         }
+    #       ],
+    #       "parameters": {
+    #         "sampleCount": 1
+    #       }
+    #     } \
+    #     "https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict"
+    #     """
+    #     auth_header, _ = self._ensure_access_token(
+    #         credentials=vertex_credentials, project_id=vertex_project
+    #     )
+    #     optional_params = optional_params or {
+    #         "sampleCount": 1
+    #     }  # default optional params
+
+    #     request_data = {
+    #         "instances": [{"prompt": prompt}],
+    #         "parameters": optional_params,
+    #     }
+
+    #     request_str = f"\n curl -X POST \\\n -H \"Authorization: Bearer {auth_header[:10] + 'XXXXXXXXXX'}\" \\\n -H \"Content-Type: application/json; charset=utf-8\" \\\n -d {request_data} \\\n \"{url}\""
+    #     logging_obj.pre_call(
+    #         input=prompt,
+    #         api_key=None,
+    #         additional_args={
+    #             "complete_input_dict": optional_params,
+    #             "request_str": request_str,
+    #         },
+    #     )
+
+    #     response = await self.async_handler.post(
+    #         url=url,
+    #         headers={
+    #             "Content-Type": "application/json; charset=utf-8",
+    #             "Authorization": f"Bearer {auth_header}",
+    #         },
+    #         data=json.dumps(request_data),
+    #     )
+
+    #     if response.status_code != 200:
+    #         raise Exception(f"Error: {response.status_code} {response.text}")
+    #     """
+    #     Vertex AI Image generation response example:
+    #     {
+    #         "predictions": [
+    #             {
+    #                 "bytesBase64Encoded": "BASE64_IMG_BYTES",
+    #                 "mimeType": "image/png"
+    #             },
+    #             {
+    #                 "mimeType": "image/png",
+    #                 "bytesBase64Encoded": "BASE64_IMG_BYTES"
+    #             }
+    #         ]
+    #     }
+    #     """
+
+    #     _json_response = response.json()
+
+    #     if "predictions" not in _json_response:
+    #         raise litellm.InternalServerError(
+    #             message=f"image generation response does not contain 'predictions', got {_json_response}",
+    #             llm_provider="vertex_ai",
+    #             model=model,
+    #         )
+
+    #     _predictions = _json_response["predictions"]
+
+    #     _response_data: List[Image] = []
+    #     for _prediction in _predictions:
+    #         _bytes_base64_encoded = _prediction["bytesBase64Encoded"]
+    #         image_object = Image(b64_json=_bytes_base64_encoded)
+    #         _response_data.append(image_object)
+
+    #     model_response.data = _response_data
+
+    #     return model_response
 
 class ModelResponseIterator:
     def __init__(self, streaming_response, sync_stream: bool):
diff --git a/litellm/main.py b/litellm/main.py
index f2c6df306..08c1d5d8d 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -3477,19 +3477,35 @@ def embedding(
             or get_secret("VERTEX_CREDENTIALS")
         )
 
-        response = vertex_ai.embedding(
-            model=model,
-            input=input,
-            encoding=encoding,
-            logging_obj=logging,
-            optional_params=optional_params,
-            model_response=EmbeddingResponse(),
-            vertex_project=vertex_ai_project,
-            vertex_location=vertex_ai_location,
-            vertex_credentials=vertex_credentials,
-            aembedding=aembedding,
-            print_verbose=print_verbose,
-        )
+        if "image" in optional_params or "video" in optional_params:
+            # multimodal embedding is supported on vertex httpx
+            response = vertex_chat_completion.multimodal_embedding(
+                model=model,
+                input=input,
+                encoding=encoding,
+                logging_obj=logging,
+                optional_params=optional_params,
+                model_response=EmbeddingResponse(),
+                vertex_project=vertex_ai_project,
+                vertex_location=vertex_ai_location,
+                vertex_credentials=vertex_credentials,
+                aembedding=aembedding,
+                print_verbose=print_verbose,
+            )
+        else:
+            response = vertex_ai.embedding(
+                model=model,
+                input=input,
+                encoding=encoding,
+                logging_obj=logging,
+                optional_params=optional_params,
+                model_response=EmbeddingResponse(),
+                vertex_project=vertex_ai_project,
+                vertex_location=vertex_ai_location,
+                vertex_credentials=vertex_credentials,
+                aembedding=aembedding,
+                print_verbose=print_verbose,
+            )
     elif custom_llm_provider == "oobabooga":
         response = oobabooga.embedding(
             model=model,

From 35781ab8d502ed1c3e7e9c11bf72ee26e28094c5 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 21 Aug 2024 15:05:59 -0700
Subject: [PATCH 3/5] add multi modal vtx embedding

---
 litellm/llms/vertex_httpx.py                  | 231 ++++++------------
 litellm/main.py                               |   6 +-
 .../tests/test_amazing_vertex_completion.py   |  32 +++
 litellm/utils.py                              |   4 +-
 4 files changed, 109 insertions(+), 164 deletions(-)

diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py
index fea30b887..94fbd0a13 100644
--- a/litellm/llms/vertex_httpx.py
+++ b/litellm/llms/vertex_httpx.py
@@ -9,7 +9,7 @@ import types
 import uuid
 from enum import Enum
 from functools import partial
-from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union
+from typing import Any, Callable, Coroutine, Dict, List, Literal, Optional, Tuple, Union
 
 import httpx  # type: ignore
 import requests  # type: ignore
@@ -597,6 +597,10 @@ class VertexLLM(BaseLLM):
         self._credentials: Optional[Any] = None
         self.project_id: Optional[str] = None
         self.async_handler: Optional[AsyncHTTPHandler] = None
+        self.SUPPORTED_MULTIMODAL_EMBEDDING_MODELS = [
+            "multimodalembedding",
+            "multimodalembedding@001",
+        ]
 
     def _process_response(
         self,
@@ -1557,19 +1561,6 @@ class VertexLLM(BaseLLM):
         timeout=300,
         client=None,
     ):
-        # if aembedding is True:
-        #     return self.aimage_generation(
-        #         prompt=prompt,
-        #         vertex_project=vertex_project,
-        #         vertex_location=vertex_location,
-        #         vertex_credentials=vertex_credentials,
-        #         model=model,
-        #         client=client,
-        #         optional_params=optional_params,
-        #         timeout=timeout,
-        #         logging_obj=logging_obj,
-        #         model_response=model_response,
-        #     )
 
         if client is None:
             _params = {}
@@ -1592,24 +1583,21 @@ class VertexLLM(BaseLLM):
         optional_params = optional_params or {}
 
         request_data = VertexMultimodalEmbeddingRequest()
-        vertex_request_instance = Instance(**optional_params)
 
-        # if "image" in optional_params:
-        #     vertex_request_instance["image"] = optional_params["image"]
+        if "instances" in optional_params:
+            request_data["instances"] = optional_params["instances"]
+        else:
+            # construct instances
+            vertex_request_instance = Instance(**optional_params)
 
-        # if "video" in optional_params:
-        #     vertex_request_instance["video"] = optional_params["video"]
+            if isinstance(input, str):
+                vertex_request_instance["text"] = input
 
-        # if "text" in optional_params:
-        #     vertex_request_instance["text"] = optional_params["text"]
-        if isinstance(input, str):
-            vertex_request_instance["text"] = input
-
-        request_data["instances"] = [vertex_request_instance]
+            request_data["instances"] = [vertex_request_instance]
 
         request_str = f"\n curl -X POST \\\n -H \"Authorization: Bearer {auth_header[:10] + 'XXXXXXXXXX'}\" \\\n -H \"Content-Type: application/json; charset=utf-8\" \\\n -d {request_data} \\\n \"{url}\""
         logging_obj.pre_call(
-            input=input,
+            input=[],
             api_key=None,
             additional_args={
                 "complete_input_dict": optional_params,
@@ -1618,7 +1606,7 @@ class VertexLLM(BaseLLM):
         )
 
         logging_obj.pre_call(
-            input=input,
+            input=[],
             api_key=None,
             additional_args={
                 "complete_input_dict": optional_params,
@@ -1626,32 +1614,30 @@ class VertexLLM(BaseLLM):
             },
         )
 
+        headers = {
+            "Content-Type": "application/json; charset=utf-8",
+            "Authorization": f"Bearer {auth_header}",
+        }
+
+        if aembedding is True:
+            return self.async_multimodal_embedding(
+                model=model,
+                api_base=url,
+                data=request_data,
+                timeout=timeout,
+                headers=headers,
+                client=client,
+                model_response=model_response,
+            )
+
         response = sync_handler.post(
             url=url,
-            headers={
-                "Content-Type": "application/json; charset=utf-8",
-                "Authorization": f"Bearer {auth_header}",
-            },
+            headers=headers,
             data=json.dumps(request_data),
         )
 
         if response.status_code != 200:
             raise Exception(f"Error: {response.status_code} {response.text}")
-        """
-        Vertex AI Image generation response example:
-        {
-            "predictions": [
-                {
-                    "bytesBase64Encoded": "BASE64_IMG_BYTES",
-                    "mimeType": "image/png"
-                },
-                {
-                    "mimeType": "image/png",
-                    "bytesBase64Encoded": "BASE64_IMG_BYTES"
-                }
-            ]
-        }
-        """
 
         _json_response = response.json()
         if "predictions" not in _json_response:
@@ -1667,125 +1653,48 @@ class VertexLLM(BaseLLM):
 
         return model_response
 
-    # async def aimage_generation(
-    #     self,
-    #     prompt: str,
-    #     vertex_project: Optional[str],
-    #     vertex_location: Optional[str],
-    #     vertex_credentials: Optional[str],
-    #     model_response: litellm.ImageResponse,
-    #     model: Optional[
-    #         str
-    #     ] = "imagegeneration",  # vertex ai uses imagegeneration as the default model
-    #     client: Optional[AsyncHTTPHandler] = None,
-    #     optional_params: Optional[dict] = None,
-    #     timeout: Optional[int] = None,
-    #     logging_obj=None,
-    # ):
-    #     response = None
-    #     if client is None:
-    #         _params = {}
-    #         if timeout is not None:
-    #             if isinstance(timeout, float) or isinstance(timeout, int):
-    #                 _httpx_timeout = httpx.Timeout(timeout)
-    #                 _params["timeout"] = _httpx_timeout
-    #         else:
-    #             _params["timeout"] = httpx.Timeout(timeout=600.0, connect=5.0)
+    async def async_multimodal_embedding(
+        self,
+        model: str,
+        api_base: str,
+        data: VertexMultimodalEmbeddingRequest,
+        model_response: litellm.EmbeddingResponse,
+        timeout: Optional[Union[float, httpx.Timeout]],
+        headers={},
+        client: Optional[AsyncHTTPHandler] = None,
+    ) -> litellm.EmbeddingResponse:
+        if client is None:
+            _params = {}
+            if timeout is not None:
+                if isinstance(timeout, float) or isinstance(timeout, int):
+                    timeout = httpx.Timeout(timeout)
+                _params["timeout"] = timeout
+            client = AsyncHTTPHandler(**_params)  # type: ignore
+        else:
+            client = client  # type: ignore
 
-    #         self.async_handler = AsyncHTTPHandler(**_params)  # type: ignore
-    #     else:
-    #         self.async_handler = client  # type: ignore
+        try:
+            response = await client.post(api_base, headers=headers, json=data)  # type: ignore
+            response.raise_for_status()
+        except httpx.HTTPStatusError as err:
+            error_code = err.response.status_code
+            raise VertexAIError(status_code=error_code, message=err.response.text)
+        except httpx.TimeoutException:
+            raise VertexAIError(status_code=408, message="Timeout error occurred.")
 
-    #     # make POST request to
-    #     # https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict
-    #     url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:predict"
+        _json_response = response.json()
+        if "predictions" not in _json_response:
+            raise litellm.InternalServerError(
+                message=f"embedding response does not contain 'predictions', got {_json_response}",
+                llm_provider="vertex_ai",
+                model=model,
+            )
+        _predictions = _json_response["predictions"]
 
-    #     """
-    #     Docs link:
-    #     Docs link: https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/imagegeneration?project=adroit-crow-413218
-    #     curl -X POST \
-    #     -H "Authorization: Bearer $(gcloud auth print-access-token)" \
-    #     -H "Content-Type: application/json; charset=utf-8" \
-    #     -d {
-    #       "instances": [
-    #         {
-    #           "prompt": "a cat"
-    #         }
-    #       ],
-    #       "parameters": {
-    #         "sampleCount": 1
-    #       }
-    #     } \
-    #     "https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict"
-    #     """
-    #     auth_header, _ = self._ensure_access_token(
-    #         credentials=vertex_credentials, project_id=vertex_project
-    #     )
-    #     optional_params = optional_params or {
-    #         "sampleCount": 1
-    #     }  # default optional params
+        model_response.data = _predictions
+        model_response.model = model
 
-    #     request_data = {
-    #         "instances": [{"prompt": prompt}],
-    #         "parameters": optional_params,
-    #     }
+        return model_response
 
-    #     request_str = f"\n curl -X POST \\\n -H \"Authorization: Bearer {auth_header[:10] + 'XXXXXXXXXX'}\" \\\n -H \"Content-Type: application/json; charset=utf-8\" \\\n -d {request_data} \\\n \"{url}\""
-    #     logging_obj.pre_call(
-    #         input=prompt,
-    #         api_key=None,
-    #         additional_args={
-    #             "complete_input_dict": optional_params,
-    #             "request_str": request_str,
-    #         },
-    #     )
-
-    #     response = await self.async_handler.post(
-    #         url=url,
-    #         headers={
-    #             "Content-Type": "application/json; charset=utf-8",
-    #             "Authorization": f"Bearer {auth_header}",
-    #         },
-    #         data=json.dumps(request_data),
-    #     )
-
-    #     if response.status_code != 200:
-    #         raise Exception(f"Error: {response.status_code} {response.text}")
-    #     """
-    #     Vertex AI Image generation response example:
-    #     {
-    #         "predictions": [
-    #             {
-    #                 "bytesBase64Encoded": "BASE64_IMG_BYTES",
-    #                 "mimeType": "image/png"
-    #             },
-    #             {
-    #                 "mimeType": "image/png",
-    #                 "bytesBase64Encoded": "BASE64_IMG_BYTES"
-    #             }
-    #         ]
-    #     }
-    #     """
-
-    #     _json_response = response.json()
-
-    #     if "predictions" not in _json_response:
-    #         raise litellm.InternalServerError(
-    #             message=f"image generation response does not contain 'predictions', got {_json_response}",
-    #             llm_provider="vertex_ai",
-    #             model=model,
-    #         )
-
-    #     _predictions = _json_response["predictions"]
-
-    #     _response_data: List[Image] = []
-    #     for _prediction in _predictions:
-    #         _bytes_base64_encoded = _prediction["bytesBase64Encoded"]
-    #         image_object = Image(b64_json=_bytes_base64_encoded)
-    #         _response_data.append(image_object)
-
-    #     model_response.data = _response_data
-
-    #     return model_response
 
 class ModelResponseIterator:
     def __init__(self, streaming_response, sync_stream: bool):
diff --git a/litellm/main.py b/litellm/main.py
index 08c1d5d8d..ee327c2f7 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -3477,7 +3477,11 @@ def embedding(
             or get_secret("VERTEX_CREDENTIALS")
         )
 
-        if "image" in optional_params or "video" in optional_params:
+        if (
+            "image" in optional_params
+            or "video" in optional_params
+            or model in vertex_chat_completion.SUPPORTED_MULTIMODAL_EMBEDDING_MODELS
+        ):
             # multimodal embedding is supported on vertex httpx
             response = vertex_chat_completion.multimodal_embedding(
                 model=model,
diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py
index fca4f1ee5..b7fc33241 100644
--- a/litellm/tests/test_amazing_vertex_completion.py
+++ b/litellm/tests/test_amazing_vertex_completion.py
@@ -1826,6 +1826,38 @@ def test_vertexai_embedding():
         pytest.fail(f"Error occurred: {e}")
 
 
+@pytest.mark.asyncio()
+async def test_vertexai_multimodal_embedding():
+    image_path = "../proxy/cached_logo.jpg"
"../proxy/cached_logo.jpg" + # Getting the base64 string + base64_image = encode_image(image_path) + print("base 64 img ", base64_image) + try: + litellm.set_verbose = True + response = await litellm.aembedding( + model="vertex_ai/multimodalembedding@001", + instances=[ + { + "image": { + "gcsUri": "gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png" + }, + "text": "this is a unicorn", + }, + ], + ) + print(f"response:", response) + assert response.model == "multimodalembedding@001" + + _response_data = response.data[0] + + assert "imageEmbedding" in _response_data + assert "textEmbedding" in _response_data + except litellm.RateLimitError as e: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") + + @pytest.mark.skip( reason="new test - works locally running into vertex version issues on ci/cd" ) diff --git a/litellm/utils.py b/litellm/utils.py index a6d48dd31..7a3f97718 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -121,7 +121,7 @@ import importlib.metadata from openai import OpenAIError as OriginalError from ._logging import verbose_logger -from .caching import RedisCache, RedisSemanticCache, S3Cache, QdrantSemanticCache +from .caching import QdrantSemanticCache, RedisCache, RedisSemanticCache, S3Cache from .exceptions import ( APIConnectionError, APIError, @@ -541,7 +541,7 @@ def function_setup( call_type == CallTypes.embedding.value or call_type == CallTypes.aembedding.value ): - messages = args[1] if len(args) > 1 else kwargs["input"] + messages = args[1] if len(args) > 1 else kwargs.get("input", None) elif ( call_type == CallTypes.image_generation.value or call_type == CallTypes.aimage_generation.value From 0e9efb36698aa81cc57b8822a0d71c9096c9c7ab Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 21 Aug 2024 15:19:48 -0700 Subject: [PATCH 4/5] feat add multimodal embeddings on vertex --- litellm/llms/vertex_httpx.py | 2 ++ litellm/tests/test_amazing_vertex_completion.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py index 94fbd0a13..c784bf2e8 100644 --- a/litellm/llms/vertex_httpx.py +++ b/litellm/llms/vertex_httpx.py @@ -1586,6 +1586,8 @@ class VertexLLM(BaseLLM): if "instances" in optional_params: request_data["instances"] = optional_params["instances"] + elif isinstance(input, list): + request_data["instances"] = input else: # construct instances vertex_request_instance = Instance(**optional_params) diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py index b7fc33241..75868a2f4 100644 --- a/litellm/tests/test_amazing_vertex_completion.py +++ b/litellm/tests/test_amazing_vertex_completion.py @@ -1836,7 +1836,7 @@ async def test_vertexai_multimodal_embedding(): litellm.set_verbose = True response = await litellm.aembedding( model="vertex_ai/multimodalembedding@001", - instances=[ + input=[ { "image": { "gcsUri": "gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png" From 2c7f5a0d275440d914fb50fda27b264092343296 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 21 Aug 2024 15:25:13 -0700 Subject: [PATCH 5/5] fix test test_vertexai_multimodal_embedding --- litellm/tests/test_amazing_vertex_completion.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py index 75868a2f4..e142f5696 100644 --- a/litellm/tests/test_amazing_vertex_completion.py +++ 
+++ b/litellm/tests/test_amazing_vertex_completion.py
@@ -1828,10 +1828,8 @@ def test_vertexai_embedding():
 
 @pytest.mark.asyncio()
 async def test_vertexai_multimodal_embedding():
-    image_path = "../proxy/cached_logo.jpg"
-    # Getting the base64 string
-    base64_image = encode_image(image_path)
-    print("base 64 img ", base64_image)
+    load_vertex_ai_credentials()
+
     try:
         litellm.set_verbose = True
        response = await litellm.aembedding(
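
Usage sketch for the multimodal embedding support added above (illustrative only, not part of the patch series; assumes Vertex AI credentials, project, and location are already configured the way litellm resolves them in main.py, e.g. via VERTEX_CREDENTIALS and the vertex_ai_project / vertex_ai_location settings — the GCS URI is the sample asset used in test_vertexai_multimodal_embedding):

    import asyncio

    import litellm


    async def main():
        # Each element of `input` is forwarded as one Vertex `Instance`
        # (see PATCH 1/5): an instance may combine "text", an "image"
        # (a gcsUri here), and/or a "video" segment.
        response = await litellm.aembedding(
            model="vertex_ai/multimodalembedding@001",
            input=[
                {
                    "image": {
                        "gcsUri": "gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png"
                    },
                    "text": "this is a unicorn",
                },
            ],
        )
        # Predictions are passed through from Vertex unchanged
        # (model_response.data = _predictions), so each entry can carry an
        # "imageEmbedding" and/or a "textEmbedding" vector.
        prediction = response.data[0]
        print(len(prediction["imageEmbedding"]), len(prediction["textEmbedding"]))


    asyncio.run(main())

The synchronous litellm.embedding() call accepts the same arguments; both paths route through VertexLLM.multimodal_embedding once the request carries image/video params or the model is listed in SUPPORTED_MULTIMODAL_EMBEDDING_MODELS (PATCH 3/5).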