add initial support for multimodal_embedding vertex

2025-04-26 11:14:04 +00:00 · 2024-08-21 14:29:05 -07:00 · 2024-08-21 14:29:05 -07:00 · be6eb52036
commit be6eb52036
parent 710ae63957
2 changed files with 279 additions and 13 deletions
--- a/litellm/llms/vertex_httpx.py
+++ b/litellm/llms/vertex_httpx.py
@ -38,12 +38,15 @@ from litellm.types.llms.vertex_ai import (
    FunctionDeclaration,
    GenerateContentResponseBody,
    GenerationConfig,
+    Instance,
+    InstanceVideo,
    PartType,
    RequestBody,
    SafetSettingsConfig,
    SystemInstructions,
    ToolConfig,
    Tools,
+    VertexMultimodalEmbeddingRequest,
 )
 from litellm.types.utils import GenericStreamingChunk
 from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
@ -1537,6 +1540,253 @@ class VertexLLM(BaseLLM):

        return model_response

+    def multimodal_embedding(
+        self,
+        model: str,
+        input: Union[list, str],
+        print_verbose,
+        model_response: litellm.EmbeddingResponse,
+        optional_params: dict,
+        api_key: Optional[str] = None,
+        logging_obj=None,
+        encoding=None,
+        vertex_project=None,
+        vertex_location=None,
+        vertex_credentials=None,
+        aembedding=False,
+        timeout=300,
+        client=None,
+    ):
+        """
+        Request a multimodal embedding from the Vertex AI `:predict` endpoint.
+
+        A single request instance is built from `optional_params` (the caller
+        in `litellm/main.py` routes here when it contains "image" or "video");
+        a string `input` is added to that instance as its "text" field.
+
+        Args:
+            model: Publisher model name, interpolated into the request URL.
+            input: Text to embed. Only `str` is used; a list is not added to
+                the request instance.
+            print_verbose: Unused here.
+            model_response: Response object that is populated and returned.
+            optional_params: Keyword fields for the request `Instance`.
+            api_key: Unused here; auth comes from `vertex_credentials`.
+            logging_obj: Optional logger; `pre_call` is invoked when provided.
+            encoding: Unused here.
+            vertex_project: Project id used in the URL and for the token.
+            vertex_location: Region used in the URL host and path.
+            vertex_credentials: Passed to `_ensure_access_token`.
+            aembedding: Ignored for now, see the TODO below.
+            timeout: Seconds (int/float) for a newly created HTTP handler;
+                `None` selects a 600s timeout with a 5s connect timeout.
+            client: Optional pre-built sync HTTP handler to reuse.
+
+        Returns:
+            `model_response` with `data` set to the raw "predictions" list and
+            `model` set to `model`.
+
+        Raises:
+            Exception: The endpoint answered with a non-200 status code.
+            litellm.InternalServerError: The body has no "predictions" key.
+        """
+        # TODO: async path - `aembedding` is accepted but the call is always sync.
+        if client is None:
+            _params = {}
+            if timeout is None:
+                _params["timeout"] = httpx.Timeout(timeout=600.0, connect=5.0)
+            elif isinstance(timeout, (float, int)):
+                _params["timeout"] = httpx.Timeout(timeout)
+            # Any other timeout type leaves "timeout" out of the handler kwargs.
+
+            sync_handler: HTTPHandler = HTTPHandler(**_params)  # type: ignore
+        else:
+            sync_handler = client  # type: ignore
+
+        url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:predict"
+
+        auth_header, _ = self._ensure_access_token(
+            credentials=vertex_credentials, project_id=vertex_project
+        )
+        optional_params = optional_params or {}
+
+        request_data = VertexMultimodalEmbeddingRequest()
+        # Every key in `optional_params` is passed to `Instance` as a keyword
+        # argument; the resulting instance is the only one in the request.
+        vertex_request_instance = Instance(**optional_params)
+        # NOTE: a list `input` is not forwarded; only a plain string is.
+        if isinstance(input, str):
+            vertex_request_instance["text"] = input
+
+        request_data["instances"] = [vertex_request_instance]
+
+        # Only a prefix of the bearer token goes into the logged curl command.
+        request_str = f"\n curl -X POST \\\n -H \"Authorization: Bearer {auth_header[:10] + 'XXXXXXXXXX'}\" \\\n -H \"Content-Type: application/json; charset=utf-8\" \\\n -d {request_data} \\\n \"{url}\""
+        if logging_obj is not None:
+            logging_obj.pre_call(
+                input=input,
+                api_key=None,
+                additional_args={
+                    "complete_input_dict": optional_params,
+                    "request_str": request_str,
+                },
+            )
+
+        response = sync_handler.post(
+            url=url,
+            headers={
+                "Content-Type": "application/json; charset=utf-8",
+                "Authorization": f"Bearer {auth_header}",
+            },
+            data=json.dumps(request_data),
+        )
+
+        if response.status_code != 200:
+            raise Exception(f"Error: {response.status_code} {response.text}")
+
+        # Vertex AI multimodal embedding response example:
+        # {
+        #     "predictions": [
+        #         {
+        #             "textEmbedding": [0.01, ...],
+        #             "imageEmbedding": [0.02, ...]
+        #         }
+        #     ]
+        # }
+        _json_response = response.json()
+        if "predictions" not in _json_response:
+            raise litellm.InternalServerError(
+                message=f"embedding response does not contain 'predictions', got {_json_response}",
+                llm_provider="vertex_ai",
+                model=model,
+            )
+        _predictions = _json_response["predictions"]
+
+        model_response.data = _predictions
+        model_response.model = model
+
+        return model_response
+
+    # async def aimage_generation(
+    #     self,
+    #     prompt: str,
+    #     vertex_project: Optional[str],
+    #     vertex_location: Optional[str],
+    #     vertex_credentials: Optional[str],
+    #     model_response: litellm.ImageResponse,
+    #     model: Optional[
+    #         str
+    #     ] = "imagegeneration",  # vertex ai uses imagegeneration as the default model
+    #     client: Optional[AsyncHTTPHandler] = None,
+    #     optional_params: Optional[dict] = None,
+    #     timeout: Optional[int] = None,
+    #     logging_obj=None,
+    # ):
+    #     response = None
+    #     if client is None:
+    #         _params = {}
+    #         if timeout is not None:
+    #             if isinstance(timeout, float) or isinstance(timeout, int):
+    #                 _httpx_timeout = httpx.Timeout(timeout)
+    #                 _params["timeout"] = _httpx_timeout
+    #         else:
+    #             _params["timeout"] = httpx.Timeout(timeout=600.0, connect=5.0)
+
+    #         self.async_handler = AsyncHTTPHandler(**_params)  # type: ignore
+    #     else:
+    #         self.async_handler = client  # type: ignore
+
+    #     # make POST request to
+    #     # https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict
+    #     url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:predict"
+
+    #     """
+    #     Docs link: https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/imagegeneration?project=adroit-crow-413218
+    #     curl -X POST \
+    #     -H "Authorization: Bearer $(gcloud auth print-access-token)" \
+    #     -H "Content-Type: application/json; charset=utf-8" \
+    #     -d {
+    #         "instances": [
+    #             {
+    #                 "prompt": "a cat"
+    #             }
+    #         ],
+    #         "parameters": {
+    #             "sampleCount": 1
+    #         }
+    #     } \
+    #     "https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict"
+    #     """
+    #     auth_header, _ = self._ensure_access_token(
+    #         credentials=vertex_credentials, project_id=vertex_project
+    #     )
+    #     optional_params = optional_params or {
+    #         "sampleCount": 1
+    #     }  # default optional params
+
+    #     request_data = {
+    #         "instances": [{"prompt": prompt}],
+    #         "parameters": optional_params,
+    #     }
+
+    #     request_str = f"\n curl -X POST \\\n -H \"Authorization: Bearer {auth_header[:10] + 'XXXXXXXXXX'}\" \\\n -H \"Content-Type: application/json; charset=utf-8\" \\\n -d {request_data} \\\n \"{url}\""
+    #     logging_obj.pre_call(
+    #         input=prompt,
+    #         api_key=None,
+    #         additional_args={
+    #             "complete_input_dict": optional_params,
+    #             "request_str": request_str,
+    #         },
+    #     )
+
+    #     response = await self.async_handler.post(
+    #         url=url,
+    #         headers={
+    #             "Content-Type": "application/json; charset=utf-8",
+    #             "Authorization": f"Bearer {auth_header}",
+    #         },
+    #         data=json.dumps(request_data),
+    #     )
+
+    #     if response.status_code != 200:
+    #         raise Exception(f"Error: {response.status_code} {response.text}")
+    #     """
+    #     Vertex AI Image generation response example:
+    #     {
+    #         "predictions": [
+    #             {
+    #             "bytesBase64Encoded": "BASE64_IMG_BYTES",
+    #             "mimeType": "image/png"
+    #             },
+    #             {
+    #             "mimeType": "image/png",
+    #             "bytesBase64Encoded": "BASE64_IMG_BYTES"
+    #             }
+    #         ]
+    #     }
+    #     """
+
+    #     _json_response = response.json()
+
+    #     if "predictions" not in _json_response:
+    #         raise litellm.InternalServerError(
+    #             message=f"image generation response does not contain 'predictions', got {_json_response}",
+    #             llm_provider="vertex_ai",
+    #             model=model,
+    #         )
+
+    #     _predictions = _json_response["predictions"]
+
+    #     _response_data: List[Image] = []
+    #     for _prediction in _predictions:
+    #         _bytes_base64_encoded = _prediction["bytesBase64Encoded"]
+    #         image_object = Image(b64_json=_bytes_base64_encoded)
+    #         _response_data.append(image_object)
+
+    #     model_response.data = _response_data
+
+    #     return model_response
+

 class ModelResponseIterator:
    def __init__(self, streaming_response, sync_stream: bool):
--- a/litellm/main.py
+++ b/litellm/main.py
@ -3477,6 +3477,22 @@ def embedding(
                or get_secret("VERTEX_CREDENTIALS")
            )

+            if "image" in optional_params or "video" in optional_params:
+                # multimodal embedding is supported on vertex httpx
+                response = vertex_chat_completion.multimodal_embedding(
+                    model=model,
+                    input=input,
+                    encoding=encoding,
+                    logging_obj=logging,
+                    optional_params=optional_params,
+                    model_response=EmbeddingResponse(),
+                    vertex_project=vertex_ai_project,
+                    vertex_location=vertex_ai_location,
+                    vertex_credentials=vertex_credentials,
+                    aembedding=aembedding,
+                    print_verbose=print_verbose,
+                )
+            else:
                response = vertex_ai.embedding(
                    model=model,
                    input=input,