From 86596c53e98e611984e827ec122b19f45c93a9ab Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Sat, 6 Jul 2024 20:08:52 -0700
Subject: [PATCH] refactor(main.py): migrate vertex gemini calls to vertex_httpx

Completes migration to vertex_httpx
---
 litellm/__init__.py                          |   8 +-
 litellm/llms/vertex_ai.py                    | 203 +-----------------
 litellm/llms/vertex_httpx.py                 | 110 +++++++++-
 litellm/main.py                              |  26 ++-
 .../tests/test_amazing_vertex_completion.py  |   3 +-
 litellm/utils.py                             |  15 +-
 6 files changed, 159 insertions(+), 206 deletions(-)

diff --git a/litellm/__init__.py b/litellm/__init__.py
index cc67cd00ca..5a9b1dcb95 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -800,8 +800,12 @@ from .llms.gemini import GeminiConfig
 from .llms.nlp_cloud import NLPCloudConfig
 from .llms.aleph_alpha import AlephAlphaConfig
 from .llms.petals import PetalsConfig
-from .llms.vertex_httpx import VertexGeminiConfig, GoogleAIStudioGeminiConfig
-from .llms.vertex_ai import VertexAIConfig, VertexAITextEmbeddingConfig
+from .llms.vertex_httpx import (
+    VertexGeminiConfig,
+    GoogleAIStudioGeminiConfig,
+    VertexAIConfig,
+)
+from .llms.vertex_ai import VertexAITextEmbeddingConfig
 from .llms.vertex_ai_anthropic import VertexAIAnthropicConfig
 from .llms.sagemaker import SagemakerConfig
 from .llms.ollama import OllamaConfig
diff --git a/litellm/llms/vertex_ai.py b/litellm/llms/vertex_ai.py
index 8db4b6e85e..95a549234a 100644
--- a/litellm/llms/vertex_ai.py
+++ b/litellm/llms/vertex_ai.py
@@ -42,201 +42,6 @@ class VertexAIError(Exception):
         )  # Call the base class constructor with the parameters it needs


-class ExtendedGenerationConfig(dict):
-    """Extended parameters for the generation."""
-
-    def __init__(
-        self,
-        *,
-        temperature: Optional[float] = None,
-        top_p: Optional[float] = None,
-        top_k: Optional[int] = None,
-        candidate_count: Optional[int] = None,
-        max_output_tokens: Optional[int] = None,
-        stop_sequences: Optional[List[str]] = None,
-        response_mime_type: Optional[str] = None,
-        frequency_penalty: Optional[float] = None,
-        presence_penalty: Optional[float] = None,
-    ):
-        super().__init__(
-            temperature=temperature,
-            top_p=top_p,
-            top_k=top_k,
-            candidate_count=candidate_count,
-            max_output_tokens=max_output_tokens,
-            stop_sequences=stop_sequences,
-            response_mime_type=response_mime_type,
-            frequency_penalty=frequency_penalty,
-            presence_penalty=presence_penalty,
-        )
-
-
-class VertexAIConfig:
-    """
-    Reference: https://cloud.google.com/vertex-ai/docs/generative-ai/chat/test-chat-prompts
-    Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference
-
-    The class `VertexAIConfig` provides configuration for the VertexAI's API interface. Below are the parameters:
-
-    - `temperature` (float): This controls the degree of randomness in token selection.
-
-    - `max_output_tokens` (integer): This sets the limitation for the maximum amount of token in the text output. In this case, the default value is 256.
-
-    - `top_p` (float): The tokens are selected from the most probable to the least probable until the sum of their probabilities equals the `top_p` value. Default is 0.95.
-
-    - `top_k` (integer): The value of `top_k` determines how many of the most probable tokens are considered in the selection. For example, a `top_k` of 1 means the selected token is the most probable among all tokens. The default value is 40.
-
-    - `response_mime_type` (str): The MIME type of the response. The default value is 'text/plain'.
-
-    - `candidate_count` (int): Number of generated responses to return.
-
-    - `stop_sequences` (List[str]): The set of character sequences (up to 5) that will stop output generation. If specified, the API will stop at the first appearance of a stop sequence. The stop sequence will not be included as part of the response.
-
-    - `frequency_penalty` (float): This parameter is used to penalize the model from repeating the same output. The default value is 0.0.
-
-    - `presence_penalty` (float): This parameter is used to penalize the model from generating the same output as the input. The default value is 0.0.
-
-    Note: Please make sure to modify the default parameters as required for your use case.
-    """
-
-    temperature: Optional[float] = None
-    max_output_tokens: Optional[int] = None
-    top_p: Optional[float] = None
-    top_k: Optional[int] = None
-    response_mime_type: Optional[str] = None
-    candidate_count: Optional[int] = None
-    stop_sequences: Optional[list] = None
-    frequency_penalty: Optional[float] = None
-    presence_penalty: Optional[float] = None
-
-    def __init__(
-        self,
-        temperature: Optional[float] = None,
-        max_output_tokens: Optional[int] = None,
-        top_p: Optional[float] = None,
-        top_k: Optional[int] = None,
-        response_mime_type: Optional[str] = None,
-        candidate_count: Optional[int] = None,
-        stop_sequences: Optional[list] = None,
-        frequency_penalty: Optional[float] = None,
-        presence_penalty: Optional[float] = None,
-    ) -> None:
-        locals_ = locals()
-        for key, value in locals_.items():
-            if key != "self" and value is not None:
-                setattr(self.__class__, key, value)
-
-    @classmethod
-    def get_config(cls):
-        return {
-            k: v
-            for k, v in cls.__dict__.items()
-            if not k.startswith("__")
-            and not isinstance(
-                v,
-                (
-                    types.FunctionType,
-                    types.BuiltinFunctionType,
-                    classmethod,
-                    staticmethod,
-                ),
-            )
-            and v is not None
-        }
-
-    def get_supported_openai_params(self):
-        return [
-            "temperature",
-            "top_p",
-            "max_tokens",
-            "stream",
-            "tools",
-            "tool_choice",
-            "response_format",
-            "n",
-            "stop",
-            "extra_headers",
-        ]
-
-    def map_openai_params(self, non_default_params: dict, optional_params: dict):
-        for param, value in non_default_params.items():
-            if param == "temperature":
-                optional_params["temperature"] = value
-            if param == "top_p":
-                optional_params["top_p"] = value
-            if (
-                param == "stream" and value == True
-            ):  # sending stream = False, can cause it to get passed unchecked and raise issues
-                optional_params["stream"] = value
-            if param == "n":
-                optional_params["candidate_count"] = value
-            if param == "stop":
-                if isinstance(value, str):
-                    optional_params["stop_sequences"] = [value]
-                elif isinstance(value, list):
-                    optional_params["stop_sequences"] = value
-            if param == "max_tokens":
-                optional_params["max_output_tokens"] = value
-            if param == "response_format" and value["type"] == "json_object":
-                optional_params["response_mime_type"] = "application/json"
-            if param == "frequency_penalty":
-                optional_params["frequency_penalty"] = value
-            if param == "presence_penalty":
-                optional_params["presence_penalty"] = value
-            if param == "tools" and isinstance(value, list):
-                from vertexai.preview import generative_models
-
-                gtool_func_declarations = []
-                for tool in value:
-                    gtool_func_declaration = generative_models.FunctionDeclaration(
-                        name=tool["function"]["name"],
-                        description=tool["function"].get("description", ""),
-                        parameters=tool["function"].get("parameters", {}),
-                    )
-                    gtool_func_declarations.append(gtool_func_declaration)
-                optional_params["tools"] = [
-                    generative_models.Tool(
-                        function_declarations=gtool_func_declarations
-                    )
-                ]
-            if param == "tool_choice" and (
-                isinstance(value, str) or isinstance(value, dict)
-            ):
-                pass
-        return optional_params
-
-    def get_mapped_special_auth_params(self) -> dict:
-        """
-        Common auth params across bedrock/vertex_ai/azure/watsonx
-        """
-        return {"project": "vertex_project", "region_name": "vertex_location"}
-
-    def map_special_auth_params(self, non_default_params: dict, optional_params: dict):
-        mapped_params = self.get_mapped_special_auth_params()
-
-        for param, value in non_default_params.items():
-            if param in mapped_params:
-                optional_params[mapped_params[param]] = value
-        return optional_params
-
-    def get_eu_regions(self) -> List[str]:
-        """
-        Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations#available-regions
-        """
-        return [
-            "europe-central2",
-            "europe-north1",
-            "europe-southwest1",
-            "europe-west1",
-            "europe-west2",
-            "europe-west3",
-            "europe-west4",
-            "europe-west6",
-            "europe-west8",
-            "europe-west9",
-        ]
-
-
 import asyncio
@@ -445,6 +250,14 @@ def completion(
     logger_fn=None,
     acompletion: bool = False,
 ):
+    """
+    NON-GEMINI/ANTHROPIC CALLS.
+
+    This is the handler for OLDER PALM MODELS and VERTEX AI MODEL GARDEN
+
+    For Vertex AI Anthropic: `vertex_anthropic.py`
+    For Gemini: `vertex_httpx.py`
+    """
     try:
         import vertexai
     except:
diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py
index f7aa2d5932..ce59ffeb09 100644
--- a/litellm/llms/vertex_httpx.py
+++ b/litellm/llms/vertex_httpx.py
@@ -50,6 +50,111 @@ from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
 from .base import BaseLLM


+class VertexAIConfig:
+    """
+    Reference: https://cloud.google.com/vertex-ai/docs/generative-ai/chat/test-chat-prompts
+    Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference
+
+    The class `VertexAIConfig` provides configuration for the VertexAI's API interface. Below are the parameters:
+
+    - `temperature` (float): This controls the degree of randomness in token selection.
+
+    - `max_output_tokens` (integer): This sets the limitation for the maximum amount of token in the text output. In this case, the default value is 256.
+
+    - `top_p` (float): The tokens are selected from the most probable to the least probable until the sum of their probabilities equals the `top_p` value. Default is 0.95.
+
+    - `top_k` (integer): The value of `top_k` determines how many of the most probable tokens are considered in the selection. For example, a `top_k` of 1 means the selected token is the most probable among all tokens. The default value is 40.
+
+    - `response_mime_type` (str): The MIME type of the response. The default value is 'text/plain'.
+
+    - `candidate_count` (int): Number of generated responses to return.
+
+    - `stop_sequences` (List[str]): The set of character sequences (up to 5) that will stop output generation. If specified, the API will stop at the first appearance of a stop sequence. The stop sequence will not be included as part of the response.
+
+    - `frequency_penalty` (float): This parameter is used to penalize the model from repeating the same output. The default value is 0.0.
+
+    - `presence_penalty` (float): This parameter is used to penalize the model from generating the same output as the input. The default value is 0.0.
+
+    Note: Please make sure to modify the default parameters as required for your use case.
+    """
+
+    temperature: Optional[float] = None
+    max_output_tokens: Optional[int] = None
+    top_p: Optional[float] = None
+    top_k: Optional[int] = None
+    response_mime_type: Optional[str] = None
+    candidate_count: Optional[int] = None
+    stop_sequences: Optional[list] = None
+    frequency_penalty: Optional[float] = None
+    presence_penalty: Optional[float] = None
+
+    def __init__(
+        self,
+        temperature: Optional[float] = None,
+        max_output_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+        top_k: Optional[int] = None,
+        response_mime_type: Optional[str] = None,
+        candidate_count: Optional[int] = None,
+        stop_sequences: Optional[list] = None,
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+    ) -> None:
+        locals_ = locals()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return {
+            k: v
+            for k, v in cls.__dict__.items()
+            if not k.startswith("__")
+            and not isinstance(
+                v,
+                (
+                    types.FunctionType,
+                    types.BuiltinFunctionType,
+                    classmethod,
+                    staticmethod,
+                ),
+            )
+            and v is not None
+        }
+
+    def get_mapped_special_auth_params(self) -> dict:
+        """
+        Common auth params across bedrock/vertex_ai/azure/watsonx
+        """
+        return {"project": "vertex_project", "region_name": "vertex_location"}
+
+    def map_special_auth_params(self, non_default_params: dict, optional_params: dict):
+        mapped_params = self.get_mapped_special_auth_params()
+
+        for param, value in non_default_params.items():
+            if param in mapped_params:
+                optional_params[mapped_params[param]] = value
+        return optional_params
+
+    def get_eu_regions(self) -> List[str]:
+        """
+        Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations#available-regions
+        """
+        return [
+            "europe-central2",
+            "europe-north1",
+            "europe-southwest1",
+            "europe-west1",
+            "europe-west2",
+            "europe-west3",
+            "europe-west4",
+            "europe-west6",
+            "europe-west8",
+            "europe-west9",
+        ]
+
+
 class GoogleAIStudioGeminiConfig:  # key diff from VertexAI - 'frequency_penalty' and 'presence_penalty' not supported
     """
     Reference: https://ai.google.dev/api/rest/v1beta/GenerationConfig
@@ -326,6 +431,7 @@ class VertexGeminiConfig:
             "stop",
             "frequency_penalty",
             "presence_penalty",
+            "extra_headers",
         ]

     def map_tool_choice_values(
@@ -691,7 +797,9 @@ class VertexLLM(BaseLLM):
                     )
                     tools.append(_tool_response_chunk)

-        chat_completion_message["content"] = content_str
+        chat_completion_message["content"] = (
+            content_str if len(content_str) > 0 else None
+        )
         chat_completion_message["tool_calls"] = tools

         choice = litellm.Choices(
diff --git a/litellm/main.py b/litellm/main.py
index 37ae125b99..76dd33dd6e 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -2080,6 +2080,28 @@ def completion(
                     headers=headers,
                     custom_prompt_dict=custom_prompt_dict,
                 )
+            elif "gemini" in model:
+                model_response = vertex_chat_completion.completion(  # type: ignore
+                    model=model,
+                    messages=messages,
+                    model_response=model_response,
+                    print_verbose=print_verbose,
+                    optional_params=new_params,
+                    litellm_params=litellm_params,
+                    logger_fn=logger_fn,
+                    encoding=encoding,
+                    vertex_location=vertex_ai_location,
+                    vertex_project=vertex_ai_project,
+                    vertex_credentials=vertex_credentials,
+                    gemini_api_key=None,
+                    logging_obj=logging,
+                    acompletion=acompletion,
+                    timeout=timeout,
+                    custom_llm_provider=custom_llm_provider,
+                    client=client,
+                    api_base=api_base,
+                    extra_headers=extra_headers,
+                )
             else:
                 model_response = vertex_ai.completion(
model=model, @@ -2099,8 +2121,8 @@ def completion( if ( "stream" in optional_params - and optional_params["stream"] == True - and acompletion == False + and optional_params["stream"] is True + and acompletion is False ): response = CustomStreamWrapper( model_response, diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py index 9c11a42484..5faa9e6afa 100644 --- a/litellm/tests/test_amazing_vertex_completion.py +++ b/litellm/tests/test_amazing_vertex_completion.py @@ -501,7 +501,7 @@ async def test_async_vertexai_streaming_response(): user_message = "Hello, how are you?" messages = [{"content": user_message, "role": "user"}] response = await acompletion( - model="gemini-pro", + model=model, messages=messages, temperature=0.7, timeout=5, @@ -1311,6 +1311,7 @@ async def test_gemini_pro_async_function_calling(): model="gemini-pro", messages=messages, tools=tools, tool_choice="auto" ) print(f"completion: {completion}") + print(f"message content: {completion.choices[0].message.content}") assert completion.choices[0].message.content is None assert len(completion.choices[0].message.tool_calls) == 1 diff --git a/litellm/utils.py b/litellm/utils.py index 13be18422c..408b2ffad9 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -2824,7 +2824,6 @@ def get_optional_params( or model in litellm.vertex_text_models or model in litellm.vertex_code_text_models or model in litellm.vertex_language_models - or model in litellm.vertex_embedding_models or model in litellm.vertex_vision_models ): print_verbose(f"(start) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK") @@ -2834,9 +2833,15 @@ def get_optional_params( ) _check_valid_arg(supported_params=supported_params) - optional_params = litellm.VertexAIConfig().map_openai_params( + optional_params = litellm.VertexGeminiConfig().map_openai_params( non_default_params=non_default_params, optional_params=optional_params, + model=model, + drop_params=( + drop_params + if drop_params is not None and isinstance(drop_params, bool) + else False + ), ) print_verbose( @@ -2852,7 +2857,7 @@ def get_optional_params( optional_params=optional_params, model=model, ) - elif custom_llm_provider == "vertex_ai_beta" or custom_llm_provider == "gemini": + elif custom_llm_provider == "vertex_ai_beta": supported_params = get_supported_openai_params( model=model, custom_llm_provider=custom_llm_provider ) @@ -3936,12 +3941,12 @@ def get_supported_openai_params( return litellm.GoogleAIStudioGeminiConfig().get_supported_openai_params() elif custom_llm_provider == "vertex_ai": if request_type == "chat_completion": - return litellm.VertexAIConfig().get_supported_openai_params() + return litellm.VertexGeminiConfig().get_supported_openai_params() elif request_type == "embeddings": return litellm.VertexAITextEmbeddingConfig().get_supported_openai_params() elif custom_llm_provider == "vertex_ai_beta": if request_type == "chat_completion": - return litellm.VertexAIConfig().get_supported_openai_params() + return litellm.VertexGeminiConfig().get_supported_openai_params() elif request_type == "embeddings": return litellm.VertexAITextEmbeddingConfig().get_supported_openai_params() elif custom_llm_provider == "sagemaker":
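
For context, a minimal sketch of exercising the migrated Gemini path from the public API. This is illustrative only: it assumes valid Vertex AI credentials are configured in the environment (e.g. GOOGLE_APPLICATION_CREDENTIALS plus a project and location), and the weather tool schema below is a hypothetical example, not part of this patch.

import litellm

# Hypothetical tool schema, for illustration only.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {"location": {"type": "string"}},
                "required": ["location"],
            },
        },
    }
]

# With this patch, any "gemini" model on the vertex_ai provider routes
# through vertex_httpx.VertexLLM instead of the SDK-based handler in
# vertex_ai.py, which now only serves older PaLM models and Model Garden.
response = litellm.completion(
    model="vertex_ai/gemini-pro",
    messages=[{"role": "user", "content": "What is the weather in Boston?"}],
    tools=tools,
    tool_choice="auto",
)

# Per the VertexLLM change above, `content` is None (not "") when the
# model responds only with tool calls.
assert response.choices[0].message.content is None
print(response.choices[0].message.tool_calls)

Returning None instead of an empty string for tool-call-only responses mirrors the OpenAI response shape, which the updated assertion in test_gemini_pro_async_function_calling relies on.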