From 649c3bb0ddcf014bc397c521c97f7058bcee3740 Mon Sep 17 00:00:00 2001
From: David Manouchehri <david.manouchehri@ai.moda>
Date: Thu, 11 Apr 2024 22:49:25 +0000
Subject: [PATCH] (feat) - Add support for JSON mode in Vertex AI

---
 litellm/llms/vertex_ai.py | 13 +++++++++++++
 litellm/utils.py          | 12 ++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/litellm/llms/vertex_ai.py b/litellm/llms/vertex_ai.py
index ddfc8e9fb7..9a5e928281 100644
--- a/litellm/llms/vertex_ai.py
+++ b/litellm/llms/vertex_ai.py
@@ -25,6 +25,7 @@ class VertexAIError(Exception):
 class VertexAIConfig:
     """
     Reference: https://cloud.google.com/vertex-ai/docs/generative-ai/chat/test-chat-prompts
+    Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference
 
     The class `VertexAIConfig` provides configuration for the VertexAI's API interface. Below are the parameters:
 
@@ -36,6 +37,12 @@ class VertexAIConfig:
 
     - `top_k` (integer): The value of `top_k` determines how many of the most probable tokens are considered in the selection. For example, a `top_k` of 1 means the selected token is the most probable among all tokens. The default value is 40.
 
+    - `response_mime_type` (str): The MIME type of the response. The default value is 'text/plain'; set it to 'application/json' to request JSON output (JSON mode).
+
+    - `candidate_count` (int): The number of generated responses (candidates) to return. Populated from the OpenAI-style `n` parameter.
+
+    - `stop_sequences` (List[str]): The set of character sequences (up to 5) that stop output generation. If specified, the API stops at the first appearance of a stop sequence, which is not included in the response. Populated from the OpenAI-style `stop` parameter.
+
     Note: Please make sure to modify the default parameters as required for your use case.
     """
 
@@ -43,6 +50,9 @@ class VertexAIConfig:
     max_output_tokens: Optional[int] = None
     top_p: Optional[float] = None
     top_k: Optional[int] = None
+    response_mime_type: Optional[str] = None
+    candidate_count: Optional[int] = None
+    stop_sequences: Optional[list] = None
 
     def __init__(
         self,
@@ -50,6 +60,9 @@ class VertexAIConfig:
         max_output_tokens: Optional[int] = None,
         top_p: Optional[float] = None,
         top_k: Optional[int] = None,
+        response_mime_type: Optional[str] = None,
+        candidate_count: Optional[int] = None,
+        stop_sequences: Optional[list] = None,
     ) -> None:
         locals_ = locals()
         for key, value in locals_.items():
diff --git a/litellm/utils.py b/litellm/utils.py
index 7540367d91..7bef853fe6 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -4840,8 +4840,17 @@ def get_optional_params(
             optional_params["top_p"] = top_p
         if stream:
             optional_params["stream"] = stream
+        if n is not None:
+            optional_params["candidate_count"] = n
+        if stop is not None:
+            if isinstance(stop, str):
+                optional_params["stop_sequences"] = [stop]
+            elif isinstance(stop, list):
+                optional_params["stop_sequences"] = stop
         if max_tokens is not None:
             optional_params["max_output_tokens"] = max_tokens
+        if response_format is not None and response_format["type"] == "json_object":
+            optional_params["response_mime_type"] = "application/json"
         if tools is not None and isinstance(tools, list):
             from vertexai.preview import generative_models
 
@@ -5528,6 +5537,9 @@ def get_supported_openai_params(model: str, custom_llm_provider: str):
             "stream",
             "tools",
             "tool_choice",
+            "response_format",
+            "n",
+            "stop",
         ]
     elif custom_llm_provider == "sagemaker":
         return ["stream", "temperature", "max_tokens", "top_p", "stop", "n"]