From a1d9e96b310770838bab03fa9a967940d4a45a72 Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Thu, 26 Sep 2024 16:41:44 -0700 Subject: [PATCH] LiteLLM Minor Fixes & Improvements (09/25/2024) (#5893) * fix(langfuse.py): support new langfuse prompt_chat class init params * fix(langfuse.py): handle new init values on prompt chat + prompt text templates fixes error caused during langfuse logging * docs(openai_compatible.md): clarify `openai/` handles correct routing for `/v1/completions` route Fixes https://github.com/BerriAI/litellm/issues/5876 * fix(utils.py): handle unmapped gemini model optional param translation Fixes https://github.com/BerriAI/litellm/issues/5888 * fix(o1_transformation.py): fix o-1 validation, to not raise error if temperature=1 Fixes https://github.com/BerriAI/litellm/issues/5884 * fix(prisma_client.py): refresh iam token Fixes https://github.com/BerriAI/litellm/issues/5896 * fix: pass drop params where required * fix(utils.py): pass drop_params correctly * fix(types/vertex_ai.py): fix generation config * test(test_max_completion_tokens.py): fix test * fix(vertex_and_google_ai_studio_gemini.py): fix map openai params --- .../docs/providers/openai_compatible.md | 2 +- litellm/integrations/langfuse.py | 37 +- .../llms/OpenAI/chat/gpt_transformation.py | 13 +- litellm/llms/OpenAI/chat/o1_transformation.py | 28 +- litellm/llms/OpenAI/openai.py | 8 +- .../gemini/transformation.py | 20 +- .../vertex_and_google_ai_studio_gemini.py | 329 ++++++------------ .../ai21/transformation.py | 7 +- .../llama3/transformation.py | 7 +- litellm/proxy/_new_secret_config.yaml | 14 +- litellm/proxy/auth/rds_iam_token.py | 42 ++- litellm/proxy/db/prisma_client.py | 106 ++++++ litellm/proxy/proxy_cli.py | 30 +- litellm/proxy/utils.py | 27 +- litellm/secret_managers/main.py | 5 +- litellm/tests/test_alangfuse.py | 256 ++++++++++++++ litellm/types/llms/vertex_ai.py | 1 + litellm/utils.py | 21 +- package-lock.json | 56 +++ package.json | 1 + .../test_max_completion_tokens.py | 5 +- tests/llm_translation/test_optional_params.py | 32 ++ 22 files changed, 755 insertions(+), 292 deletions(-) create mode 100644 litellm/proxy/db/prisma_client.py diff --git a/docs/my-website/docs/providers/openai_compatible.md b/docs/my-website/docs/providers/openai_compatible.md index f6225d716..c7f9bf6f4 100644 --- a/docs/my-website/docs/providers/openai_compatible.md +++ b/docs/my-website/docs/providers/openai_compatible.md @@ -7,7 +7,7 @@ To call models hosted behind an openai proxy, make 2 changes: 1. For `/chat/completions`: Put `openai/` in front of your model name, so litellm knows you're trying to call an openai `/chat/completions` endpoint. -2. For `/completions`: Put `text-completion-openai/` in front of your model name, so litellm knows you're trying to call an openai `/completions` endpoint. +2. For `/completions`: Put `text-completion-openai/` in front of your model name, so litellm knows you're trying to call an openai `/completions` endpoint. [NOT REQUIRED for `openai/` endpoints called via `/v1/completions` route]. 2. **Do NOT** add anything additional to the base url e.g. `/v1/embedding`. LiteLLM uses the openai-client to make these calls, and that automatically adds the relevant endpoints. 
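The doc change above pins down the routing rule: with the `openai/` prefix, calls made via the proxy's `/v1/completions` route are routed to the correct upstream endpoint, so the `text-completion-openai/` prefix is only needed when targeting `/completions` directly. A minimal sketch of the two call patterns, assuming a hypothetical OpenAI-compatible server; the base URL, API key, and model name below are placeholders, not values taken from this patch.

```python
import litellm

# Chat completions: the `openai/` prefix routes to the /chat/completions
# endpoint on the custom base URL.
chat_response = litellm.completion(
    model="openai/my-hosted-model",            # placeholder model name
    messages=[{"role": "user", "content": "Hello"}],
    api_base="http://localhost:8000/v1",       # placeholder OpenAI-compatible server
    api_key="sk-placeholder",
)

# Text completions: `text-completion-openai/` targets the /completions
# endpoint. Per the note above, it is not required when an `openai/` model
# is called via the /v1/completions route.
text_response = litellm.text_completion(
    model="text-completion-openai/my-hosted-model",
    prompt="Hello",
    api_base="http://localhost:8000/v1",
    api_key="sk-placeholder",
)
```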
diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py index b2a084da3..0819cc703 100644 --- a/litellm/integrations/langfuse.py +++ b/litellm/integrations/langfuse.py @@ -1,6 +1,7 @@ #### What this does #### # On success, logs events to Langfuse import copy +import inspect import os import traceback @@ -676,21 +677,37 @@ def _add_prompt_to_generation_params( elif "version" in user_prompt and "prompt" in user_prompt: # prompts if isinstance(user_prompt["prompt"], str): - _prompt_obj = Prompt_Text( - name=user_prompt["name"], - prompt=user_prompt["prompt"], - version=user_prompt["version"], - config=user_prompt.get("config", None), + prompt_text_params = getattr( + Prompt_Text, "model_fields", Prompt_Text.__fields__ ) + _data = { + "name": user_prompt["name"], + "prompt": user_prompt["prompt"], + "version": user_prompt["version"], + "config": user_prompt.get("config", None), + } + if "labels" in prompt_text_params and "tags" in prompt_text_params: + _data["labels"] = user_prompt.get("labels", []) or [] + _data["tags"] = user_prompt.get("tags", []) or [] + _prompt_obj = Prompt_Text(**_data) # type: ignore generation_params["prompt"] = TextPromptClient(prompt=_prompt_obj) elif isinstance(user_prompt["prompt"], list): - _prompt_obj = Prompt_Chat( - name=user_prompt["name"], - prompt=user_prompt["prompt"], - version=user_prompt["version"], - config=user_prompt.get("config", None), + prompt_chat_params = getattr( + Prompt_Chat, "model_fields", Prompt_Chat.__fields__ ) + _data = { + "name": user_prompt["name"], + "prompt": user_prompt["prompt"], + "version": user_prompt["version"], + "config": user_prompt.get("config", None), + } + if "labels" in prompt_chat_params and "tags" in prompt_chat_params: + _data["labels"] = user_prompt.get("labels", []) or [] + _data["tags"] = user_prompt.get("tags", []) or [] + + _prompt_obj = Prompt_Chat(**_data) # type: ignore + generation_params["prompt"] = ChatPromptClient(prompt=_prompt_obj) else: verbose_logger.error( diff --git a/litellm/llms/OpenAI/chat/gpt_transformation.py b/litellm/llms/OpenAI/chat/gpt_transformation.py index 4ff4790c9..6331322bf 100644 --- a/litellm/llms/OpenAI/chat/gpt_transformation.py +++ b/litellm/llms/OpenAI/chat/gpt_transformation.py @@ -125,7 +125,11 @@ class OpenAIGPTConfig: return base_params + model_specific_params def _map_openai_params( - self, non_default_params: dict, optional_params: dict, model: str + self, + non_default_params: dict, + optional_params: dict, + model: str, + drop_params: bool, ) -> dict: supported_openai_params = self.get_supported_openai_params(model) for param, value in non_default_params.items(): @@ -134,10 +138,15 @@ class OpenAIGPTConfig: return optional_params def map_openai_params( - self, non_default_params: dict, optional_params: dict, model: str + self, + non_default_params: dict, + optional_params: dict, + model: str, + drop_params: bool, ) -> dict: return self._map_openai_params( non_default_params=non_default_params, optional_params=optional_params, model=model, + drop_params=drop_params, ) diff --git a/litellm/llms/OpenAI/chat/o1_transformation.py b/litellm/llms/OpenAI/chat/o1_transformation.py index 200097f67..c7581ae27 100644 --- a/litellm/llms/OpenAI/chat/o1_transformation.py +++ b/litellm/llms/OpenAI/chat/o1_transformation.py @@ -57,7 +57,6 @@ class OpenAIO1Config(OpenAIGPTConfig): "parallel_tool_calls", "function_call", "functions", - "temperature", "top_p", "n", "presence_penalty", @@ -73,13 +72,36 @@ class OpenAIO1Config(OpenAIGPTConfig): ] def 
map_openai_params( - self, non_default_params: dict, optional_params: dict, model: str + self, + non_default_params: dict, + optional_params: dict, + model: str, + drop_params: bool, ): if "max_tokens" in non_default_params: optional_params["max_completion_tokens"] = non_default_params.pop( "max_tokens" ) - return super()._map_openai_params(non_default_params, optional_params, model) + if "temperature" in non_default_params: + temperature_value: Optional[float] = non_default_params.pop("temperature") + if temperature_value is not None: + if temperature_value == 0 or temperature_value == 1: + optional_params["temperature"] = temperature_value + else: + ## UNSUPPORTED TEMPERATURE VALUE + if litellm.drop_params is True or drop_params is True: + pass + else: + raise litellm.utils.UnsupportedParamsError( + message="O-1 doesn't support temperature={}. To drop unsupported openai params from the call, set `litellm.drop_params = True`".format( + temperature_value + ), + status_code=400, + ) + + return super()._map_openai_params( + non_default_params, optional_params, model, drop_params + ) def is_model_o1_reasoning_model(self, model: str) -> bool: if model in litellm.open_ai_chat_completion_models and "o1" in model: diff --git a/litellm/llms/OpenAI/openai.py b/litellm/llms/OpenAI/openai.py index aafb14bd1..5df5b1132 100644 --- a/litellm/llms/OpenAI/openai.py +++ b/litellm/llms/OpenAI/openai.py @@ -413,7 +413,11 @@ class OpenAIConfig: return optional_params def map_openai_params( - self, non_default_params: dict, optional_params: dict, model: str + self, + non_default_params: dict, + optional_params: dict, + model: str, + drop_params: bool, ) -> dict: """ """ if litellm.OpenAIO1Config().is_model_o1_reasoning_model(model=model): @@ -421,11 +425,13 @@ class OpenAIConfig: non_default_params=non_default_params, optional_params=optional_params, model=model, + drop_params=drop_params, ) return litellm.OpenAIGPTConfig().map_openai_params( non_default_params=non_default_params, optional_params=optional_params, model=model, + drop_params=drop_params, ) diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/transformation.py b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/transformation.py index 78874d544..075c0d169 100644 --- a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/transformation.py +++ b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/transformation.py @@ -22,7 +22,7 @@ from litellm.types.llms.vertex_ai import ( Tools, ) -from ..common_utils import get_supports_system_message, get_supports_response_schema +from ..common_utils import get_supports_response_schema, get_supports_system_message from ..vertex_ai_non_gemini import _gemini_convert_messages_with_history @@ -73,8 +73,14 @@ def _transform_request_body( safety_settings: Optional[List[SafetSettingsConfig]] = optional_params.pop( "safety_settings", None ) # type: ignore + config_fields = GenerationConfig.__annotations__.keys() + + filtered_params = { + k: v for k, v in optional_params.items() if k in config_fields + } + generation_config: Optional[GenerationConfig] = GenerationConfig( - **optional_params + **filtered_params ) data = RequestBody(contents=content) if system_instructions is not None: @@ -104,7 +110,7 @@ def sync_transform_request_body( timeout: Optional[Union[float, httpx.Timeout]], extra_headers: Optional[dict], optional_params: dict, - logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, # type: ignore custom_llm_provider: 
Literal["vertex_ai", "vertex_ai_beta", "gemini"], litellm_params: dict, ) -> RequestBody: @@ -146,7 +152,7 @@ async def async_transform_request_body( timeout: Optional[Union[float, httpx.Timeout]], extra_headers: Optional[dict], optional_params: dict, - logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, # type: ignore custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"], litellm_params: dict, ) -> RequestBody: @@ -199,6 +205,7 @@ def _transform_system_message( if supports_system_message is True: for idx, message in enumerate(messages): if message["role"] == "system": + _system_content_block: Optional[PartType] = None if isinstance(message["content"], str): _system_content_block = PartType(text=message["content"]) elif isinstance(message["content"], list): @@ -206,8 +213,9 @@ def _transform_system_message( for content in message["content"]: system_text += content.get("text") or "" _system_content_block = PartType(text=system_text) - system_content_blocks.append(_system_content_block) - system_prompt_indices.append(idx) + if _system_content_block is not None: + system_content_blocks.append(_system_content_block) + system_prompt_indices.append(idx) if len(system_prompt_indices) > 0: for idx in reversed(system_prompt_indices): messages.pop(idx) diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py index 35a7e8337..78bd51003 100644 --- a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py +++ b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py @@ -252,233 +252,6 @@ class VertexAIConfig: ] -class GoogleAIStudioGeminiConfig: # key diff from VertexAI - 'frequency_penalty' and 'presence_penalty' not supported - """ - Reference: https://ai.google.dev/api/rest/v1beta/GenerationConfig - - The class `GoogleAIStudioGeminiConfig` provides configuration for the Google AI Studio's Gemini API interface. Below are the parameters: - - - `temperature` (float): This controls the degree of randomness in token selection. - - - `max_output_tokens` (integer): This sets the limitation for the maximum amount of token in the text output. In this case, the default value is 256. - - - `top_p` (float): The tokens are selected from the most probable to the least probable until the sum of their probabilities equals the `top_p` value. Default is 0.95. - - - `top_k` (integer): The value of `top_k` determines how many of the most probable tokens are considered in the selection. For example, a `top_k` of 1 means the selected token is the most probable among all tokens. The default value is 40. - - - `response_mime_type` (str): The MIME type of the response. The default value is 'text/plain'. Other values - `application/json`. - - - `response_schema` (dict): Optional. Output response schema of the generated candidate text when response mime type can have schema. Schema can be objects, primitives or arrays and is a subset of OpenAPI schema. If set, a compatible response_mime_type must also be set. Compatible mimetypes: application/json: Schema for JSON response. - - - `candidate_count` (int): Number of generated responses to return. - - - `stop_sequences` (List[str]): The set of character sequences (up to 5) that will stop output generation. If specified, the API will stop at the first appearance of a stop sequence. 
The stop sequence will not be included as part of the response. - - Note: Please make sure to modify the default parameters as required for your use case. - """ - - temperature: Optional[float] = None - max_output_tokens: Optional[int] = None - top_p: Optional[float] = None - top_k: Optional[int] = None - response_mime_type: Optional[str] = None - response_schema: Optional[dict] = None - candidate_count: Optional[int] = None - stop_sequences: Optional[list] = None - - def __init__( - self, - temperature: Optional[float] = None, - max_output_tokens: Optional[int] = None, - top_p: Optional[float] = None, - top_k: Optional[int] = None, - response_mime_type: Optional[str] = None, - response_schema: Optional[dict] = None, - candidate_count: Optional[int] = None, - stop_sequences: Optional[list] = None, - ) -> None: - locals_ = locals() - for key, value in locals_.items(): - if key != "self" and value is not None: - setattr(self.__class__, key, value) - - @classmethod - def get_config(cls): - return { - k: v - for k, v in cls.__dict__.items() - if not k.startswith("__") - and not isinstance( - v, - ( - types.FunctionType, - types.BuiltinFunctionType, - classmethod, - staticmethod, - ), - ) - and v is not None - } - - def get_supported_openai_params(self): - return [ - "temperature", - "top_p", - "max_tokens", - "max_completion_tokens", - "stream", - "tools", - "tool_choice", - "functions", - "response_format", - "n", - "stop", - ] - - def _map_function(self, value: List[dict]) -> List[Tools]: - gtool_func_declarations = [] - googleSearchRetrieval: Optional[dict] = None - - for tool in value: - openai_function_object: Optional[ChatCompletionToolParamFunctionChunk] = ( - None - ) - if "function" in tool: # tools list - openai_function_object = ChatCompletionToolParamFunctionChunk( # type: ignore - **tool["function"] - ) - elif "name" in tool: # functions list - openai_function_object = ChatCompletionToolParamFunctionChunk(**tool) # type: ignore - - # check if grounding - if tool.get("googleSearchRetrieval", None) is not None: - googleSearchRetrieval = tool["googleSearchRetrieval"] - elif openai_function_object is not None: - gtool_func_declaration = FunctionDeclaration( - name=openai_function_object["name"], - description=openai_function_object.get("description", ""), - parameters=openai_function_object.get("parameters", {}), - ) - gtool_func_declarations.append(gtool_func_declaration) - else: - # assume it's a provider-specific param - verbose_logger.warning( - "Invalid tool={}. Use `litellm.set_verbose` or `litellm --detailed_debug` to see raw request." 
- ) - - _tools = Tools( - function_declarations=gtool_func_declarations, - ) - if googleSearchRetrieval is not None: - _tools["googleSearchRetrieval"] = googleSearchRetrieval - return [_tools] - - def map_tool_choice_values( - self, model: str, tool_choice: Union[str, dict] - ) -> Optional[ToolConfig]: - if tool_choice == "none": - return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="NONE")) - elif tool_choice == "required": - return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="ANY")) - elif tool_choice == "auto": - return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="AUTO")) - elif isinstance(tool_choice, dict): - # only supported for anthropic + mistral models - https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ToolChoice.html - name = tool_choice.get("function", {}).get("name", "") - return ToolConfig( - functionCallingConfig=FunctionCallingConfig( - mode="ANY", allowed_function_names=[name] - ) - ) - else: - raise litellm.utils.UnsupportedParamsError( - message="VertexAI doesn't support tool_choice={}. Supported tool_choice values=['auto', 'required', json object]. To drop it from the call, set `litellm.drop_params = True.".format( - tool_choice - ), - status_code=400, - ) - - def map_openai_params( - self, - model: str, - non_default_params: dict, - optional_params: dict, - ): - for param, value in non_default_params.items(): - if param == "temperature": - optional_params["temperature"] = value - if param == "top_p": - optional_params["top_p"] = value - if ( - param == "stream" and value is True - ): # sending stream = False, can cause it to get passed unchecked and raise issues - optional_params["stream"] = value - if param == "n": - optional_params["candidate_count"] = value - if param == "stop": - if isinstance(value, str): - optional_params["stop_sequences"] = [value] - elif isinstance(value, list): - optional_params["stop_sequences"] = value - if param == "max_tokens" or param == "max_completion_tokens": - optional_params["max_output_tokens"] = value - if param == "response_format": # type: ignore - if value["type"] == "json_object": # type: ignore - if value["type"] == "json_object": # type: ignore - optional_params["response_mime_type"] = "application/json" - elif value["type"] == "text": # type: ignore - optional_params["response_mime_type"] = "text/plain" - if "response_schema" in value: # type: ignore - optional_params["response_mime_type"] = "application/json" - optional_params["response_schema"] = value["response_schema"] # type: ignore - elif value["type"] == "json_schema": # type: ignore - if "json_schema" in value and "schema" in value["json_schema"]: # type: ignore - optional_params["response_mime_type"] = "application/json" - optional_params["response_schema"] = value["json_schema"]["schema"] # type: ignore - if (param == "tools" or param == "functions") and isinstance(value, list): - optional_params["tools"] = self._map_function(value=value) - optional_params["litellm_param_is_function_call"] = ( - True if param == "functions" else False - ) - if param == "tool_choice" and ( - isinstance(value, str) or isinstance(value, dict) - ): - _tool_choice_value = self.map_tool_choice_values( - model=model, tool_choice=value # type: ignore - ) - if _tool_choice_value is not None: - optional_params["tool_choice"] = _tool_choice_value - return optional_params - - def get_mapped_special_auth_params(self) -> dict: - """ - Common auth params across bedrock/vertex_ai/azure/watsonx - """ - return {"project": 
"vertex_project", "region_name": "vertex_location"} - - def map_special_auth_params(self, non_default_params: dict, optional_params: dict): - mapped_params = self.get_mapped_special_auth_params() - - for param, value in non_default_params.items(): - if param in mapped_params: - optional_params[mapped_params[param]] = value - return optional_params - - def get_flagged_finish_reasons(self) -> Dict[str, str]: - """ - Return Dictionary of finish reasons which indicate response was flagged - - and what it means - """ - return { - "SAFETY": "The token generation was stopped as the response was flagged for safety reasons. NOTE: When streaming the Candidate.content will be empty if content filters blocked the output.", - "RECITATION": "The token generation was stopped as the response was flagged for unauthorized citations.", - "BLOCKLIST": "The token generation was stopped as the response was flagged for the terms which are included from the terminology blocklist.", - "PROHIBITED_CONTENT": "The token generation was stopped as the response was flagged for the prohibited contents.", - "SPII": "The token generation was stopped as the response was flagged for Sensitive Personally Identifiable Information (SPII) contents.", - } - - class VertexGeminiConfig: """ Reference: https://cloud.google.com/vertex-ai/docs/generative-ai/chat/test-chat-prompts @@ -752,6 +525,108 @@ class VertexGeminiConfig: return exception_string +class GoogleAIStudioGeminiConfig( + VertexGeminiConfig +): # key diff from VertexAI - 'frequency_penalty' and 'presence_penalty' not supported + """ + Reference: https://ai.google.dev/api/rest/v1beta/GenerationConfig + + The class `GoogleAIStudioGeminiConfig` provides configuration for the Google AI Studio's Gemini API interface. Below are the parameters: + + - `temperature` (float): This controls the degree of randomness in token selection. + + - `max_output_tokens` (integer): This sets the limitation for the maximum amount of token in the text output. In this case, the default value is 256. + + - `top_p` (float): The tokens are selected from the most probable to the least probable until the sum of their probabilities equals the `top_p` value. Default is 0.95. + + - `top_k` (integer): The value of `top_k` determines how many of the most probable tokens are considered in the selection. For example, a `top_k` of 1 means the selected token is the most probable among all tokens. The default value is 40. + + - `response_mime_type` (str): The MIME type of the response. The default value is 'text/plain'. Other values - `application/json`. + + - `response_schema` (dict): Optional. Output response schema of the generated candidate text when response mime type can have schema. Schema can be objects, primitives or arrays and is a subset of OpenAPI schema. If set, a compatible response_mime_type must also be set. Compatible mimetypes: application/json: Schema for JSON response. + + - `candidate_count` (int): Number of generated responses to return. + + - `stop_sequences` (List[str]): The set of character sequences (up to 5) that will stop output generation. If specified, the API will stop at the first appearance of a stop sequence. The stop sequence will not be included as part of the response. + + Note: Please make sure to modify the default parameters as required for your use case. 
+ """ + + temperature: Optional[float] = None + max_output_tokens: Optional[int] = None + top_p: Optional[float] = None + top_k: Optional[int] = None + response_mime_type: Optional[str] = None + response_schema: Optional[dict] = None + candidate_count: Optional[int] = None + stop_sequences: Optional[list] = None + + def __init__( + self, + temperature: Optional[float] = None, + max_output_tokens: Optional[int] = None, + top_p: Optional[float] = None, + top_k: Optional[int] = None, + response_mime_type: Optional[str] = None, + response_schema: Optional[dict] = None, + candidate_count: Optional[int] = None, + stop_sequences: Optional[list] = None, + ) -> None: + locals_ = locals() + for key, value in locals_.items(): + if key != "self" and value is not None: + setattr(self.__class__, key, value) + + @classmethod + def get_config(cls): + return { + k: v + for k, v in cls.__dict__.items() + if not k.startswith("__") + and not isinstance( + v, + ( + types.FunctionType, + types.BuiltinFunctionType, + classmethod, + staticmethod, + ), + ) + and v is not None + } + + def get_supported_openai_params(self): + return [ + "temperature", + "top_p", + "max_tokens", + "max_completion_tokens", + "stream", + "tools", + "tool_choice", + "functions", + "response_format", + "n", + "stop", + ] + + def map_openai_params( + self, + model: str, + non_default_params: Dict, + optional_params: Dict, + drop_params: bool, + ): + # drop frequency_penalty and presence_penalty + if "frequency_penalty" in non_default_params: + del non_default_params["frequency_penalty"] + if "presence_penalty" in non_default_params: + del non_default_params["presence_penalty"] + return super().map_openai_params( + model, non_default_params, optional_params, drop_params + ) + + async def make_call( client: Optional[AsyncHTTPHandler], api_base: str, diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/ai21/transformation.py b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/ai21/transformation.py index 2d9d6076e..cb3364445 100644 --- a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/ai21/transformation.py +++ b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/ai21/transformation.py @@ -44,7 +44,11 @@ class VertexAIAi21Config: return litellm.OpenAIConfig().get_supported_openai_params(model="gpt-3.5-turbo") def map_openai_params( - self, non_default_params: dict, optional_params: dict, model: str + self, + non_default_params: dict, + optional_params: dict, + model: str, + drop_params: bool, ): if "max_completion_tokens" in non_default_params: non_default_params["max_tokens"] = non_default_params.pop( @@ -54,4 +58,5 @@ class VertexAIAi21Config: non_default_params=non_default_params, optional_params=optional_params, model=model, + drop_params=drop_params, ) diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/llama3/transformation.py b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/llama3/transformation.py index 683e0ff8e..2170a9241 100644 --- a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/llama3/transformation.py +++ b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/llama3/transformation.py @@ -50,7 +50,11 @@ class VertexAILlama3Config: return litellm.OpenAIConfig().get_supported_openai_params(model="gpt-3.5-turbo") def map_openai_params( - self, non_default_params: dict, optional_params: dict, model: str + self, + non_default_params: dict, + optional_params: 
dict, + model: str, + drop_params: bool, ): if "max_completion_tokens" in non_default_params: non_default_params["max_tokens"] = non_default_params.pop( @@ -60,4 +64,5 @@ class VertexAILlama3Config: non_default_params=non_default_params, optional_params=optional_params, model=model, + drop_params=drop_params, ) diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 8cc73b050..c52972be0 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -31,15 +31,21 @@ model_list: - model_name: "anthropic/*" litellm_params: model: "anthropic/*" - - model_name: "openai/*" + - model_name: "*" litellm_params: model: "openai/*" - model_name: "fireworks_ai/*" litellm_params: model: "fireworks_ai/*" configurable_clientside_auth_params: ["api_base"] - + - model_name: "gemini-flash-experimental" + litellm_params: + model: "vertex_ai/gemini-flash-experimental" litellm_settings: - success_callback: ["langfuse"] - cache: true \ No newline at end of file + success_callback: ["langfuse", "prometheus"] + failure_callback: ["prometheus"] + +general_settings: + proxy_budget_rescheduler_min_time: 1 + proxy_budget_rescheduler_max_time: 1 \ No newline at end of file diff --git a/litellm/proxy/auth/rds_iam_token.py b/litellm/proxy/auth/rds_iam_token.py index f83621584..474a9cac2 100644 --- a/litellm/proxy/auth/rds_iam_token.py +++ b/litellm/proxy/auth/rds_iam_token.py @@ -1,5 +1,5 @@ import os -from typing import Optional, Union +from typing import Any, Optional, Union import httpx @@ -34,7 +34,7 @@ def init_rds_client( # Iterate over parameters and update if needed for i, param in enumerate(params_to_check): if param and param.startswith("os.environ/"): - params_to_check[i] = get_secret(param) + params_to_check[i] = get_secret(param) # type: ignore # Assign updated values back to parameters ( aws_access_key_id, @@ -62,13 +62,13 @@ def init_rds_client( import boto3 if isinstance(timeout, float): - config = boto3.session.Config(connect_timeout=timeout, read_timeout=timeout) + config = boto3.session.Config(connect_timeout=timeout, read_timeout=timeout) # type: ignore elif isinstance(timeout, httpx.Timeout): - config = boto3.session.Config( + config = boto3.session.Config( # type: ignore connect_timeout=timeout.connect, read_timeout=timeout.read ) else: - config = boto3.session.Config() + config = boto3.session.Config() # type: ignore ### CHECK STS ### if ( @@ -105,6 +105,7 @@ def init_rds_client( region_name=region_name, config=config, ) + elif aws_role_name is not None and aws_session_name is not None: # use sts if role name passed in sts_client = boto3.client( @@ -144,6 +145,7 @@ def init_rds_client( region_name=region_name, config=config, ) + else: # aws_access_key_id is None, assume user is trying to auth using env variables # boto3 automatically reads env variables @@ -157,25 +159,31 @@ def init_rds_client( return client -def generate_iam_auth_token(db_host, db_port, db_user) -> str: +def generate_iam_auth_token( + db_host, db_port, db_user, client: Optional[Any] = None +) -> str: from urllib.parse import quote import boto3 - boto_client = init_rds_client( - aws_region_name=os.getenv("AWS_REGION_NAME"), - aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), - aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), - aws_session_name=os.getenv("AWS_SESSION_NAME"), - aws_profile_name=os.getenv("AWS_PROFILE_NAME"), - aws_role_name=os.getenv("AWS_ROLE_NAME", os.getenv("AWS_ROLE_ARN")), - aws_web_identity_token=os.getenv( - 
"AWS_WEB_IDENTITY_TOKEN", os.getenv("AWS_WEB_IDENTITY_TOKEN_FILE") - ), - ) + if client is None: + boto_client = init_rds_client( + aws_region_name=os.getenv("AWS_REGION_NAME"), + aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), + aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), + aws_session_name=os.getenv("AWS_SESSION_NAME"), + aws_profile_name=os.getenv("AWS_PROFILE_NAME"), + aws_role_name=os.getenv("AWS_ROLE_NAME", os.getenv("AWS_ROLE_ARN")), + aws_web_identity_token=os.getenv( + "AWS_WEB_IDENTITY_TOKEN", os.getenv("AWS_WEB_IDENTITY_TOKEN_FILE") + ), + ) + else: + boto_client = client token = boto_client.generate_db_auth_token( DBHostname=db_host, Port=db_port, DBUsername=db_user ) cleaned_token = quote(token, safe="") + return cleaned_token diff --git a/litellm/proxy/db/prisma_client.py b/litellm/proxy/db/prisma_client.py new file mode 100644 index 000000000..5e7fc4f79 --- /dev/null +++ b/litellm/proxy/db/prisma_client.py @@ -0,0 +1,106 @@ +import asyncio +import os +import urllib +import urllib.parse +from datetime import datetime, timedelta +from typing import Any, Callable, Optional + + +class PrismaWrapper: + def __init__(self, original_prisma: Any, iam_token_db_auth: bool): + self._original_prisma = original_prisma + self.iam_token_db_auth = iam_token_db_auth + + def is_token_expired(self, token_url: Optional[str]) -> bool: + if token_url is None: + return True + # Decode the token URL to handle URL-encoded characters + decoded_url = urllib.parse.unquote(token_url) + + # Parse the token URL + parsed_url = urllib.parse.urlparse(decoded_url) + + # Parse the query parameters from the path component (if they exist there) + query_params = urllib.parse.parse_qs(parsed_url.query) + + # Get expiration time from the query parameters + expires = query_params.get("X-Amz-Expires", [None])[0] + if expires is None: + raise ValueError("X-Amz-Expires parameter is missing or invalid.") + + expires_int = int(expires) + + # Get the token's creation time from the X-Amz-Date parameter + token_time_str = query_params.get("X-Amz-Date", [""])[0] + if not token_time_str: + raise ValueError("X-Amz-Date parameter is missing or invalid.") + + # Ensure the token time string is parsed correctly + try: + token_time = datetime.strptime(token_time_str, "%Y%m%dT%H%M%SZ") + except ValueError as e: + raise ValueError(f"Invalid X-Amz-Date format: {e}") + + # Calculate the expiration time + expiration_time = token_time + timedelta(seconds=expires_int) + + # Current time in UTC + current_time = datetime.utcnow() + + # Check if the token is expired + return current_time > expiration_time + + def get_rds_iam_token(self) -> Optional[str]: + if self.iam_token_db_auth: + from litellm.proxy.auth.rds_iam_token import generate_iam_auth_token + + db_host = os.getenv("DATABASE_HOST") + db_port = os.getenv("DATABASE_PORT") + db_user = os.getenv("DATABASE_USER") + db_name = os.getenv("DATABASE_NAME") + db_schema = os.getenv("DATABASE_SCHEMA") + + token = generate_iam_auth_token( + db_host=db_host, db_port=db_port, db_user=db_user + ) + + # print(f"token: {token}") + _db_url = f"postgresql://{db_user}:{token}@{db_host}:{db_port}/{db_name}" + if db_schema: + _db_url += f"?schema={db_schema}" + + os.environ["DATABASE_URL"] = _db_url + return _db_url + return None + + async def recreate_prisma_client( + self, new_db_url: str, http_client: Optional[Any] = None + ): + from prisma import Prisma # type: ignore + + if http_client is not None: + self._original_prisma = Prisma(http=http_client) + else: + self._original_prisma = 
Prisma() + + await self._original_prisma.connect() + + def __getattr__(self, name: str): + original_attr = getattr(self._original_prisma, name) + if self.iam_token_db_auth: + db_url = os.getenv("DATABASE_URL") + if self.is_token_expired(db_url): + db_url = self.get_rds_iam_token() + loop = asyncio.get_event_loop() + + if db_url: + if loop.is_running(): + asyncio.run_coroutine_threadsafe( + self.recreate_prisma_client(db_url), loop + ) + else: + asyncio.run(self.recreate_prisma_client(db_url)) + else: + raise ValueError("Failed to get RDS IAM token") + + return original_attr diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index 7da9fe479..9003b885e 100644 --- a/litellm/proxy/proxy_cli.py +++ b/litellm/proxy/proxy_cli.py @@ -40,7 +40,7 @@ def append_query_params(url, params) -> str: parsed_query.update(params) encoded_query = urlparse.urlencode(parsed_query, doseq=True) modified_url = urlparse.urlunparse(parsed_url._replace(query=encoded_query)) - return modified_url + return modified_url # type: ignore def run_ollama_serve(): @@ -287,7 +287,7 @@ def run_server( save_worker_config, ) if version == True: - pkg_version = importlib.metadata.version("litellm") + pkg_version = importlib.metadata.version("litellm") # type: ignore click.echo(f"\nLiteLLM: Current Version = {pkg_version}\n") return if model and "ollama" in model and api_base is None: @@ -338,14 +338,14 @@ def run_server( futures = [] start_time = time.time() # Make concurrent calls - with concurrent.futures.ThreadPoolExecutor( + with concurrent.futures.ThreadPoolExecutor( # type: ignore max_workers=concurrent_calls ) as executor: for _ in range(concurrent_calls): futures.append(executor.submit(_make_openai_completion)) # Wait for all futures to complete - concurrent.futures.wait(futures) + concurrent.futures.wait(futures) # type: ignore # Summarize the results successful_calls = 0 @@ -476,6 +476,7 @@ def run_server( _db_url += f"?schema={db_schema}" os.environ["DATABASE_URL"] = _db_url + os.environ["IAM_TOKEN_DB_AUTH"] = "True" ### DECRYPT ENV VAR ### @@ -600,8 +601,9 @@ def run_server( 0, os.path.abspath("../..") ) # Adds the parent directory to the system path - for litellm local dev import litellm + from litellm import get_secret_str - database_url = litellm.get_secret(database_url, default_value=None) + database_url = get_secret_str(database_url, default_value=None) os.chdir(original_dir) if database_url is not None and isinstance(database_url, str): os.environ["DATABASE_URL"] = database_url @@ -650,6 +652,8 @@ def run_server( subprocess.run(["prisma", "db", "push", "--accept-data-loss"]) break # Exit the loop if the subprocess succeeds except subprocess.CalledProcessError as e: + import time + print(f"Error: {e}") # noqa time.sleep(random.randrange(start=1, stop=5)) finally: @@ -728,13 +732,17 @@ def run_server( def load_config(self): # note: This Loads the gunicorn config - has nothing to do with LiteLLM Proxy config - config = { - key: value - for key, value in self.options.items() - if key in self.cfg.settings and value is not None - } + if self.cfg is not None: + config = { + key: value + for key, value in self.options.items() + if key in self.cfg.settings and value is not None + } + else: + config = {} for key, value in config.items(): - self.cfg.set(key.lower(), value) + if self.cfg is not None: + self.cfg.set(key.lower(), value) def load(self): # gunicorn app function diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 80eea79fb..18361bca1 100644 --- a/litellm/proxy/utils.py +++ 
b/litellm/proxy/utils.py @@ -65,11 +65,13 @@ from litellm.proxy.db.create_views import ( create_missing_views, should_create_missing_views, ) +from litellm.proxy.db.prisma_client import PrismaWrapper from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter from litellm.proxy.hooks.parallel_request_limiter import ( _PROXY_MaxParallelRequestsHandler, ) +from litellm.secret_managers.main import str_to_bool from litellm.types.utils import CallTypes, LoggedLiteLLMParams if TYPE_CHECKING: @@ -1017,6 +1019,9 @@ class PrismaClient: ) ## init logging object self.proxy_logging_obj = proxy_logging_obj + self.iam_token_db_auth: Optional[bool] = str_to_bool( + os.getenv("IAM_TOKEN_DB_AUTH") + ) try: from prisma import Prisma # type: ignore except Exception as e: @@ -1043,9 +1048,23 @@ class PrismaClient: from prisma import Prisma # type: ignore verbose_proxy_logger.debug("Connecting Prisma Client to DB..") if http_client is not None: - self.db = Prisma(http=http_client) + self.db = PrismaWrapper( + original_prisma=Prisma(http=http_client), + iam_token_db_auth=( + self.iam_token_db_auth + if self.iam_token_db_auth is not None + else False + ), + ) else: - self.db = Prisma() # Client to connect to Prisma db + self.db = PrismaWrapper( + original_prisma=Prisma(), + iam_token_db_auth=( + self.iam_token_db_auth + if self.iam_token_db_auth is not None + else False + ), + ) # Client to connect to Prisma db verbose_proxy_logger.debug("Success - Connected Prisma Client to DB") def hash_token(self, token: str): @@ -1141,9 +1160,9 @@ class PrismaClient: "LiteLLM_VerificationTokenView Created in DB!" ) else: - should_create_views = await should_create_missing_views(db=self.db) + should_create_views = await should_create_missing_views(db=self.db.db) # type: ignore if should_create_views: - await create_missing_views(db=self.db) + await create_missing_views(db=self.db) # type: ignore else: # don't block execution if these views are missing # Convert lists to sets for efficient difference calculation diff --git a/litellm/secret_managers/main.py b/litellm/secret_managers/main.py index e98140768..ec277a6ae 100644 --- a/litellm/secret_managers/main.py +++ b/litellm/secret_managers/main.py @@ -29,7 +29,7 @@ def _is_base64(s): return False -def str_to_bool(value: str) -> Optional[bool]: +def str_to_bool(value: Optional[str]) -> Optional[bool]: """ Converts a string to a boolean if it's a recognized boolean string. Returns None if the string is not a recognized boolean value. @@ -37,6 +37,9 @@ def str_to_bool(value: str) -> Optional[bool]: :param value: The string to be checked. :return: True or False if the string is a recognized boolean, otherwise None. 
""" + if value is None: + return None + true_values = {"true"} false_values = {"false"} diff --git a/litellm/tests/test_alangfuse.py b/litellm/tests/test_alangfuse.py index 903b01411..fa1260637 100644 --- a/litellm/tests/test_alangfuse.py +++ b/litellm/tests/test_alangfuse.py @@ -968,3 +968,259 @@ def test_aaalangfuse_dynamic_logging(): ) langfuse_client.get_trace(id=trace_id) + + +import datetime + +generation_params = { + "name": "litellm-acompletion", + "id": "time-10-35-32-316778_chatcmpl-ABQDEzVJS8fziPdvkeTA3tnQaxeMX", + "start_time": datetime.datetime(2024, 9, 25, 10, 35, 32, 316778), + "end_time": datetime.datetime(2024, 9, 25, 10, 35, 32, 897141), + "model": "gpt-4o", + "model_parameters": { + "stream": False, + "max_retries": 0, + "extra_body": "{}", + "system_fingerprint": "fp_52a7f40b0b", + }, + "input": { + "messages": [ + {"content": "<>", "role": "system"}, + {"content": "<>", "role": "user"}, + ] + }, + "output": { + "content": "Hello! It looks like your message might have been sent by accident. How can I assist you today?", + "role": "assistant", + "tool_calls": None, + "function_call": None, + }, + "usage": {"prompt_tokens": 13, "completion_tokens": 21, "total_cost": 0.00038}, + "metadata": { + "prompt": { + "name": "conversational-service-answer_question_restricted_reply", + "version": 9, + "config": {}, + "labels": ["latest", "staging", "production"], + "tags": ["conversational-service"], + "prompt": [ + {"role": "system", "content": "<>"}, + {"role": "user", "content": "{{text}}"}, + ], + }, + "requester_metadata": { + "session_id": "e953a71f-e129-4cf5-ad11-ad18245022f1", + "trace_name": "jess", + "tags": ["conversational-service", "generative-ai-engine", "staging"], + "prompt": { + "name": "conversational-service-answer_question_restricted_reply", + "version": 9, + "config": {}, + "labels": ["latest", "staging", "production"], + "tags": ["conversational-service"], + "prompt": [ + {"role": "system", "content": "<>"}, + {"role": "user", "content": "{{text}}"}, + ], + }, + }, + "user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b", + "litellm_api_version": "0.0.0", + "user_api_key_user_id": "default_user_id", + "user_api_key_spend": 0.0, + "user_api_key_metadata": {}, + "requester_ip_address": "127.0.0.1", + "model_group": "gpt-4o", + "model_group_size": 0, + "deployment": "gpt-4o", + "model_info": { + "id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4", + "db_model": False, + }, + "hidden_params": { + "headers": { + "date": "Wed, 25 Sep 2024 17:35:32 GMT", + "content-type": "application/json", + "transfer-encoding": "chunked", + "connection": "keep-alive", + "access-control-expose-headers": "X-Request-ID", + "openai-organization": "reliablekeystest", + "openai-processing-ms": "329", + "openai-version": "2020-10-01", + "strict-transport-security": "max-age=31536000; includeSubDomains; preload", + "x-ratelimit-limit-requests": "10000", + "x-ratelimit-limit-tokens": "30000000", + "x-ratelimit-remaining-requests": "9999", + "x-ratelimit-remaining-tokens": "29999980", + "x-ratelimit-reset-requests": "6ms", + "x-ratelimit-reset-tokens": "0s", + "x-request-id": "req_fdff3bfa11c391545d2042d46473214f", + "cf-cache-status": "DYNAMIC", + "set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, 
_cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None", + "x-content-type-options": "nosniff", + "server": "cloudflare", + "cf-ray": "8c8cc573becb232c-SJC", + "content-encoding": "gzip", + "alt-svc": 'h3=":443"; ma=86400', + }, + "additional_headers": { + "llm_provider-date": "Wed, 25 Sep 2024 17:35:32 GMT", + "llm_provider-content-type": "application/json", + "llm_provider-transfer-encoding": "chunked", + "llm_provider-connection": "keep-alive", + "llm_provider-access-control-expose-headers": "X-Request-ID", + "llm_provider-openai-organization": "reliablekeystest", + "llm_provider-openai-processing-ms": "329", + "llm_provider-openai-version": "2020-10-01", + "llm_provider-strict-transport-security": "max-age=31536000; includeSubDomains; preload", + "llm_provider-x-ratelimit-limit-requests": "10000", + "llm_provider-x-ratelimit-limit-tokens": "30000000", + "llm_provider-x-ratelimit-remaining-requests": "9999", + "llm_provider-x-ratelimit-remaining-tokens": "29999980", + "llm_provider-x-ratelimit-reset-requests": "6ms", + "llm_provider-x-ratelimit-reset-tokens": "0s", + "llm_provider-x-request-id": "req_fdff3bfa11c391545d2042d46473214f", + "llm_provider-cf-cache-status": "DYNAMIC", + "llm_provider-set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None", + "llm_provider-x-content-type-options": "nosniff", + "llm_provider-server": "cloudflare", + "llm_provider-cf-ray": "8c8cc573becb232c-SJC", + "llm_provider-content-encoding": "gzip", + "llm_provider-alt-svc": 'h3=":443"; ma=86400', + }, + "litellm_call_id": "1fa31658-20af-40b5-9ac9-60fd7b5ad98c", + "model_id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4", + "api_base": "https://api.openai.com", + "optional_params": { + "stream": False, + "max_retries": 0, + "extra_body": {}, + }, + "response_cost": 0.00038, + }, + "litellm_response_cost": 0.00038, + "api_base": "https://api.openai.com/v1/", + "cache_hit": False, + }, + "level": "DEFAULT", + "version": None, +} + + +@pytest.mark.parametrize( + "prompt", + [ + [ + {"role": "system", "content": "<>"}, + {"role": "user", "content": "{{text}}"}, + ], + "hello world", + ], +) +def test_langfuse_prompt_type(prompt): + + from litellm.integrations.langfuse import _add_prompt_to_generation_params + + clean_metadata = { + "prompt": { + "name": "conversational-service-answer_question_restricted_reply", + "version": 9, + "config": {}, + "labels": ["latest", "staging", "production"], + "tags": ["conversational-service"], + "prompt": prompt, + }, + "requester_metadata": { + "session_id": "e953a71f-e129-4cf5-ad11-ad18245022f1", + "trace_name": "jess", + "tags": ["conversational-service", "generative-ai-engine", "staging"], + "prompt": { + "name": "conversational-service-answer_question_restricted_reply", + "version": 9, + "config": {}, + "labels": ["latest", "staging", "production"], + "tags": ["conversational-service"], + "prompt": [ + {"role": "system", "content": "<>"}, + {"role": "user", "content": "{{text}}"}, + ], + }, + }, + "user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b", + 
"litellm_api_version": "0.0.0", + "user_api_key_user_id": "default_user_id", + "user_api_key_spend": 0.0, + "user_api_key_metadata": {}, + "requester_ip_address": "127.0.0.1", + "model_group": "gpt-4o", + "model_group_size": 0, + "deployment": "gpt-4o", + "model_info": { + "id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4", + "db_model": False, + }, + "hidden_params": { + "headers": { + "date": "Wed, 25 Sep 2024 17:35:32 GMT", + "content-type": "application/json", + "transfer-encoding": "chunked", + "connection": "keep-alive", + "access-control-expose-headers": "X-Request-ID", + "openai-organization": "reliablekeystest", + "openai-processing-ms": "329", + "openai-version": "2020-10-01", + "strict-transport-security": "max-age=31536000; includeSubDomains; preload", + "x-ratelimit-limit-requests": "10000", + "x-ratelimit-limit-tokens": "30000000", + "x-ratelimit-remaining-requests": "9999", + "x-ratelimit-remaining-tokens": "29999980", + "x-ratelimit-reset-requests": "6ms", + "x-ratelimit-reset-tokens": "0s", + "x-request-id": "req_fdff3bfa11c391545d2042d46473214f", + "cf-cache-status": "DYNAMIC", + "set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None", + "x-content-type-options": "nosniff", + "server": "cloudflare", + "cf-ray": "8c8cc573becb232c-SJC", + "content-encoding": "gzip", + "alt-svc": 'h3=":443"; ma=86400', + }, + "additional_headers": { + "llm_provider-date": "Wed, 25 Sep 2024 17:35:32 GMT", + "llm_provider-content-type": "application/json", + "llm_provider-transfer-encoding": "chunked", + "llm_provider-connection": "keep-alive", + "llm_provider-access-control-expose-headers": "X-Request-ID", + "llm_provider-openai-organization": "reliablekeystest", + "llm_provider-openai-processing-ms": "329", + "llm_provider-openai-version": "2020-10-01", + "llm_provider-strict-transport-security": "max-age=31536000; includeSubDomains; preload", + "llm_provider-x-ratelimit-limit-requests": "10000", + "llm_provider-x-ratelimit-limit-tokens": "30000000", + "llm_provider-x-ratelimit-remaining-requests": "9999", + "llm_provider-x-ratelimit-remaining-tokens": "29999980", + "llm_provider-x-ratelimit-reset-requests": "6ms", + "llm_provider-x-ratelimit-reset-tokens": "0s", + "llm_provider-x-request-id": "req_fdff3bfa11c391545d2042d46473214f", + "llm_provider-cf-cache-status": "DYNAMIC", + "llm_provider-set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None", + "llm_provider-x-content-type-options": "nosniff", + "llm_provider-server": "cloudflare", + "llm_provider-cf-ray": "8c8cc573becb232c-SJC", + "llm_provider-content-encoding": "gzip", + "llm_provider-alt-svc": 'h3=":443"; ma=86400', + }, + "litellm_call_id": "1fa31658-20af-40b5-9ac9-60fd7b5ad98c", + "model_id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4", + "api_base": 
"https://api.openai.com", + "optional_params": {"stream": False, "max_retries": 0, "extra_body": {}}, + "response_cost": 0.00038, + }, + "litellm_response_cost": 0.00038, + "api_base": "https://api.openai.com/v1/", + "cache_hit": False, + } + _add_prompt_to_generation_params( + generation_params=generation_params, clean_metadata=clean_metadata + ) diff --git a/litellm/types/llms/vertex_ai.py b/litellm/types/llms/vertex_ai.py index 0637933e2..145aaa359 100644 --- a/litellm/types/llms/vertex_ai.py +++ b/litellm/types/llms/vertex_ai.py @@ -153,6 +153,7 @@ class GenerationConfig(TypedDict, total=False): presence_penalty: float frequency_penalty: float response_mime_type: Literal["text/plain", "application/json"] + response_schema: dict seed: int diff --git a/litellm/utils.py b/litellm/utils.py index 31150111f..cce70c6f8 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -3239,8 +3239,15 @@ def get_optional_params( non_default_params=non_default_params, optional_params=optional_params, model=model, + drop_params=( + drop_params + if drop_params is not None and isinstance(drop_params, bool) + else False + ), ) - elif custom_llm_provider == "vertex_ai_beta": + elif custom_llm_provider == "vertex_ai_beta" or ( + custom_llm_provider == "vertex_ai" and "gemini" in model + ): supported_params = get_supported_openai_params( model=model, custom_llm_provider=custom_llm_provider ) @@ -3277,6 +3284,11 @@ def get_optional_params( non_default_params=non_default_params, optional_params=optional_params, model=model, + drop_params=( + drop_params + if drop_params is not None and isinstance(drop_params, bool) + else False + ), ) elif custom_llm_provider == "vertex_ai" and model in litellm.vertex_mistral_models: supported_params = get_supported_openai_params( @@ -3301,6 +3313,11 @@ def get_optional_params( non_default_params=non_default_params, optional_params=optional_params, model=model, + drop_params=( + drop_params + if drop_params is not None and isinstance(drop_params, bool) + else False + ), ) elif custom_llm_provider == "sagemaker": ## check if unsupported param passed in @@ -3710,6 +3727,7 @@ def get_optional_params( non_default_params=non_default_params, optional_params=optional_params, model=model, + drop_params=drop_params, ) elif custom_llm_provider == "openrouter": supported_params = get_supported_openai_params( @@ -3818,6 +3836,7 @@ def get_optional_params( non_default_params=non_default_params, optional_params=optional_params, model=model, + drop_params=drop_params, ) elif custom_llm_provider == "azure": supported_params = get_supported_openai_params( diff --git a/package-lock.json b/package-lock.json index c9559251b..2856be614 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5,12 +5,53 @@ "packages": { "": { "dependencies": { + "prisma": "^5.17.0", "react-copy-to-clipboard": "^5.1.0" }, "devDependencies": { "@types/react-copy-to-clipboard": "^5.0.7" } }, + "node_modules/@prisma/debug": { + "version": "5.17.0", + "resolved": "https://registry.npmjs.org/@prisma/debug/-/debug-5.17.0.tgz", + "integrity": "sha512-l7+AteR3P8FXiYyo496zkuoiJ5r9jLQEdUuxIxNCN1ud8rdbH3GTxm+f+dCyaSv9l9WY+29L9czaVRXz9mULfg==" + }, + "node_modules/@prisma/engines": { + "version": "5.17.0", + "resolved": "https://registry.npmjs.org/@prisma/engines/-/engines-5.17.0.tgz", + "integrity": "sha512-+r+Nf+JP210Jur+/X8SIPLtz+uW9YA4QO5IXA+KcSOBe/shT47bCcRMTYCbOESw3FFYFTwe7vU6KTWHKPiwvtg==", + "hasInstallScript": true, + "dependencies": { + "@prisma/debug": "5.17.0", + "@prisma/engines-version": 
"5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053", + "@prisma/fetch-engine": "5.17.0", + "@prisma/get-platform": "5.17.0" + } + }, + "node_modules/@prisma/engines-version": { + "version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053", + "resolved": "https://registry.npmjs.org/@prisma/engines-version/-/engines-version-5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053.tgz", + "integrity": "sha512-tUuxZZysZDcrk5oaNOdrBnnkoTtmNQPkzINFDjz7eG6vcs9AVDmA/F6K5Plsb2aQc/l5M2EnFqn3htng9FA4hg==" + }, + "node_modules/@prisma/fetch-engine": { + "version": "5.17.0", + "resolved": "https://registry.npmjs.org/@prisma/fetch-engine/-/fetch-engine-5.17.0.tgz", + "integrity": "sha512-ESxiOaHuC488ilLPnrv/tM2KrPhQB5TRris/IeIV4ZvUuKeaicCl4Xj/JCQeG9IlxqOgf1cCg5h5vAzlewN91Q==", + "dependencies": { + "@prisma/debug": "5.17.0", + "@prisma/engines-version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053", + "@prisma/get-platform": "5.17.0" + } + }, + "node_modules/@prisma/get-platform": { + "version": "5.17.0", + "resolved": "https://registry.npmjs.org/@prisma/get-platform/-/get-platform-5.17.0.tgz", + "integrity": "sha512-UlDgbRozCP1rfJ5Tlkf3Cnftb6srGrEQ4Nm3og+1Se2gWmCZ0hmPIi+tQikGDUVLlvOWx3Gyi9LzgRP+HTXV9w==", + "dependencies": { + "@prisma/debug": "5.17.0" + } + }, "node_modules/@types/prop-types": { "version": "15.7.12", "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.12.tgz", @@ -74,6 +115,21 @@ "node": ">=0.10.0" } }, + "node_modules/prisma": { + "version": "5.17.0", + "resolved": "https://registry.npmjs.org/prisma/-/prisma-5.17.0.tgz", + "integrity": "sha512-m4UWkN5lBE6yevqeOxEvmepnL5cNPEjzMw2IqDB59AcEV6w7D8vGljDLd1gPFH+W6gUxw9x7/RmN5dCS/WTPxA==", + "hasInstallScript": true, + "dependencies": { + "@prisma/engines": "5.17.0" + }, + "bin": { + "prisma": "build/index.js" + }, + "engines": { + "node": ">=16.13" + } + }, "node_modules/prop-types": { "version": "15.8.1", "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz", diff --git a/package.json b/package.json index 8c12b767d..849d94f08 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,6 @@ { "dependencies": { + "prisma": "^5.17.0", "react-copy-to-clipboard": "^5.1.0" }, "devDependencies": { diff --git a/tests/llm_translation/test_max_completion_tokens.py b/tests/llm_translation/test_max_completion_tokens.py index 6d5eb8e3c..2c61404e3 100644 --- a/tests/llm_translation/test_max_completion_tokens.py +++ b/tests/llm_translation/test_max_completion_tokens.py @@ -141,12 +141,12 @@ def test_all_model_configs(): "max_completion_tokens" in VertexAILlama3Config().get_supported_openai_params() ) assert VertexAILlama3Config().map_openai_params( - {"max_completion_tokens": 10}, {}, "llama3" + {"max_completion_tokens": 10}, {}, "llama3", drop_params=False ) == {"max_tokens": 10} assert "max_completion_tokens" in VertexAIAi21Config().get_supported_openai_params() assert VertexAIAi21Config().map_openai_params( - {"max_completion_tokens": 10}, {}, "llama3" + {"max_completion_tokens": 10}, {}, "llama3", drop_params=False ) == {"max_tokens": 10} from litellm.llms.fireworks_ai.chat.fireworks_ai_transformation import ( @@ -332,6 +332,7 @@ def test_all_model_configs(): model="gemini-1.0-pro", non_default_params={"max_completion_tokens": 10}, optional_params={}, + drop_params=False, ) == {"max_output_tokens": 10} assert "max_completion_tokens" in VertexGeminiConfig().get_supported_openai_params() diff --git a/tests/llm_translation/test_optional_params.py b/tests/llm_translation/test_optional_params.py index 
1250dbe24..e22e724d0 100644 --- a/tests/llm_translation/test_optional_params.py +++ b/tests/llm_translation/test_optional_params.py @@ -600,3 +600,35 @@ def test_o1_model_params(): ) assert optional_params["seed"] == 10 assert optional_params["user"] == "John" + + +@pytest.mark.parametrize( + "temperature, expected_error", + [(0.2, True), (1, False)], +) +def test_o1_model_temperature_params(temperature, expected_error): + if expected_error: + with pytest.raises(litellm.UnsupportedParamsError): + get_optional_params( + model="o1-preview-2024-09-12", + custom_llm_provider="openai", + temperature=temperature, + ) + else: + get_optional_params( + model="o1-preview-2024-09-12", + custom_llm_provider="openai", + temperature=temperature, + ) + + +def test_unmapped_gemini_model_params(): + """ + Test if unmapped gemini model optional params are translated correctly + """ + optional_params = get_optional_params( + model="gemini-new-model", + custom_llm_provider="vertex_ai", + stop="stop_word", + ) + assert optional_params["stop_sequences"] == ["stop_word"]
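The new `test_o1_model_temperature_params` test above covers the tightened o1 temperature validation in this patch: values other than 0 or 1 raise `UnsupportedParamsError` unless param dropping is enabled. Below is a minimal sketch of the opt-out path, assuming `get_optional_params` accepts a `drop_params` keyword as the changes in `litellm/utils.py` suggest; the model name is illustrative.

```python
import litellm
from litellm.utils import get_optional_params

# Without drop_params, an o1 temperature outside {0, 1} raises
# litellm.UnsupportedParamsError (see test_o1_model_temperature_params).

# With per-call drop_params (or the global `litellm.drop_params = True`),
# the unsupported temperature is dropped instead of raising.
optional_params = get_optional_params(
    model="o1-preview-2024-09-12",
    custom_llm_provider="openai",
    temperature=0.2,      # unsupported for o1
    drop_params=True,
)
assert "temperature" not in optional_params
```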