forked from phoenix/litellm-mirror
LiteLLM Minor Fixes & Improvements (09/25/2024) (#5893)
* fix(langfuse.py): support new langfuse prompt_chat class init params
* fix(langfuse.py): handle new init values on prompt chat + prompt text templates; fixes error caused during langfuse logging
* docs(openai_compatible.md): clarify `openai/` handles correct routing for `/v1/completions` route. Fixes https://github.com/BerriAI/litellm/issues/5876
* fix(utils.py): handle unmapped gemini model optional param translation. Fixes https://github.com/BerriAI/litellm/issues/5888
* fix(o1_transformation.py): fix o-1 validation, to not raise error if temperature=1. Fixes https://github.com/BerriAI/litellm/issues/5884
* fix(prisma_client.py): refresh iam token. Fixes https://github.com/BerriAI/litellm/issues/5896
* fix: pass drop params where required
* fix(utils.py): pass drop_params correctly
* fix(types/vertex_ai.py): fix generation config
* test(test_max_completion_tokens.py): fix test
* fix(vertex_and_google_ai_studio_gemini.py): fix map openai params
This commit is contained in:
parent 16c0307eab
commit a1d9e96b31
22 changed files with 755 additions and 292 deletions
|
@ -7,7 +7,7 @@ To call models hosted behind an openai proxy, make 2 changes:
|
|||
|
||||
1. For `/chat/completions`: Put `openai/` in front of your model name, so litellm knows you're trying to call an openai `/chat/completions` endpoint.
|
||||
|
||||
2. For `/completions`: Put `text-completion-openai/` in front of your model name, so litellm knows you're trying to call an openai `/completions` endpoint.
|
||||
2. For `/completions`: Put `text-completion-openai/` in front of your model name, so litellm knows you're trying to call an openai `/completions` endpoint. [NOT REQUIRED for `openai/` endpoints called via `/v1/completions` route].
|
||||
|
||||
2. **Do NOT** add anything additional to the base url e.g. `/v1/embedding`. LiteLLM uses the openai-client to make these calls, and that automatically adds the relevant endpoints.
|
||||
|
||||
|
|
|
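A minimal sketch of the routing described above, assuming a hypothetical OpenAI-compatible server at `http://localhost:8000/v1` (model name, URL, and key are illustrative):

```python
import litellm

# `openai/` prefix -> litellm calls the server's /chat/completions route.
chat_resp = litellm.completion(
    model="openai/my-hosted-model",        # hypothetical model name
    api_base="http://localhost:8000/v1",   # assumed proxy base URL
    api_key="sk-anything",
    messages=[{"role": "user", "content": "hello"}],
)

# `text-completion-openai/` prefix -> litellm calls the /completions route.
text_resp = litellm.text_completion(
    model="text-completion-openai/my-hosted-model",
    api_base="http://localhost:8000/v1",
    api_key="sk-anything",
    prompt="hello",
)
```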
@ -1,6 +1,7 @@
|
|||
#### What this does ####
|
||||
# On success, logs events to Langfuse
|
||||
import copy
|
||||
import inspect
|
||||
import os
|
||||
import traceback
|
||||
|
||||
|
@ -676,21 +677,37 @@ def _add_prompt_to_generation_params(
|
|||
elif "version" in user_prompt and "prompt" in user_prompt:
|
||||
# prompts
|
||||
if isinstance(user_prompt["prompt"], str):
|
||||
_prompt_obj = Prompt_Text(
|
||||
name=user_prompt["name"],
|
||||
prompt=user_prompt["prompt"],
|
||||
version=user_prompt["version"],
|
||||
config=user_prompt.get("config", None),
|
||||
prompt_text_params = getattr(
|
||||
Prompt_Text, "model_fields", Prompt_Text.__fields__
|
||||
)
|
||||
_data = {
|
||||
"name": user_prompt["name"],
|
||||
"prompt": user_prompt["prompt"],
|
||||
"version": user_prompt["version"],
|
||||
"config": user_prompt.get("config", None),
|
||||
}
|
||||
if "labels" in prompt_text_params and "tags" in prompt_text_params:
|
||||
_data["labels"] = user_prompt.get("labels", []) or []
|
||||
_data["tags"] = user_prompt.get("tags", []) or []
|
||||
_prompt_obj = Prompt_Text(**_data) # type: ignore
|
||||
generation_params["prompt"] = TextPromptClient(prompt=_prompt_obj)
|
||||
|
||||
elif isinstance(user_prompt["prompt"], list):
|
||||
_prompt_obj = Prompt_Chat(
|
||||
name=user_prompt["name"],
|
||||
prompt=user_prompt["prompt"],
|
||||
version=user_prompt["version"],
|
||||
config=user_prompt.get("config", None),
|
||||
prompt_chat_params = getattr(
|
||||
Prompt_Chat, "model_fields", Prompt_Chat.__fields__
|
||||
)
|
||||
_data = {
|
||||
"name": user_prompt["name"],
|
||||
"prompt": user_prompt["prompt"],
|
||||
"version": user_prompt["version"],
|
||||
"config": user_prompt.get("config", None),
|
||||
}
|
||||
if "labels" in prompt_chat_params and "tags" in prompt_chat_params:
|
||||
_data["labels"] = user_prompt.get("labels", []) or []
|
||||
_data["tags"] = user_prompt.get("tags", []) or []
|
||||
|
||||
_prompt_obj = Prompt_Chat(**_data) # type: ignore
|
||||
|
||||
generation_params["prompt"] = ChatPromptClient(prompt=_prompt_obj)
|
||||
else:
|
||||
verbose_logger.error(
|
||||
|
|
|
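The `getattr(Prompt_Text, "model_fields", Prompt_Text.__fields__)` lookup above is a pydantic v1/v2 compatibility check: v2 models expose `model_fields`, v1 models expose `__fields__`. A standalone sketch of the same idea, using a hypothetical model rather than the langfuse classes:

```python
from typing import Optional

from pydantic import BaseModel


class ExamplePrompt(BaseModel):
    name: str
    prompt: str
    version: int
    labels: Optional[list] = None  # only present in newer schema versions


def build_prompt_kwargs(user_prompt: dict) -> dict:
    # Works on pydantic v1 (__fields__) and v2 (model_fields).
    known_fields = getattr(ExamplePrompt, "model_fields", ExamplePrompt.__fields__)
    data = {
        "name": user_prompt["name"],
        "prompt": user_prompt["prompt"],
        "version": user_prompt["version"],
    }
    # Only forward optional fields the installed schema actually declares.
    if "labels" in known_fields:
        data["labels"] = user_prompt.get("labels", []) or []
    return data


print(build_prompt_kwargs({"name": "p", "prompt": "hi", "version": 1, "labels": ["prod"]}))
```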
@ -125,7 +125,11 @@ class OpenAIGPTConfig:
|
|||
return base_params + model_specific_params
|
||||
|
||||
def _map_openai_params(
|
||||
self, non_default_params: dict, optional_params: dict, model: str
|
||||
self,
|
||||
non_default_params: dict,
|
||||
optional_params: dict,
|
||||
model: str,
|
||||
drop_params: bool,
|
||||
) -> dict:
|
||||
supported_openai_params = self.get_supported_openai_params(model)
|
||||
for param, value in non_default_params.items():
|
||||
|
@ -134,10 +138,15 @@ class OpenAIGPTConfig:
|
|||
return optional_params
|
||||
|
||||
def map_openai_params(
|
||||
self, non_default_params: dict, optional_params: dict, model: str
|
||||
self,
|
||||
non_default_params: dict,
|
||||
optional_params: dict,
|
||||
model: str,
|
||||
drop_params: bool,
|
||||
) -> dict:
|
||||
return self._map_openai_params(
|
||||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
drop_params=drop_params,
|
||||
)
|
||||
|
|
|
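With the widened signature, callers now pass `drop_params` explicitly instead of relying on the global flag alone. A minimal sketch of direct usage (values are illustrative, and the top-level `litellm.OpenAIGPTConfig` export is assumed from this version):

```python
import litellm

# Map OpenAI-style params for a model, stating explicitly whether unsupported
# params should be dropped (drop_params=True) or raise (drop_params=False).
optional_params = litellm.OpenAIGPTConfig().map_openai_params(
    non_default_params={"max_tokens": 100, "temperature": 0.2},
    optional_params={},
    model="gpt-4o",
    drop_params=False,
)
print(optional_params)  # {'max_tokens': 100, 'temperature': 0.2}
```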
@ -57,7 +57,6 @@ class OpenAIO1Config(OpenAIGPTConfig):
|
|||
"parallel_tool_calls",
|
||||
"function_call",
|
||||
"functions",
|
||||
"temperature",
|
||||
"top_p",
|
||||
"n",
|
||||
"presence_penalty",
|
||||
|
@ -73,13 +72,36 @@ class OpenAIO1Config(OpenAIGPTConfig):
|
|||
]
|
||||
|
||||
def map_openai_params(
|
||||
self, non_default_params: dict, optional_params: dict, model: str
|
||||
self,
|
||||
non_default_params: dict,
|
||||
optional_params: dict,
|
||||
model: str,
|
||||
drop_params: bool,
|
||||
):
|
||||
if "max_tokens" in non_default_params:
|
||||
optional_params["max_completion_tokens"] = non_default_params.pop(
|
||||
"max_tokens"
|
||||
)
|
||||
return super()._map_openai_params(non_default_params, optional_params, model)
|
||||
if "temperature" in non_default_params:
|
||||
temperature_value: Optional[float] = non_default_params.pop("temperature")
|
||||
if temperature_value is not None:
|
||||
if temperature_value == 0 or temperature_value == 1:
|
||||
optional_params["temperature"] = temperature_value
|
||||
else:
|
||||
## UNSUPPORTED TEMPERATURE VALUE
|
||||
if litellm.drop_params is True or drop_params is True:
|
||||
pass
|
||||
else:
|
||||
raise litellm.utils.UnsupportedParamsError(
|
||||
message="O-1 doesn't support temperature={}. To drop unsupported openai params from the call, set `litellm.drop_params = True`".format(
|
||||
temperature_value
|
||||
),
|
||||
status_code=400,
|
||||
)
|
||||
|
||||
return super()._map_openai_params(
|
||||
non_default_params, optional_params, model, drop_params
|
||||
)
|
||||
|
||||
def is_model_o1_reasoning_model(self, model: str) -> bool:
|
||||
if model in litellm.open_ai_chat_completion_models and "o1" in model:
|
||||
|
|
|
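The effect of the o-1 temperature check above, seen from the caller's side (mirrors the new `test_o1_model_temperature_params` test added later in this diff):

```python
import litellm
from litellm.utils import get_optional_params

# temperature of 0 or 1 is passed through unchanged for o1 models.
params = get_optional_params(
    model="o1-preview-2024-09-12",
    custom_llm_provider="openai",
    temperature=1,
)

# Any other value raises UnsupportedParamsError...
try:
    get_optional_params(
        model="o1-preview-2024-09-12",
        custom_llm_provider="openai",
        temperature=0.2,
    )
except litellm.UnsupportedParamsError as e:
    print("rejected:", e)

# ...unless drop_params is set, in which case the param is silently dropped.
params = get_optional_params(
    model="o1-preview-2024-09-12",
    custom_llm_provider="openai",
    temperature=0.2,
    drop_params=True,
)
```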
@ -413,7 +413,11 @@ class OpenAIConfig:
|
|||
return optional_params
|
||||
|
||||
def map_openai_params(
|
||||
self, non_default_params: dict, optional_params: dict, model: str
|
||||
self,
|
||||
non_default_params: dict,
|
||||
optional_params: dict,
|
||||
model: str,
|
||||
drop_params: bool,
|
||||
) -> dict:
|
||||
""" """
|
||||
if litellm.OpenAIO1Config().is_model_o1_reasoning_model(model=model):
|
||||
|
@ -421,11 +425,13 @@ class OpenAIConfig:
|
|||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
drop_params=drop_params,
|
||||
)
|
||||
return litellm.OpenAIGPTConfig().map_openai_params(
|
||||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
drop_params=drop_params,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ from litellm.types.llms.vertex_ai import (
|
|||
Tools,
|
||||
)
|
||||
|
||||
from ..common_utils import get_supports_system_message, get_supports_response_schema
|
||||
from ..common_utils import get_supports_response_schema, get_supports_system_message
|
||||
from ..vertex_ai_non_gemini import _gemini_convert_messages_with_history
|
||||
|
||||
|
||||
|
@ -73,8 +73,14 @@ def _transform_request_body(
|
|||
safety_settings: Optional[List[SafetSettingsConfig]] = optional_params.pop(
|
||||
"safety_settings", None
|
||||
) # type: ignore
|
||||
config_fields = GenerationConfig.__annotations__.keys()
|
||||
|
||||
filtered_params = {
|
||||
k: v for k, v in optional_params.items() if k in config_fields
|
||||
}
|
||||
|
||||
generation_config: Optional[GenerationConfig] = GenerationConfig(
|
||||
**optional_params
|
||||
**filtered_params
|
||||
)
|
||||
data = RequestBody(contents=content)
|
||||
if system_instructions is not None:
|
||||
|
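The `GenerationConfig.__annotations__.keys()` filter above keeps only the keys the `GenerationConfig` TypedDict declares, so unmapped or provider-specific entries in `optional_params` no longer leak into the request config. The same idea in isolation, with a hypothetical TypedDict:

```python
from typing import TypedDict


class ExampleGenerationConfig(TypedDict, total=False):
    temperature: float
    top_p: float
    max_output_tokens: int
    stop_sequences: list


optional_params = {
    "temperature": 0.1,
    "stop_sequences": ["stop_word"],
    "some_unmapped_param": True,  # not declared by the config, so it gets filtered out
}

# Keep only keys the config type actually declares.
config_fields = ExampleGenerationConfig.__annotations__.keys()
filtered_params = {k: v for k, v in optional_params.items() if k in config_fields}
print(filtered_params)  # {'temperature': 0.1, 'stop_sequences': ['stop_word']}
```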
@ -104,7 +110,7 @@ def sync_transform_request_body(
|
|||
timeout: Optional[Union[float, httpx.Timeout]],
|
||||
extra_headers: Optional[dict],
|
||||
optional_params: dict,
|
||||
logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
|
||||
logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, # type: ignore
|
||||
custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"],
|
||||
litellm_params: dict,
|
||||
) -> RequestBody:
|
||||
|
@ -146,7 +152,7 @@ async def async_transform_request_body(
|
|||
timeout: Optional[Union[float, httpx.Timeout]],
|
||||
extra_headers: Optional[dict],
|
||||
optional_params: dict,
|
||||
logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
|
||||
logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, # type: ignore
|
||||
custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"],
|
||||
litellm_params: dict,
|
||||
) -> RequestBody:
|
||||
|
@ -199,6 +205,7 @@ def _transform_system_message(
|
|||
if supports_system_message is True:
|
||||
for idx, message in enumerate(messages):
|
||||
if message["role"] == "system":
|
||||
_system_content_block: Optional[PartType] = None
|
||||
if isinstance(message["content"], str):
|
||||
_system_content_block = PartType(text=message["content"])
|
||||
elif isinstance(message["content"], list):
|
||||
|
@ -206,6 +213,7 @@ def _transform_system_message(
|
|||
for content in message["content"]:
|
||||
system_text += content.get("text") or ""
|
||||
_system_content_block = PartType(text=system_text)
|
||||
if _system_content_block is not None:
|
||||
system_content_blocks.append(_system_content_block)
|
||||
system_prompt_indices.append(idx)
|
||||
if len(system_prompt_indices) > 0:
|
||||
|
|
|
@ -252,233 +252,6 @@ class VertexAIConfig:
|
|||
]
|
||||
|
||||
|
||||
class GoogleAIStudioGeminiConfig: # key diff from VertexAI - 'frequency_penalty' and 'presence_penalty' not supported
|
||||
"""
|
||||
Reference: https://ai.google.dev/api/rest/v1beta/GenerationConfig
|
||||
|
||||
The class `GoogleAIStudioGeminiConfig` provides configuration for the Google AI Studio's Gemini API interface. Below are the parameters:
|
||||
|
||||
- `temperature` (float): This controls the degree of randomness in token selection.
|
||||
|
||||
- `max_output_tokens` (integer): This sets the limitation for the maximum amount of token in the text output. In this case, the default value is 256.
|
||||
|
||||
- `top_p` (float): The tokens are selected from the most probable to the least probable until the sum of their probabilities equals the `top_p` value. Default is 0.95.
|
||||
|
||||
- `top_k` (integer): The value of `top_k` determines how many of the most probable tokens are considered in the selection. For example, a `top_k` of 1 means the selected token is the most probable among all tokens. The default value is 40.
|
||||
|
||||
- `response_mime_type` (str): The MIME type of the response. The default value is 'text/plain'. Other values - `application/json`.
|
||||
|
||||
- `response_schema` (dict): Optional. Output response schema of the generated candidate text when response mime type can have schema. Schema can be objects, primitives or arrays and is a subset of OpenAPI schema. If set, a compatible response_mime_type must also be set. Compatible mimetypes: application/json: Schema for JSON response.
|
||||
|
||||
- `candidate_count` (int): Number of generated responses to return.
|
||||
|
||||
- `stop_sequences` (List[str]): The set of character sequences (up to 5) that will stop output generation. If specified, the API will stop at the first appearance of a stop sequence. The stop sequence will not be included as part of the response.
|
||||
|
||||
Note: Please make sure to modify the default parameters as required for your use case.
|
||||
"""
|
||||
|
||||
temperature: Optional[float] = None
|
||||
max_output_tokens: Optional[int] = None
|
||||
top_p: Optional[float] = None
|
||||
top_k: Optional[int] = None
|
||||
response_mime_type: Optional[str] = None
|
||||
response_schema: Optional[dict] = None
|
||||
candidate_count: Optional[int] = None
|
||||
stop_sequences: Optional[list] = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
temperature: Optional[float] = None,
|
||||
max_output_tokens: Optional[int] = None,
|
||||
top_p: Optional[float] = None,
|
||||
top_k: Optional[int] = None,
|
||||
response_mime_type: Optional[str] = None,
|
||||
response_schema: Optional[dict] = None,
|
||||
candidate_count: Optional[int] = None,
|
||||
stop_sequences: Optional[list] = None,
|
||||
) -> None:
|
||||
locals_ = locals()
|
||||
for key, value in locals_.items():
|
||||
if key != "self" and value is not None:
|
||||
setattr(self.__class__, key, value)
|
||||
|
||||
@classmethod
|
||||
def get_config(cls):
|
||||
return {
|
||||
k: v
|
||||
for k, v in cls.__dict__.items()
|
||||
if not k.startswith("__")
|
||||
and not isinstance(
|
||||
v,
|
||||
(
|
||||
types.FunctionType,
|
||||
types.BuiltinFunctionType,
|
||||
classmethod,
|
||||
staticmethod,
|
||||
),
|
||||
)
|
||||
and v is not None
|
||||
}
|
||||
|
||||
def get_supported_openai_params(self):
|
||||
return [
|
||||
"temperature",
|
||||
"top_p",
|
||||
"max_tokens",
|
||||
"max_completion_tokens",
|
||||
"stream",
|
||||
"tools",
|
||||
"tool_choice",
|
||||
"functions",
|
||||
"response_format",
|
||||
"n",
|
||||
"stop",
|
||||
]
|
||||
|
||||
def _map_function(self, value: List[dict]) -> List[Tools]:
|
||||
gtool_func_declarations = []
|
||||
googleSearchRetrieval: Optional[dict] = None
|
||||
|
||||
for tool in value:
|
||||
openai_function_object: Optional[ChatCompletionToolParamFunctionChunk] = (
|
||||
None
|
||||
)
|
||||
if "function" in tool: # tools list
|
||||
openai_function_object = ChatCompletionToolParamFunctionChunk( # type: ignore
|
||||
**tool["function"]
|
||||
)
|
||||
elif "name" in tool: # functions list
|
||||
openai_function_object = ChatCompletionToolParamFunctionChunk(**tool) # type: ignore
|
||||
|
||||
# check if grounding
|
||||
if tool.get("googleSearchRetrieval", None) is not None:
|
||||
googleSearchRetrieval = tool["googleSearchRetrieval"]
|
||||
elif openai_function_object is not None:
|
||||
gtool_func_declaration = FunctionDeclaration(
|
||||
name=openai_function_object["name"],
|
||||
description=openai_function_object.get("description", ""),
|
||||
parameters=openai_function_object.get("parameters", {}),
|
||||
)
|
||||
gtool_func_declarations.append(gtool_func_declaration)
|
||||
else:
|
||||
# assume it's a provider-specific param
|
||||
verbose_logger.warning(
|
||||
"Invalid tool={}. Use `litellm.set_verbose` or `litellm --detailed_debug` to see raw request."
|
||||
)
|
||||
|
||||
_tools = Tools(
|
||||
function_declarations=gtool_func_declarations,
|
||||
)
|
||||
if googleSearchRetrieval is not None:
|
||||
_tools["googleSearchRetrieval"] = googleSearchRetrieval
|
||||
return [_tools]
|
||||
|
||||
def map_tool_choice_values(
|
||||
self, model: str, tool_choice: Union[str, dict]
|
||||
) -> Optional[ToolConfig]:
|
||||
if tool_choice == "none":
|
||||
return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="NONE"))
|
||||
elif tool_choice == "required":
|
||||
return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="ANY"))
|
||||
elif tool_choice == "auto":
|
||||
return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="AUTO"))
|
||||
elif isinstance(tool_choice, dict):
|
||||
# only supported for anthropic + mistral models - https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ToolChoice.html
|
||||
name = tool_choice.get("function", {}).get("name", "")
|
||||
return ToolConfig(
|
||||
functionCallingConfig=FunctionCallingConfig(
|
||||
mode="ANY", allowed_function_names=[name]
|
||||
)
|
||||
)
|
||||
else:
|
||||
raise litellm.utils.UnsupportedParamsError(
|
||||
message="VertexAI doesn't support tool_choice={}. Supported tool_choice values=['auto', 'required', json object]. To drop it from the call, set `litellm.drop_params = True.".format(
|
||||
tool_choice
|
||||
),
|
||||
status_code=400,
|
||||
)
|
||||
|
||||
def map_openai_params(
|
||||
self,
|
||||
model: str,
|
||||
non_default_params: dict,
|
||||
optional_params: dict,
|
||||
):
|
||||
for param, value in non_default_params.items():
|
||||
if param == "temperature":
|
||||
optional_params["temperature"] = value
|
||||
if param == "top_p":
|
||||
optional_params["top_p"] = value
|
||||
if (
|
||||
param == "stream" and value is True
|
||||
): # sending stream = False, can cause it to get passed unchecked and raise issues
|
||||
optional_params["stream"] = value
|
||||
if param == "n":
|
||||
optional_params["candidate_count"] = value
|
||||
if param == "stop":
|
||||
if isinstance(value, str):
|
||||
optional_params["stop_sequences"] = [value]
|
||||
elif isinstance(value, list):
|
||||
optional_params["stop_sequences"] = value
|
||||
if param == "max_tokens" or param == "max_completion_tokens":
|
||||
optional_params["max_output_tokens"] = value
|
||||
if param == "response_format": # type: ignore
|
||||
if value["type"] == "json_object": # type: ignore
|
||||
optional_params["response_mime_type"] = "application/json"
|
||||
elif value["type"] == "text": # type: ignore
|
||||
optional_params["response_mime_type"] = "text/plain"
|
||||
if "response_schema" in value: # type: ignore
|
||||
optional_params["response_mime_type"] = "application/json"
|
||||
optional_params["response_schema"] = value["response_schema"] # type: ignore
|
||||
elif value["type"] == "json_schema": # type: ignore
|
||||
if "json_schema" in value and "schema" in value["json_schema"]: # type: ignore
|
||||
optional_params["response_mime_type"] = "application/json"
|
||||
optional_params["response_schema"] = value["json_schema"]["schema"] # type: ignore
|
||||
if (param == "tools" or param == "functions") and isinstance(value, list):
|
||||
optional_params["tools"] = self._map_function(value=value)
|
||||
optional_params["litellm_param_is_function_call"] = (
|
||||
True if param == "functions" else False
|
||||
)
|
||||
if param == "tool_choice" and (
|
||||
isinstance(value, str) or isinstance(value, dict)
|
||||
):
|
||||
_tool_choice_value = self.map_tool_choice_values(
|
||||
model=model, tool_choice=value # type: ignore
|
||||
)
|
||||
if _tool_choice_value is not None:
|
||||
optional_params["tool_choice"] = _tool_choice_value
|
||||
return optional_params
|
||||
|
||||
def get_mapped_special_auth_params(self) -> dict:
|
||||
"""
|
||||
Common auth params across bedrock/vertex_ai/azure/watsonx
|
||||
"""
|
||||
return {"project": "vertex_project", "region_name": "vertex_location"}
|
||||
|
||||
def map_special_auth_params(self, non_default_params: dict, optional_params: dict):
|
||||
mapped_params = self.get_mapped_special_auth_params()
|
||||
|
||||
for param, value in non_default_params.items():
|
||||
if param in mapped_params:
|
||||
optional_params[mapped_params[param]] = value
|
||||
return optional_params
|
||||
|
||||
def get_flagged_finish_reasons(self) -> Dict[str, str]:
|
||||
"""
|
||||
Return Dictionary of finish reasons which indicate response was flagged
|
||||
|
||||
and what it means
|
||||
"""
|
||||
return {
|
||||
"SAFETY": "The token generation was stopped as the response was flagged for safety reasons. NOTE: When streaming the Candidate.content will be empty if content filters blocked the output.",
|
||||
"RECITATION": "The token generation was stopped as the response was flagged for unauthorized citations.",
|
||||
"BLOCKLIST": "The token generation was stopped as the response was flagged for the terms which are included from the terminology blocklist.",
|
||||
"PROHIBITED_CONTENT": "The token generation was stopped as the response was flagged for the prohibited contents.",
|
||||
"SPII": "The token generation was stopped as the response was flagged for Sensitive Personally Identifiable Information (SPII) contents.",
|
||||
}
|
||||
|
||||
|
||||
class VertexGeminiConfig:
|
||||
"""
|
||||
Reference: https://cloud.google.com/vertex-ai/docs/generative-ai/chat/test-chat-prompts
|
||||
|
@ -752,6 +525,108 @@ class VertexGeminiConfig:
|
|||
return exception_string
|
||||
|
||||
|
||||
class GoogleAIStudioGeminiConfig(
|
||||
VertexGeminiConfig
|
||||
): # key diff from VertexAI - 'frequency_penalty' and 'presence_penalty' not supported
|
||||
"""
|
||||
Reference: https://ai.google.dev/api/rest/v1beta/GenerationConfig
|
||||
|
||||
The class `GoogleAIStudioGeminiConfig` provides configuration for the Google AI Studio's Gemini API interface. Below are the parameters:
|
||||
|
||||
- `temperature` (float): This controls the degree of randomness in token selection.
|
||||
|
||||
- `max_output_tokens` (integer): This sets the limitation for the maximum amount of token in the text output. In this case, the default value is 256.
|
||||
|
||||
- `top_p` (float): The tokens are selected from the most probable to the least probable until the sum of their probabilities equals the `top_p` value. Default is 0.95.
|
||||
|
||||
- `top_k` (integer): The value of `top_k` determines how many of the most probable tokens are considered in the selection. For example, a `top_k` of 1 means the selected token is the most probable among all tokens. The default value is 40.
|
||||
|
||||
- `response_mime_type` (str): The MIME type of the response. The default value is 'text/plain'. Other values - `application/json`.
|
||||
|
||||
- `response_schema` (dict): Optional. Output response schema of the generated candidate text when response mime type can have schema. Schema can be objects, primitives or arrays and is a subset of OpenAPI schema. If set, a compatible response_mime_type must also be set. Compatible mimetypes: application/json: Schema for JSON response.
|
||||
|
||||
- `candidate_count` (int): Number of generated responses to return.
|
||||
|
||||
- `stop_sequences` (List[str]): The set of character sequences (up to 5) that will stop output generation. If specified, the API will stop at the first appearance of a stop sequence. The stop sequence will not be included as part of the response.
|
||||
|
||||
Note: Please make sure to modify the default parameters as required for your use case.
|
||||
"""
|
||||
|
||||
temperature: Optional[float] = None
|
||||
max_output_tokens: Optional[int] = None
|
||||
top_p: Optional[float] = None
|
||||
top_k: Optional[int] = None
|
||||
response_mime_type: Optional[str] = None
|
||||
response_schema: Optional[dict] = None
|
||||
candidate_count: Optional[int] = None
|
||||
stop_sequences: Optional[list] = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
temperature: Optional[float] = None,
|
||||
max_output_tokens: Optional[int] = None,
|
||||
top_p: Optional[float] = None,
|
||||
top_k: Optional[int] = None,
|
||||
response_mime_type: Optional[str] = None,
|
||||
response_schema: Optional[dict] = None,
|
||||
candidate_count: Optional[int] = None,
|
||||
stop_sequences: Optional[list] = None,
|
||||
) -> None:
|
||||
locals_ = locals()
|
||||
for key, value in locals_.items():
|
||||
if key != "self" and value is not None:
|
||||
setattr(self.__class__, key, value)
|
||||
|
||||
@classmethod
|
||||
def get_config(cls):
|
||||
return {
|
||||
k: v
|
||||
for k, v in cls.__dict__.items()
|
||||
if not k.startswith("__")
|
||||
and not isinstance(
|
||||
v,
|
||||
(
|
||||
types.FunctionType,
|
||||
types.BuiltinFunctionType,
|
||||
classmethod,
|
||||
staticmethod,
|
||||
),
|
||||
)
|
||||
and v is not None
|
||||
}
|
||||
|
||||
def get_supported_openai_params(self):
|
||||
return [
|
||||
"temperature",
|
||||
"top_p",
|
||||
"max_tokens",
|
||||
"max_completion_tokens",
|
||||
"stream",
|
||||
"tools",
|
||||
"tool_choice",
|
||||
"functions",
|
||||
"response_format",
|
||||
"n",
|
||||
"stop",
|
||||
]
|
||||
|
||||
def map_openai_params(
|
||||
self,
|
||||
model: str,
|
||||
non_default_params: Dict,
|
||||
optional_params: Dict,
|
||||
drop_params: bool,
|
||||
):
|
||||
# drop frequency_penalty and presence_penalty
|
||||
if "frequency_penalty" in non_default_params:
|
||||
del non_default_params["frequency_penalty"]
|
||||
if "presence_penalty" in non_default_params:
|
||||
del non_default_params["presence_penalty"]
|
||||
return super().map_openai_params(
|
||||
model, non_default_params, optional_params, drop_params
|
||||
)
|
||||
|
||||
|
||||
async def make_call(
|
||||
client: Optional[AsyncHTTPHandler],
|
||||
api_base: str,
|
||||
|
|
|
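The new `GoogleAIStudioGeminiConfig` above reuses `VertexGeminiConfig` and only strips the two penalties Google AI Studio rejects before delegating. A generic sketch of that override pattern, with hypothetical classes standing in for the real configs:

```python
class BaseProviderConfig:
    def map_openai_params(self, model, non_default_params, optional_params, drop_params):
        # Base mapping: for this sketch, just copy everything through.
        optional_params.update(non_default_params)
        return optional_params


class RestrictedProviderConfig(BaseProviderConfig):
    # Same API surface, but params the upstream service rejects are removed
    # before the shared mapping logic runs.
    UNSUPPORTED = ("frequency_penalty", "presence_penalty")

    def map_openai_params(self, model, non_default_params, optional_params, drop_params):
        for key in self.UNSUPPORTED:
            non_default_params.pop(key, None)
        return super().map_openai_params(
            model, non_default_params, optional_params, drop_params
        )


print(
    RestrictedProviderConfig().map_openai_params(
        model="gemini-1.5-flash",
        non_default_params={"temperature": 0.2, "frequency_penalty": 0.5},
        optional_params={},
        drop_params=False,
    )
)  # {'temperature': 0.2}
```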
@ -44,7 +44,11 @@ class VertexAIAi21Config:
|
|||
return litellm.OpenAIConfig().get_supported_openai_params(model="gpt-3.5-turbo")
|
||||
|
||||
def map_openai_params(
|
||||
self, non_default_params: dict, optional_params: dict, model: str
|
||||
self,
|
||||
non_default_params: dict,
|
||||
optional_params: dict,
|
||||
model: str,
|
||||
drop_params: bool,
|
||||
):
|
||||
if "max_completion_tokens" in non_default_params:
|
||||
non_default_params["max_tokens"] = non_default_params.pop(
|
||||
|
@ -54,4 +58,5 @@ class VertexAIAi21Config:
|
|||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
drop_params=drop_params,
|
||||
)
|
||||
|
|
|
@ -50,7 +50,11 @@ class VertexAILlama3Config:
|
|||
return litellm.OpenAIConfig().get_supported_openai_params(model="gpt-3.5-turbo")
|
||||
|
||||
def map_openai_params(
|
||||
self, non_default_params: dict, optional_params: dict, model: str
|
||||
self,
|
||||
non_default_params: dict,
|
||||
optional_params: dict,
|
||||
model: str,
|
||||
drop_params: bool,
|
||||
):
|
||||
if "max_completion_tokens" in non_default_params:
|
||||
non_default_params["max_tokens"] = non_default_params.pop(
|
||||
|
@ -60,4 +64,5 @@ class VertexAILlama3Config:
|
|||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
drop_params=drop_params,
|
||||
)
|
||||
|
|
|
@ -31,15 +31,21 @@ model_list:
|
|||
- model_name: "anthropic/*"
|
||||
litellm_params:
|
||||
model: "anthropic/*"
|
||||
- model_name: "openai/*"
|
||||
- model_name: "*"
|
||||
litellm_params:
|
||||
model: "openai/*"
|
||||
- model_name: "fireworks_ai/*"
|
||||
litellm_params:
|
||||
model: "fireworks_ai/*"
|
||||
configurable_clientside_auth_params: ["api_base"]
|
||||
|
||||
- model_name: "gemini-flash-experimental"
|
||||
litellm_params:
|
||||
model: "vertex_ai/gemini-flash-experimental"
|
||||
|
||||
litellm_settings:
|
||||
success_callback: ["langfuse"]
|
||||
cache: true
|
||||
success_callback: ["langfuse", "prometheus"]
|
||||
failure_callback: ["prometheus"]
|
||||
|
||||
general_settings:
|
||||
proxy_budget_rescheduler_min_time: 1
|
||||
proxy_budget_rescheduler_max_time: 1
|
|
@ -1,5 +1,5 @@
|
|||
import os
|
||||
from typing import Optional, Union
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import httpx
|
||||
|
||||
|
@ -34,7 +34,7 @@ def init_rds_client(
|
|||
# Iterate over parameters and update if needed
|
||||
for i, param in enumerate(params_to_check):
|
||||
if param and param.startswith("os.environ/"):
|
||||
params_to_check[i] = get_secret(param)
|
||||
params_to_check[i] = get_secret(param) # type: ignore
|
||||
# Assign updated values back to parameters
|
||||
(
|
||||
aws_access_key_id,
|
||||
|
@ -62,13 +62,13 @@ def init_rds_client(
|
|||
import boto3
|
||||
|
||||
if isinstance(timeout, float):
|
||||
config = boto3.session.Config(connect_timeout=timeout, read_timeout=timeout)
|
||||
config = boto3.session.Config(connect_timeout=timeout, read_timeout=timeout) # type: ignore
|
||||
elif isinstance(timeout, httpx.Timeout):
|
||||
config = boto3.session.Config(
|
||||
config = boto3.session.Config( # type: ignore
|
||||
connect_timeout=timeout.connect, read_timeout=timeout.read
|
||||
)
|
||||
else:
|
||||
config = boto3.session.Config()
|
||||
config = boto3.session.Config() # type: ignore
|
||||
|
||||
### CHECK STS ###
|
||||
if (
|
||||
|
@ -105,6 +105,7 @@ def init_rds_client(
|
|||
region_name=region_name,
|
||||
config=config,
|
||||
)
|
||||
|
||||
elif aws_role_name is not None and aws_session_name is not None:
|
||||
# use sts if role name passed in
|
||||
sts_client = boto3.client(
|
||||
|
@ -144,6 +145,7 @@ def init_rds_client(
|
|||
region_name=region_name,
|
||||
config=config,
|
||||
)
|
||||
|
||||
else:
|
||||
# aws_access_key_id is None, assume user is trying to auth using env variables
|
||||
# boto3 automatically reads env variables
|
||||
|
@ -157,11 +159,14 @@ def init_rds_client(
|
|||
return client
|
||||
|
||||
|
||||
def generate_iam_auth_token(db_host, db_port, db_user) -> str:
|
||||
def generate_iam_auth_token(
|
||||
db_host, db_port, db_user, client: Optional[Any] = None
|
||||
) -> str:
|
||||
from urllib.parse import quote
|
||||
|
||||
import boto3
|
||||
|
||||
if client is None:
|
||||
boto_client = init_rds_client(
|
||||
aws_region_name=os.getenv("AWS_REGION_NAME"),
|
||||
aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
|
||||
|
@ -173,9 +178,12 @@ def generate_iam_auth_token(db_host, db_port, db_user) -> str:
|
|||
"AWS_WEB_IDENTITY_TOKEN", os.getenv("AWS_WEB_IDENTITY_TOKEN_FILE")
|
||||
),
|
||||
)
|
||||
else:
|
||||
boto_client = client
|
||||
|
||||
token = boto_client.generate_db_auth_token(
|
||||
DBHostname=db_host, Port=db_port, DBUsername=db_user
|
||||
)
|
||||
cleaned_token = quote(token, safe="")
|
||||
|
||||
return cleaned_token
|
||||
|
|
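The optional `client` argument added above lets callers inject a pre-built RDS client instead of going through `init_rds_client` (handy for tests). A minimal sketch, assuming valid AWS credentials and illustrative connection details:

```python
import boto3

from litellm.proxy.auth.rds_iam_token import generate_iam_auth_token

rds_client = boto3.client("rds", region_name="us-west-2")  # hypothetical region

token = generate_iam_auth_token(
    db_host="mydb.abc123.us-west-2.rds.amazonaws.com",  # hypothetical host
    db_port="5432",
    db_user="db_user",
    client=rds_client,  # bypasses init_rds_client and uses the injected client
)
# `token` is URL-quoted and can be used as the password in a postgres DSN.
```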
litellm/proxy/db/prisma_client.py (new file, 106 lines)
|
@ -0,0 +1,106 @@
|
|||
import asyncio
|
||||
import os
|
||||
import urllib
|
||||
import urllib.parse
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
|
||||
class PrismaWrapper:
|
||||
def __init__(self, original_prisma: Any, iam_token_db_auth: bool):
|
||||
self._original_prisma = original_prisma
|
||||
self.iam_token_db_auth = iam_token_db_auth
|
||||
|
||||
def is_token_expired(self, token_url: Optional[str]) -> bool:
|
||||
if token_url is None:
|
||||
return True
|
||||
# Decode the token URL to handle URL-encoded characters
|
||||
decoded_url = urllib.parse.unquote(token_url)
|
||||
|
||||
# Parse the token URL
|
||||
parsed_url = urllib.parse.urlparse(decoded_url)
|
||||
|
||||
# Parse the query parameters from the path component (if they exist there)
|
||||
query_params = urllib.parse.parse_qs(parsed_url.query)
|
||||
|
||||
# Get expiration time from the query parameters
|
||||
expires = query_params.get("X-Amz-Expires", [None])[0]
|
||||
if expires is None:
|
||||
raise ValueError("X-Amz-Expires parameter is missing or invalid.")
|
||||
|
||||
expires_int = int(expires)
|
||||
|
||||
# Get the token's creation time from the X-Amz-Date parameter
|
||||
token_time_str = query_params.get("X-Amz-Date", [""])[0]
|
||||
if not token_time_str:
|
||||
raise ValueError("X-Amz-Date parameter is missing or invalid.")
|
||||
|
||||
# Ensure the token time string is parsed correctly
|
||||
try:
|
||||
token_time = datetime.strptime(token_time_str, "%Y%m%dT%H%M%SZ")
|
||||
except ValueError as e:
|
||||
raise ValueError(f"Invalid X-Amz-Date format: {e}")
|
||||
|
||||
# Calculate the expiration time
|
||||
expiration_time = token_time + timedelta(seconds=expires_int)
|
||||
|
||||
# Current time in UTC
|
||||
current_time = datetime.utcnow()
|
||||
|
||||
# Check if the token is expired
|
||||
return current_time > expiration_time
|
||||
|
||||
def get_rds_iam_token(self) -> Optional[str]:
|
||||
if self.iam_token_db_auth:
|
||||
from litellm.proxy.auth.rds_iam_token import generate_iam_auth_token
|
||||
|
||||
db_host = os.getenv("DATABASE_HOST")
|
||||
db_port = os.getenv("DATABASE_PORT")
|
||||
db_user = os.getenv("DATABASE_USER")
|
||||
db_name = os.getenv("DATABASE_NAME")
|
||||
db_schema = os.getenv("DATABASE_SCHEMA")
|
||||
|
||||
token = generate_iam_auth_token(
|
||||
db_host=db_host, db_port=db_port, db_user=db_user
|
||||
)
|
||||
|
||||
# print(f"token: {token}")
|
||||
_db_url = f"postgresql://{db_user}:{token}@{db_host}:{db_port}/{db_name}"
|
||||
if db_schema:
|
||||
_db_url += f"?schema={db_schema}"
|
||||
|
||||
os.environ["DATABASE_URL"] = _db_url
|
||||
return _db_url
|
||||
return None
|
||||
|
||||
async def recreate_prisma_client(
|
||||
self, new_db_url: str, http_client: Optional[Any] = None
|
||||
):
|
||||
from prisma import Prisma # type: ignore
|
||||
|
||||
if http_client is not None:
|
||||
self._original_prisma = Prisma(http=http_client)
|
||||
else:
|
||||
self._original_prisma = Prisma()
|
||||
|
||||
await self._original_prisma.connect()
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
original_attr = getattr(self._original_prisma, name)
|
||||
if self.iam_token_db_auth:
|
||||
db_url = os.getenv("DATABASE_URL")
|
||||
if self.is_token_expired(db_url):
|
||||
db_url = self.get_rds_iam_token()
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
if db_url:
|
||||
if loop.is_running():
|
||||
asyncio.run_coroutine_threadsafe(
|
||||
self.recreate_prisma_client(db_url), loop
|
||||
)
|
||||
else:
|
||||
asyncio.run(self.recreate_prisma_client(db_url))
|
||||
else:
|
||||
raise ValueError("Failed to get RDS IAM token")
|
||||
|
||||
return original_attr
|
|
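`is_token_expired` above treats the RDS IAM token as a presigned URL and compares `X-Amz-Date` plus `X-Amz-Expires` against the current UTC time. A standalone sketch of that parsing, with a hypothetical (truncated) token:

```python
import urllib.parse
from datetime import datetime, timedelta

token_url = (
    "mydb.abc123.us-west-2.rds.amazonaws.com:5432/?Action=connect"
    "&DBUser=db_user&X-Amz-Date=20240925T170000Z&X-Amz-Expires=900"
    "&X-Amz-Signature=abc123"  # hypothetical, truncated signature
)

parsed = urllib.parse.urlparse(urllib.parse.unquote(token_url))
query = urllib.parse.parse_qs(parsed.query)

issued_at = datetime.strptime(query["X-Amz-Date"][0], "%Y%m%dT%H%M%SZ")
expires_in = int(query["X-Amz-Expires"][0])

is_expired = datetime.utcnow() > issued_at + timedelta(seconds=expires_in)
print(is_expired)  # True once the 900s window has elapsed
```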
@ -40,7 +40,7 @@ def append_query_params(url, params) -> str:
|
|||
parsed_query.update(params)
|
||||
encoded_query = urlparse.urlencode(parsed_query, doseq=True)
|
||||
modified_url = urlparse.urlunparse(parsed_url._replace(query=encoded_query))
|
||||
return modified_url
|
||||
return modified_url # type: ignore
|
||||
|
||||
|
||||
def run_ollama_serve():
|
||||
|
@ -287,7 +287,7 @@ def run_server(
|
|||
save_worker_config,
|
||||
)
|
||||
if version == True:
|
||||
pkg_version = importlib.metadata.version("litellm")
|
||||
pkg_version = importlib.metadata.version("litellm") # type: ignore
|
||||
click.echo(f"\nLiteLLM: Current Version = {pkg_version}\n")
|
||||
return
|
||||
if model and "ollama" in model and api_base is None:
|
||||
|
@ -338,14 +338,14 @@ def run_server(
|
|||
futures = []
|
||||
start_time = time.time()
|
||||
# Make concurrent calls
|
||||
with concurrent.futures.ThreadPoolExecutor(
|
||||
with concurrent.futures.ThreadPoolExecutor( # type: ignore
|
||||
max_workers=concurrent_calls
|
||||
) as executor:
|
||||
for _ in range(concurrent_calls):
|
||||
futures.append(executor.submit(_make_openai_completion))
|
||||
|
||||
# Wait for all futures to complete
|
||||
concurrent.futures.wait(futures)
|
||||
concurrent.futures.wait(futures) # type: ignore
|
||||
|
||||
# Summarize the results
|
||||
successful_calls = 0
|
||||
|
@ -476,6 +476,7 @@ def run_server(
|
|||
_db_url += f"?schema={db_schema}"
|
||||
|
||||
os.environ["DATABASE_URL"] = _db_url
|
||||
os.environ["IAM_TOKEN_DB_AUTH"] = "True"
|
||||
|
||||
### DECRYPT ENV VAR ###
|
||||
|
||||
|
@ -600,8 +601,9 @@ def run_server(
|
|||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system path - for litellm local dev
|
||||
import litellm
|
||||
from litellm import get_secret_str
|
||||
|
||||
database_url = litellm.get_secret(database_url, default_value=None)
|
||||
database_url = get_secret_str(database_url, default_value=None)
|
||||
os.chdir(original_dir)
|
||||
if database_url is not None and isinstance(database_url, str):
|
||||
os.environ["DATABASE_URL"] = database_url
|
||||
|
@ -650,6 +652,8 @@ def run_server(
|
|||
subprocess.run(["prisma", "db", "push", "--accept-data-loss"])
|
||||
break # Exit the loop if the subprocess succeeds
|
||||
except subprocess.CalledProcessError as e:
|
||||
import time
|
||||
|
||||
print(f"Error: {e}") # noqa
|
||||
time.sleep(random.randrange(start=1, stop=5))
|
||||
finally:
|
||||
|
@ -728,12 +732,16 @@ def run_server(
|
|||
|
||||
def load_config(self):
|
||||
# note: This Loads the gunicorn config - has nothing to do with LiteLLM Proxy config
|
||||
if self.cfg is not None:
|
||||
config = {
|
||||
key: value
|
||||
for key, value in self.options.items()
|
||||
if key in self.cfg.settings and value is not None
|
||||
}
|
||||
else:
|
||||
config = {}
|
||||
for key, value in config.items():
|
||||
if self.cfg is not None:
|
||||
self.cfg.set(key.lower(), value)
|
||||
|
||||
def load(self):
|
||||
|
|
|
@ -65,11 +65,13 @@ from litellm.proxy.db.create_views import (
|
|||
create_missing_views,
|
||||
should_create_missing_views,
|
||||
)
|
||||
from litellm.proxy.db.prisma_client import PrismaWrapper
|
||||
from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck
|
||||
from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
|
||||
from litellm.proxy.hooks.parallel_request_limiter import (
|
||||
_PROXY_MaxParallelRequestsHandler,
|
||||
)
|
||||
from litellm.secret_managers.main import str_to_bool
|
||||
from litellm.types.utils import CallTypes, LoggedLiteLLMParams
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
@ -1017,6 +1019,9 @@ class PrismaClient:
|
|||
)
|
||||
## init logging object
|
||||
self.proxy_logging_obj = proxy_logging_obj
|
||||
self.iam_token_db_auth: Optional[bool] = str_to_bool(
|
||||
os.getenv("IAM_TOKEN_DB_AUTH")
|
||||
)
|
||||
try:
|
||||
from prisma import Prisma # type: ignore
|
||||
except Exception as e:
|
||||
|
@ -1043,9 +1048,23 @@ class PrismaClient:
|
|||
from prisma import Prisma # type: ignore
|
||||
verbose_proxy_logger.debug("Connecting Prisma Client to DB..")
|
||||
if http_client is not None:
|
||||
self.db = Prisma(http=http_client)
|
||||
self.db = PrismaWrapper(
|
||||
original_prisma=Prisma(http=http_client),
|
||||
iam_token_db_auth=(
|
||||
self.iam_token_db_auth
|
||||
if self.iam_token_db_auth is not None
|
||||
else False
|
||||
),
|
||||
)
|
||||
else:
|
||||
self.db = Prisma() # Client to connect to Prisma db
|
||||
self.db = PrismaWrapper(
|
||||
original_prisma=Prisma(),
|
||||
iam_token_db_auth=(
|
||||
self.iam_token_db_auth
|
||||
if self.iam_token_db_auth is not None
|
||||
else False
|
||||
),
|
||||
) # Client to connect to Prisma db
|
||||
verbose_proxy_logger.debug("Success - Connected Prisma Client to DB")
|
||||
|
||||
def hash_token(self, token: str):
|
||||
|
@ -1141,9 +1160,9 @@ class PrismaClient:
|
|||
"LiteLLM_VerificationTokenView Created in DB!"
|
||||
)
|
||||
else:
|
||||
should_create_views = await should_create_missing_views(db=self.db)
|
||||
should_create_views = await should_create_missing_views(db=self.db.db) # type: ignore
|
||||
if should_create_views:
|
||||
await create_missing_views(db=self.db)
|
||||
await create_missing_views(db=self.db) # type: ignore
|
||||
else:
|
||||
# don't block execution if these views are missing
|
||||
# Convert lists to sets for efficient difference calculation
|
||||
|
|
|
@ -29,7 +29,7 @@ def _is_base64(s):
|
|||
return False
|
||||
|
||||
|
||||
def str_to_bool(value: str) -> Optional[bool]:
|
||||
def str_to_bool(value: Optional[str]) -> Optional[bool]:
|
||||
"""
|
||||
Converts a string to a boolean if it's a recognized boolean string.
|
||||
Returns None if the string is not a recognized boolean value.
|
||||
|
@ -37,6 +37,9 @@ def str_to_bool(value: str) -> Optional[bool]:
|
|||
:param value: The string to be checked.
|
||||
:return: True or False if the string is a recognized boolean, otherwise None.
|
||||
"""
|
||||
if value is None:
|
||||
return None
|
||||
|
||||
true_values = {"true"}
|
||||
false_values = {"false"}
|
||||
|
||||
|
|
|
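`str_to_bool` now accepts `Optional[str]`, so results of `os.getenv` can be passed straight through without a pre-check. A quick usage sketch:

```python
import os

from litellm.secret_managers.main import str_to_bool

# os.getenv returns None when the variable is unset; str_to_bool now returns
# None for that case instead of raising, so a simple default works:
iam_token_db_auth = str_to_bool(os.getenv("IAM_TOKEN_DB_AUTH")) or False
print(iam_token_db_auth)
```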
@ -968,3 +968,259 @@ def test_aaalangfuse_dynamic_logging():
|
|||
)
|
||||
|
||||
langfuse_client.get_trace(id=trace_id)
|
||||
|
||||
|
||||
import datetime
|
||||
|
||||
generation_params = {
|
||||
"name": "litellm-acompletion",
|
||||
"id": "time-10-35-32-316778_chatcmpl-ABQDEzVJS8fziPdvkeTA3tnQaxeMX",
|
||||
"start_time": datetime.datetime(2024, 9, 25, 10, 35, 32, 316778),
|
||||
"end_time": datetime.datetime(2024, 9, 25, 10, 35, 32, 897141),
|
||||
"model": "gpt-4o",
|
||||
"model_parameters": {
|
||||
"stream": False,
|
||||
"max_retries": 0,
|
||||
"extra_body": "{}",
|
||||
"system_fingerprint": "fp_52a7f40b0b",
|
||||
},
|
||||
"input": {
|
||||
"messages": [
|
||||
{"content": "<>", "role": "system"},
|
||||
{"content": "<>", "role": "user"},
|
||||
]
|
||||
},
|
||||
"output": {
|
||||
"content": "Hello! It looks like your message might have been sent by accident. How can I assist you today?",
|
||||
"role": "assistant",
|
||||
"tool_calls": None,
|
||||
"function_call": None,
|
||||
},
|
||||
"usage": {"prompt_tokens": 13, "completion_tokens": 21, "total_cost": 0.00038},
|
||||
"metadata": {
|
||||
"prompt": {
|
||||
"name": "conversational-service-answer_question_restricted_reply",
|
||||
"version": 9,
|
||||
"config": {},
|
||||
"labels": ["latest", "staging", "production"],
|
||||
"tags": ["conversational-service"],
|
||||
"prompt": [
|
||||
{"role": "system", "content": "<>"},
|
||||
{"role": "user", "content": "{{text}}"},
|
||||
],
|
||||
},
|
||||
"requester_metadata": {
|
||||
"session_id": "e953a71f-e129-4cf5-ad11-ad18245022f1",
|
||||
"trace_name": "jess",
|
||||
"tags": ["conversational-service", "generative-ai-engine", "staging"],
|
||||
"prompt": {
|
||||
"name": "conversational-service-answer_question_restricted_reply",
|
||||
"version": 9,
|
||||
"config": {},
|
||||
"labels": ["latest", "staging", "production"],
|
||||
"tags": ["conversational-service"],
|
||||
"prompt": [
|
||||
{"role": "system", "content": "<>"},
|
||||
{"role": "user", "content": "{{text}}"},
|
||||
],
|
||||
},
|
||||
},
|
||||
"user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
|
||||
"litellm_api_version": "0.0.0",
|
||||
"user_api_key_user_id": "default_user_id",
|
||||
"user_api_key_spend": 0.0,
|
||||
"user_api_key_metadata": {},
|
||||
"requester_ip_address": "127.0.0.1",
|
||||
"model_group": "gpt-4o",
|
||||
"model_group_size": 0,
|
||||
"deployment": "gpt-4o",
|
||||
"model_info": {
|
||||
"id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4",
|
||||
"db_model": False,
|
||||
},
|
||||
"hidden_params": {
|
||||
"headers": {
|
||||
"date": "Wed, 25 Sep 2024 17:35:32 GMT",
|
||||
"content-type": "application/json",
|
||||
"transfer-encoding": "chunked",
|
||||
"connection": "keep-alive",
|
||||
"access-control-expose-headers": "X-Request-ID",
|
||||
"openai-organization": "reliablekeystest",
|
||||
"openai-processing-ms": "329",
|
||||
"openai-version": "2020-10-01",
|
||||
"strict-transport-security": "max-age=31536000; includeSubDomains; preload",
|
||||
"x-ratelimit-limit-requests": "10000",
|
||||
"x-ratelimit-limit-tokens": "30000000",
|
||||
"x-ratelimit-remaining-requests": "9999",
|
||||
"x-ratelimit-remaining-tokens": "29999980",
|
||||
"x-ratelimit-reset-requests": "6ms",
|
||||
"x-ratelimit-reset-tokens": "0s",
|
||||
"x-request-id": "req_fdff3bfa11c391545d2042d46473214f",
|
||||
"cf-cache-status": "DYNAMIC",
|
||||
"set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
|
||||
"x-content-type-options": "nosniff",
|
||||
"server": "cloudflare",
|
||||
"cf-ray": "8c8cc573becb232c-SJC",
|
||||
"content-encoding": "gzip",
|
||||
"alt-svc": 'h3=":443"; ma=86400',
|
||||
},
|
||||
"additional_headers": {
|
||||
"llm_provider-date": "Wed, 25 Sep 2024 17:35:32 GMT",
|
||||
"llm_provider-content-type": "application/json",
|
||||
"llm_provider-transfer-encoding": "chunked",
|
||||
"llm_provider-connection": "keep-alive",
|
||||
"llm_provider-access-control-expose-headers": "X-Request-ID",
|
||||
"llm_provider-openai-organization": "reliablekeystest",
|
||||
"llm_provider-openai-processing-ms": "329",
|
||||
"llm_provider-openai-version": "2020-10-01",
|
||||
"llm_provider-strict-transport-security": "max-age=31536000; includeSubDomains; preload",
|
||||
"llm_provider-x-ratelimit-limit-requests": "10000",
|
||||
"llm_provider-x-ratelimit-limit-tokens": "30000000",
|
||||
"llm_provider-x-ratelimit-remaining-requests": "9999",
|
||||
"llm_provider-x-ratelimit-remaining-tokens": "29999980",
|
||||
"llm_provider-x-ratelimit-reset-requests": "6ms",
|
||||
"llm_provider-x-ratelimit-reset-tokens": "0s",
|
||||
"llm_provider-x-request-id": "req_fdff3bfa11c391545d2042d46473214f",
|
||||
"llm_provider-cf-cache-status": "DYNAMIC",
|
||||
"llm_provider-set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
|
||||
"llm_provider-x-content-type-options": "nosniff",
|
||||
"llm_provider-server": "cloudflare",
|
||||
"llm_provider-cf-ray": "8c8cc573becb232c-SJC",
|
||||
"llm_provider-content-encoding": "gzip",
|
||||
"llm_provider-alt-svc": 'h3=":443"; ma=86400',
|
||||
},
|
||||
"litellm_call_id": "1fa31658-20af-40b5-9ac9-60fd7b5ad98c",
|
||||
"model_id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4",
|
||||
"api_base": "https://api.openai.com",
|
||||
"optional_params": {
|
||||
"stream": False,
|
||||
"max_retries": 0,
|
||||
"extra_body": {},
|
||||
},
|
||||
"response_cost": 0.00038,
|
||||
},
|
||||
"litellm_response_cost": 0.00038,
|
||||
"api_base": "https://api.openai.com/v1/",
|
||||
"cache_hit": False,
|
||||
},
|
||||
"level": "DEFAULT",
|
||||
"version": None,
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"prompt",
|
||||
[
|
||||
[
|
||||
{"role": "system", "content": "<>"},
|
||||
{"role": "user", "content": "{{text}}"},
|
||||
],
|
||||
"hello world",
|
||||
],
|
||||
)
|
||||
def test_langfuse_prompt_type(prompt):
|
||||
|
||||
from litellm.integrations.langfuse import _add_prompt_to_generation_params
|
||||
|
||||
clean_metadata = {
|
||||
"prompt": {
|
||||
"name": "conversational-service-answer_question_restricted_reply",
|
||||
"version": 9,
|
||||
"config": {},
|
||||
"labels": ["latest", "staging", "production"],
|
||||
"tags": ["conversational-service"],
|
||||
"prompt": prompt,
|
||||
},
|
||||
"requester_metadata": {
|
||||
"session_id": "e953a71f-e129-4cf5-ad11-ad18245022f1",
|
||||
"trace_name": "jess",
|
||||
"tags": ["conversational-service", "generative-ai-engine", "staging"],
|
||||
"prompt": {
|
||||
"name": "conversational-service-answer_question_restricted_reply",
|
||||
"version": 9,
|
||||
"config": {},
|
||||
"labels": ["latest", "staging", "production"],
|
||||
"tags": ["conversational-service"],
|
||||
"prompt": [
|
||||
{"role": "system", "content": "<>"},
|
||||
{"role": "user", "content": "{{text}}"},
|
||||
],
|
||||
},
|
||||
},
|
||||
"user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
|
||||
"litellm_api_version": "0.0.0",
|
||||
"user_api_key_user_id": "default_user_id",
|
||||
"user_api_key_spend": 0.0,
|
||||
"user_api_key_metadata": {},
|
||||
"requester_ip_address": "127.0.0.1",
|
||||
"model_group": "gpt-4o",
|
||||
"model_group_size": 0,
|
||||
"deployment": "gpt-4o",
|
||||
"model_info": {
|
||||
"id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4",
|
||||
"db_model": False,
|
||||
},
|
||||
"hidden_params": {
|
||||
"headers": {
|
||||
"date": "Wed, 25 Sep 2024 17:35:32 GMT",
|
||||
"content-type": "application/json",
|
||||
"transfer-encoding": "chunked",
|
||||
"connection": "keep-alive",
|
||||
"access-control-expose-headers": "X-Request-ID",
|
||||
"openai-organization": "reliablekeystest",
|
||||
"openai-processing-ms": "329",
|
||||
"openai-version": "2020-10-01",
|
||||
"strict-transport-security": "max-age=31536000; includeSubDomains; preload",
|
||||
"x-ratelimit-limit-requests": "10000",
|
||||
"x-ratelimit-limit-tokens": "30000000",
|
||||
"x-ratelimit-remaining-requests": "9999",
|
||||
"x-ratelimit-remaining-tokens": "29999980",
|
||||
"x-ratelimit-reset-requests": "6ms",
|
||||
"x-ratelimit-reset-tokens": "0s",
|
||||
"x-request-id": "req_fdff3bfa11c391545d2042d46473214f",
|
||||
"cf-cache-status": "DYNAMIC",
|
||||
"set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
|
||||
"x-content-type-options": "nosniff",
|
||||
"server": "cloudflare",
|
||||
"cf-ray": "8c8cc573becb232c-SJC",
|
||||
"content-encoding": "gzip",
|
||||
"alt-svc": 'h3=":443"; ma=86400',
|
||||
},
|
||||
"additional_headers": {
|
||||
"llm_provider-date": "Wed, 25 Sep 2024 17:35:32 GMT",
|
||||
"llm_provider-content-type": "application/json",
|
||||
"llm_provider-transfer-encoding": "chunked",
|
||||
"llm_provider-connection": "keep-alive",
|
||||
"llm_provider-access-control-expose-headers": "X-Request-ID",
|
||||
"llm_provider-openai-organization": "reliablekeystest",
|
||||
"llm_provider-openai-processing-ms": "329",
|
||||
"llm_provider-openai-version": "2020-10-01",
|
||||
"llm_provider-strict-transport-security": "max-age=31536000; includeSubDomains; preload",
|
||||
"llm_provider-x-ratelimit-limit-requests": "10000",
|
||||
"llm_provider-x-ratelimit-limit-tokens": "30000000",
|
||||
"llm_provider-x-ratelimit-remaining-requests": "9999",
|
||||
"llm_provider-x-ratelimit-remaining-tokens": "29999980",
|
||||
"llm_provider-x-ratelimit-reset-requests": "6ms",
|
||||
"llm_provider-x-ratelimit-reset-tokens": "0s",
|
||||
"llm_provider-x-request-id": "req_fdff3bfa11c391545d2042d46473214f",
|
||||
"llm_provider-cf-cache-status": "DYNAMIC",
|
||||
"llm_provider-set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
|
||||
"llm_provider-x-content-type-options": "nosniff",
|
||||
"llm_provider-server": "cloudflare",
|
||||
"llm_provider-cf-ray": "8c8cc573becb232c-SJC",
|
||||
"llm_provider-content-encoding": "gzip",
|
||||
"llm_provider-alt-svc": 'h3=":443"; ma=86400',
|
||||
},
|
||||
"litellm_call_id": "1fa31658-20af-40b5-9ac9-60fd7b5ad98c",
|
||||
"model_id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4",
|
||||
"api_base": "https://api.openai.com",
|
||||
"optional_params": {"stream": False, "max_retries": 0, "extra_body": {}},
|
||||
"response_cost": 0.00038,
|
||||
},
|
||||
"litellm_response_cost": 0.00038,
|
||||
"api_base": "https://api.openai.com/v1/",
|
||||
"cache_hit": False,
|
||||
}
|
||||
_add_prompt_to_generation_params(
|
||||
generation_params=generation_params, clean_metadata=clean_metadata
|
||||
)
|
||||
|
|
|
@ -153,6 +153,7 @@ class GenerationConfig(TypedDict, total=False):
|
|||
presence_penalty: float
|
||||
frequency_penalty: float
|
||||
response_mime_type: Literal["text/plain", "application/json"]
|
||||
response_schema: dict
|
||||
seed: int
|
||||
|
||||
|
||||
|
|
|
@ -3239,8 +3239,15 @@ def get_optional_params(
|
|||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
drop_params=(
|
||||
drop_params
|
||||
if drop_params is not None and isinstance(drop_params, bool)
|
||||
else False
|
||||
),
|
||||
)
|
||||
elif custom_llm_provider == "vertex_ai_beta":
|
||||
elif custom_llm_provider == "vertex_ai_beta" or (
|
||||
custom_llm_provider == "vertex_ai" and "gemini" in model
|
||||
):
|
||||
supported_params = get_supported_openai_params(
|
||||
model=model, custom_llm_provider=custom_llm_provider
|
||||
)
|
||||
|
@ -3277,6 +3284,11 @@ def get_optional_params(
|
|||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
drop_params=(
|
||||
drop_params
|
||||
if drop_params is not None and isinstance(drop_params, bool)
|
||||
else False
|
||||
),
|
||||
)
|
||||
elif custom_llm_provider == "vertex_ai" and model in litellm.vertex_mistral_models:
|
||||
supported_params = get_supported_openai_params(
|
||||
|
@ -3301,6 +3313,11 @@ def get_optional_params(
|
|||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
drop_params=(
|
||||
drop_params
|
||||
if drop_params is not None and isinstance(drop_params, bool)
|
||||
else False
|
||||
),
|
||||
)
|
||||
elif custom_llm_provider == "sagemaker":
|
||||
## check if unsupported param passed in
|
||||
|
@ -3710,6 +3727,7 @@ def get_optional_params(
|
|||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
drop_params=drop_params,
|
||||
)
|
||||
elif custom_llm_provider == "openrouter":
|
||||
supported_params = get_supported_openai_params(
|
||||
|
@ -3818,6 +3836,7 @@ def get_optional_params(
|
|||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
drop_params=drop_params,
|
||||
)
|
||||
elif custom_llm_provider == "azure":
|
||||
supported_params = get_supported_openai_params(
|
||||
|
|
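Taken together, the `utils.py` hunks above coerce `drop_params` to a plain bool before handing it to each provider config, and route plain `vertex_ai` Gemini models through the same translation path as `vertex_ai_beta`. A small sketch of the user-facing effect for an unmapped Gemini model (mirrors the new `test_unmapped_gemini_model_params` test added later in this diff):

```python
from litellm.utils import get_optional_params

# Even a Gemini model litellm has no explicit mapping for gets its OpenAI-style
# params translated, e.g. `stop` -> `stop_sequences`.
optional_params = get_optional_params(
    model="gemini-new-model",        # hypothetical, unmapped model name
    custom_llm_provider="vertex_ai",
    stop="stop_word",
)
assert optional_params["stop_sequences"] == ["stop_word"]
```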
package-lock.json (generated, 56 lines)
|
@ -5,12 +5,53 @@
|
|||
"packages": {
|
||||
"": {
|
||||
"dependencies": {
|
||||
"prisma": "^5.17.0",
|
||||
"react-copy-to-clipboard": "^5.1.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/react-copy-to-clipboard": "^5.0.7"
|
||||
}
|
||||
},
|
||||
"node_modules/@prisma/debug": {
|
||||
"version": "5.17.0",
|
||||
"resolved": "https://registry.npmjs.org/@prisma/debug/-/debug-5.17.0.tgz",
|
||||
"integrity": "sha512-l7+AteR3P8FXiYyo496zkuoiJ5r9jLQEdUuxIxNCN1ud8rdbH3GTxm+f+dCyaSv9l9WY+29L9czaVRXz9mULfg=="
|
||||
},
|
||||
"node_modules/@prisma/engines": {
|
||||
"version": "5.17.0",
|
||||
"resolved": "https://registry.npmjs.org/@prisma/engines/-/engines-5.17.0.tgz",
|
||||
"integrity": "sha512-+r+Nf+JP210Jur+/X8SIPLtz+uW9YA4QO5IXA+KcSOBe/shT47bCcRMTYCbOESw3FFYFTwe7vU6KTWHKPiwvtg==",
|
||||
"hasInstallScript": true,
|
||||
"dependencies": {
|
||||
"@prisma/debug": "5.17.0",
|
||||
"@prisma/engines-version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
|
||||
"@prisma/fetch-engine": "5.17.0",
|
||||
"@prisma/get-platform": "5.17.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@prisma/engines-version": {
|
||||
"version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
|
||||
"resolved": "https://registry.npmjs.org/@prisma/engines-version/-/engines-version-5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053.tgz",
|
||||
"integrity": "sha512-tUuxZZysZDcrk5oaNOdrBnnkoTtmNQPkzINFDjz7eG6vcs9AVDmA/F6K5Plsb2aQc/l5M2EnFqn3htng9FA4hg=="
|
||||
},
|
||||
"node_modules/@prisma/fetch-engine": {
|
||||
"version": "5.17.0",
|
||||
"resolved": "https://registry.npmjs.org/@prisma/fetch-engine/-/fetch-engine-5.17.0.tgz",
|
||||
"integrity": "sha512-ESxiOaHuC488ilLPnrv/tM2KrPhQB5TRris/IeIV4ZvUuKeaicCl4Xj/JCQeG9IlxqOgf1cCg5h5vAzlewN91Q==",
|
||||
"dependencies": {
|
||||
"@prisma/debug": "5.17.0",
|
||||
"@prisma/engines-version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
|
||||
"@prisma/get-platform": "5.17.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@prisma/get-platform": {
|
||||
"version": "5.17.0",
|
||||
"resolved": "https://registry.npmjs.org/@prisma/get-platform/-/get-platform-5.17.0.tgz",
|
||||
"integrity": "sha512-UlDgbRozCP1rfJ5Tlkf3Cnftb6srGrEQ4Nm3og+1Se2gWmCZ0hmPIi+tQikGDUVLlvOWx3Gyi9LzgRP+HTXV9w==",
|
||||
"dependencies": {
|
||||
"@prisma/debug": "5.17.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/prop-types": {
|
||||
"version": "15.7.12",
|
||||
"resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.12.tgz",
|
||||
|
@ -74,6 +115,21 @@
|
|||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/prisma": {
|
||||
"version": "5.17.0",
|
||||
"resolved": "https://registry.npmjs.org/prisma/-/prisma-5.17.0.tgz",
|
||||
"integrity": "sha512-m4UWkN5lBE6yevqeOxEvmepnL5cNPEjzMw2IqDB59AcEV6w7D8vGljDLd1gPFH+W6gUxw9x7/RmN5dCS/WTPxA==",
|
||||
"hasInstallScript": true,
|
||||
"dependencies": {
|
||||
"@prisma/engines": "5.17.0"
|
||||
},
|
||||
"bin": {
|
||||
"prisma": "build/index.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=16.13"
|
||||
}
|
||||
},
|
||||
"node_modules/prop-types": {
|
||||
"version": "15.8.1",
|
||||
"resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz",
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
{
|
||||
"dependencies": {
|
||||
"prisma": "^5.17.0",
|
||||
"react-copy-to-clipboard": "^5.1.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
|
|
@ -141,12 +141,12 @@ def test_all_model_configs():
|
|||
"max_completion_tokens" in VertexAILlama3Config().get_supported_openai_params()
|
||||
)
|
||||
assert VertexAILlama3Config().map_openai_params(
|
||||
{"max_completion_tokens": 10}, {}, "llama3"
|
||||
{"max_completion_tokens": 10}, {}, "llama3", drop_params=False
|
||||
) == {"max_tokens": 10}
|
||||
|
||||
assert "max_completion_tokens" in VertexAIAi21Config().get_supported_openai_params()
|
||||
assert VertexAIAi21Config().map_openai_params(
|
||||
{"max_completion_tokens": 10}, {}, "llama3"
|
||||
{"max_completion_tokens": 10}, {}, "llama3", drop_params=False
|
||||
) == {"max_tokens": 10}
|
||||
|
||||
from litellm.llms.fireworks_ai.chat.fireworks_ai_transformation import (
|
||||
|
@ -332,6 +332,7 @@ def test_all_model_configs():
|
|||
model="gemini-1.0-pro",
|
||||
non_default_params={"max_completion_tokens": 10},
|
||||
optional_params={},
|
||||
drop_params=False,
|
||||
) == {"max_output_tokens": 10}
|
||||
|
||||
assert "max_completion_tokens" in VertexGeminiConfig().get_supported_openai_params()
|
||||
|
|
|
@ -600,3 +600,35 @@ def test_o1_model_params():
|
|||
)
|
||||
assert optional_params["seed"] == 10
|
||||
assert optional_params["user"] == "John"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"temperature, expected_error",
|
||||
[(0.2, True), (1, False)],
|
||||
)
|
||||
def test_o1_model_temperature_params(temperature, expected_error):
|
||||
if expected_error:
|
||||
with pytest.raises(litellm.UnsupportedParamsError):
|
||||
get_optional_params(
|
||||
model="o1-preview-2024-09-12",
|
||||
custom_llm_provider="openai",
|
||||
temperature=temperature,
|
||||
)
|
||||
else:
|
||||
get_optional_params(
|
||||
model="o1-preview-2024-09-12",
|
||||
custom_llm_provider="openai",
|
||||
temperature=temperature,
|
||||
)
|
||||
|
||||
|
||||
def test_unmapped_gemini_model_params():
|
||||
"""
|
||||
Test if unmapped gemini model optional params are translated correctly
|
||||
"""
|
||||
optional_params = get_optional_params(
|
||||
model="gemini-new-model",
|
||||
custom_llm_provider="vertex_ai",
|
||||
stop="stop_word",
|
||||
)
|
||||
assert optional_params["stop_sequences"] == ["stop_word"]
|
||||
|
|