forked from phoenix/litellm-mirror

LiteLLM Minor Fixes & Improvements (09/25/2024) (#5893)

* fix(langfuse.py): support new langfuse prompt_chat class init params
* fix(langfuse.py): handle new init values on prompt chat + prompt text templates; fixes error caused during langfuse logging
* docs(openai_compatible.md): clarify `openai/` handles correct routing for `/v1/completions` route. Fixes https://github.com/BerriAI/litellm/issues/5876
* fix(utils.py): handle unmapped gemini model optional param translation. Fixes https://github.com/BerriAI/litellm/issues/5888
* fix(o1_transformation.py): fix o-1 validation, to not raise error if temperature=1. Fixes https://github.com/BerriAI/litellm/issues/5884
* fix(prisma_client.py): refresh iam token. Fixes https://github.com/BerriAI/litellm/issues/5896
* fix: pass drop params where required
* fix(utils.py): pass drop_params correctly
* fix(types/vertex_ai.py): fix generation config
* test(test_max_completion_tokens.py): fix test
* fix(vertex_and_google_ai_studio_gemini.py): fix map openai params

Parent: 16c0307eab
Commit: a1d9e96b31
22 changed files with 755 additions and 292 deletions
@@ -7,7 +7,7 @@ To call models hosted behind an openai proxy, make 2 changes:

 1. For `/chat/completions`: Put `openai/` in front of your model name, so litellm knows you're trying to call an openai `/chat/completions` endpoint.

-2. For `/completions`: Put `text-completion-openai/` in front of your model name, so litellm knows you're trying to call an openai `/completions` endpoint.
+2. For `/completions`: Put `text-completion-openai/` in front of your model name, so litellm knows you're trying to call an openai `/completions` endpoint. [NOT REQUIRED for `openai/` endpoints called via `/v1/completions` route].

 2. **Do NOT** add anything additional to the base url e.g. `/v1/embedding`. LiteLLM uses the openai-client to make these calls, and that automatically adds the relevant endpoints.
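Note: a hedged sketch of the routing rule this doc change describes. The model name, proxy URL, and prompt below are placeholders, not values from the commit:

```python
import litellm

# `openai/` prefix -> routed through the openai client's /chat/completions endpoint
chat_resp = litellm.completion(
    model="openai/my-hosted-model",          # placeholder model name
    api_base="http://localhost:8080/v1",     # placeholder proxy base url (no extra path appended)
    messages=[{"role": "user", "content": "hi"}],
)

# `text-completion-openai/` prefix -> routed through /completions
text_resp = litellm.text_completion(
    model="text-completion-openai/my-hosted-model",
    api_base="http://localhost:8080/v1",
    prompt="hi",
)
```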
@@ -1,6 +1,7 @@
 #### What this does ####
 # On success, logs events to Langfuse
 import copy
+import inspect
 import os
 import traceback

@@ -676,21 +677,37 @@ def _add_prompt_to_generation_params(
     elif "version" in user_prompt and "prompt" in user_prompt:
         # prompts
         if isinstance(user_prompt["prompt"], str):
-            _prompt_obj = Prompt_Text(
-                name=user_prompt["name"],
-                prompt=user_prompt["prompt"],
-                version=user_prompt["version"],
-                config=user_prompt.get("config", None),
-            )
+            prompt_text_params = getattr(
+                Prompt_Text, "model_fields", Prompt_Text.__fields__
+            )
+            _data = {
+                "name": user_prompt["name"],
+                "prompt": user_prompt["prompt"],
+                "version": user_prompt["version"],
+                "config": user_prompt.get("config", None),
+            }
+            if "labels" in prompt_text_params and "tags" in prompt_text_params:
+                _data["labels"] = user_prompt.get("labels", []) or []
+                _data["tags"] = user_prompt.get("tags", []) or []
+            _prompt_obj = Prompt_Text(**_data)  # type: ignore
             generation_params["prompt"] = TextPromptClient(prompt=_prompt_obj)

         elif isinstance(user_prompt["prompt"], list):
-            _prompt_obj = Prompt_Chat(
-                name=user_prompt["name"],
-                prompt=user_prompt["prompt"],
-                version=user_prompt["version"],
-                config=user_prompt.get("config", None),
-            )
+            prompt_chat_params = getattr(
+                Prompt_Chat, "model_fields", Prompt_Chat.__fields__
+            )
+            _data = {
+                "name": user_prompt["name"],
+                "prompt": user_prompt["prompt"],
+                "version": user_prompt["version"],
+                "config": user_prompt.get("config", None),
+            }
+            if "labels" in prompt_chat_params and "tags" in prompt_chat_params:
+                _data["labels"] = user_prompt.get("labels", []) or []
+                _data["tags"] = user_prompt.get("tags", []) or []
+
+            _prompt_obj = Prompt_Chat(**_data)  # type: ignore
+
             generation_params["prompt"] = ChatPromptClient(prompt=_prompt_obj)
         else:
             verbose_logger.error(
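Note: the `getattr(Prompt_Text, "model_fields", Prompt_Text.__fields__)` pattern above lets the logger introspect the installed langfuse prompt class (pydantic v2 exposes `model_fields`, v1 exposes `__fields__`) and only pass `labels`/`tags` when that class actually declares them. A minimal standalone sketch of the same feature-detection idea, using a hypothetical model rather than the langfuse class:

```python
from typing import Optional
from pydantic import BaseModel


class ExamplePrompt(BaseModel):  # hypothetical stand-in for Prompt_Text / Prompt_Chat
    name: str
    prompt: str
    version: int
    labels: Optional[list] = None


# pydantic v2 exposes `model_fields`; v1 exposes `__fields__`
fields = getattr(ExamplePrompt, "model_fields", ExamplePrompt.__fields__)

data = {"name": "greeting", "prompt": "hi {{name}}", "version": 1}
if "labels" in fields:  # only pass the kwarg if this model version knows it
    data["labels"] = ["staging"]

print(ExamplePrompt(**data))
```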
@@ -125,7 +125,11 @@ class OpenAIGPTConfig:
         return base_params + model_specific_params

     def _map_openai_params(
-        self, non_default_params: dict, optional_params: dict, model: str
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
     ) -> dict:
         supported_openai_params = self.get_supported_openai_params(model)
         for param, value in non_default_params.items():

@@ -134,10 +138,15 @@ class OpenAIGPTConfig:
         return optional_params

     def map_openai_params(
-        self, non_default_params: dict, optional_params: dict, model: str
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
     ) -> dict:
         return self._map_openai_params(
             non_default_params=non_default_params,
             optional_params=optional_params,
             model=model,
+            drop_params=drop_params,
         )
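Note: every provider config touched by this commit gains an explicit `drop_params` argument on `map_openai_params` instead of relying only on the global `litellm.drop_params`. A hedged sketch of calling the updated OpenAI mapping directly (`litellm.OpenAIGPTConfig` is referenced elsewhere in this diff; the parameter values are illustrative):

```python
import litellm

# map_openai_params now requires an explicit drop_params flag (see hunk above)
optional_params = litellm.OpenAIGPTConfig().map_openai_params(
    non_default_params={"temperature": 0.2, "max_tokens": 100},  # illustrative values
    optional_params={},
    model="gpt-4o",
    drop_params=True,
)
print(optional_params)
```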
@@ -57,7 +57,6 @@ class OpenAIO1Config(OpenAIGPTConfig):
             "parallel_tool_calls",
             "function_call",
             "functions",
-            "temperature",
             "top_p",
             "n",
             "presence_penalty",

@@ -73,13 +72,36 @@ class OpenAIO1Config(OpenAIGPTConfig):
         ]

     def map_openai_params(
-        self, non_default_params: dict, optional_params: dict, model: str
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
     ):
         if "max_tokens" in non_default_params:
             optional_params["max_completion_tokens"] = non_default_params.pop(
                 "max_tokens"
             )
-        return super()._map_openai_params(non_default_params, optional_params, model)
+        if "temperature" in non_default_params:
+            temperature_value: Optional[float] = non_default_params.pop("temperature")
+            if temperature_value is not None:
+                if temperature_value == 0 or temperature_value == 1:
+                    optional_params["temperature"] = temperature_value
+                else:
+                    ## UNSUPPORTED TOOL CHOICE VALUE
+                    if litellm.drop_params is True or drop_params is True:
+                        pass
+                    else:
+                        raise litellm.utils.UnsupportedParamsError(
+                            message="O-1 doesn't support temperature={}. To drop unsupported openai params from the call, set `litellm.drop_params = True`".format(
+                                temperature_value
+                            ),
+                            status_code=400,
+                        )
+
+        return super()._map_openai_params(
+            non_default_params, optional_params, model, drop_params
+        )

     def is_model_o1_reasoning_model(self, model: str) -> bool:
         if model in litellm.open_ai_chat_completion_models and "o1" in model:
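Note: the practical effect of the o1 change above is that `temperature=1` (the only value o1 models accept besides 0) now passes validation, while other values either raise `UnsupportedParamsError` or are dropped when `drop_params` is set. A hedged end-to-end sketch (model name and messages are placeholders; an OpenAI API key is assumed to be configured):

```python
import litellm

# temperature=1 is forwarded as-is; no UnsupportedParamsError is raised anymore
resp = litellm.completion(
    model="o1-preview",
    messages=[{"role": "user", "content": "hello"}],
    temperature=1,
)

# temperature=0.7 is not supported by o1; with drop_params=True it is dropped
# from the request instead of raising an error
resp = litellm.completion(
    model="o1-preview",
    messages=[{"role": "user", "content": "hello"}],
    temperature=0.7,
    drop_params=True,
)
```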
@@ -413,7 +413,11 @@ class OpenAIConfig:
         return optional_params

     def map_openai_params(
-        self, non_default_params: dict, optional_params: dict, model: str
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
     ) -> dict:
         """ """
         if litellm.OpenAIO1Config().is_model_o1_reasoning_model(model=model):

@@ -421,11 +425,13 @@ class OpenAIConfig:
                 non_default_params=non_default_params,
                 optional_params=optional_params,
                 model=model,
+                drop_params=drop_params,
             )
         return litellm.OpenAIGPTConfig().map_openai_params(
             non_default_params=non_default_params,
             optional_params=optional_params,
             model=model,
+            drop_params=drop_params,
         )
@@ -22,7 +22,7 @@ from litellm.types.llms.vertex_ai import (
     Tools,
 )

-from ..common_utils import get_supports_system_message, get_supports_response_schema
+from ..common_utils import get_supports_response_schema, get_supports_system_message
 from ..vertex_ai_non_gemini import _gemini_convert_messages_with_history

@@ -73,8 +73,14 @@ def _transform_request_body(
     safety_settings: Optional[List[SafetSettingsConfig]] = optional_params.pop(
         "safety_settings", None
     )  # type: ignore
+    config_fields = GenerationConfig.__annotations__.keys()
+
+    filtered_params = {
+        k: v for k, v in optional_params.items() if k in config_fields
+    }
+
     generation_config: Optional[GenerationConfig] = GenerationConfig(
-        **optional_params
+        **filtered_params
     )
     data = RequestBody(contents=content)
     if system_instructions is not None:

@@ -104,7 +110,7 @@ def sync_transform_request_body(
     timeout: Optional[Union[float, httpx.Timeout]],
     extra_headers: Optional[dict],
     optional_params: dict,
-    logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
+    logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,  # type: ignore
     custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"],
     litellm_params: dict,
 ) -> RequestBody:

@@ -146,7 +152,7 @@ async def async_transform_request_body(
     timeout: Optional[Union[float, httpx.Timeout]],
     extra_headers: Optional[dict],
     optional_params: dict,
-    logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
+    logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,  # type: ignore
     custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"],
     litellm_params: dict,
 ) -> RequestBody:

@@ -199,6 +205,7 @@ def _transform_system_message(
     if supports_system_message is True:
         for idx, message in enumerate(messages):
             if message["role"] == "system":
+                _system_content_block: Optional[PartType] = None
                 if isinstance(message["content"], str):
                     _system_content_block = PartType(text=message["content"])
                 elif isinstance(message["content"], list):

@@ -206,6 +213,7 @@ def _transform_system_message(
                     for content in message["content"]:
                         system_text += content.get("text") or ""
                     _system_content_block = PartType(text=system_text)
+                if _system_content_block is not None:
                     system_content_blocks.append(_system_content_block)
                 system_prompt_indices.append(idx)
         if len(system_prompt_indices) > 0:
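Note: the `_transform_request_body` change filters `optional_params` down to the keys that the `GenerationConfig` TypedDict actually declares, so an unmapped provider param no longer breaks the Gemini request. A small self-contained sketch of the same filtering idea, using a hypothetical TypedDict rather than litellm's own:

```python
from typing import TypedDict


class GenConfig(TypedDict, total=False):  # hypothetical stand-in for GenerationConfig
    temperature: float
    max_output_tokens: int
    top_p: float


optional_params = {"temperature": 0.3, "max_output_tokens": 128, "custom_flag": True}

# keep only keys the TypedDict declares, mirroring the hunk above
config_fields = GenConfig.__annotations__.keys()
filtered = {k: v for k, v in optional_params.items() if k in config_fields}

assert "custom_flag" not in filtered
```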
@@ -252,233 +252,6 @@ class VertexAIConfig:
         ]

-
-class GoogleAIStudioGeminiConfig:  # key diff from VertexAI - 'frequency_penalty' and 'presence_penalty' not supported
-    """
-    Reference: https://ai.google.dev/api/rest/v1beta/GenerationConfig
-
-    The class `GoogleAIStudioGeminiConfig` provides configuration for the Google AI Studio's Gemini API interface. Below are the parameters:
-
-    - `temperature` (float): This controls the degree of randomness in token selection.
-
-    - `max_output_tokens` (integer): This sets the limitation for the maximum amount of token in the text output. In this case, the default value is 256.
-
-    - `top_p` (float): The tokens are selected from the most probable to the least probable until the sum of their probabilities equals the `top_p` value. Default is 0.95.
-
-    - `top_k` (integer): The value of `top_k` determines how many of the most probable tokens are considered in the selection. For example, a `top_k` of 1 means the selected token is the most probable among all tokens. The default value is 40.
-
-    - `response_mime_type` (str): The MIME type of the response. The default value is 'text/plain'. Other values - `application/json`.
-
-    - `response_schema` (dict): Optional. Output response schema of the generated candidate text when response mime type can have schema. Schema can be objects, primitives or arrays and is a subset of OpenAPI schema. If set, a compatible response_mime_type must also be set. Compatible mimetypes: application/json: Schema for JSON response.
-
-    - `candidate_count` (int): Number of generated responses to return.
-
-    - `stop_sequences` (List[str]): The set of character sequences (up to 5) that will stop output generation. If specified, the API will stop at the first appearance of a stop sequence. The stop sequence will not be included as part of the response.
-
-    Note: Please make sure to modify the default parameters as required for your use case.
-    """
-
-    temperature: Optional[float] = None
-    max_output_tokens: Optional[int] = None
-    top_p: Optional[float] = None
-    top_k: Optional[int] = None
-    response_mime_type: Optional[str] = None
-    response_schema: Optional[dict] = None
-    candidate_count: Optional[int] = None
-    stop_sequences: Optional[list] = None
-
-    def __init__(
-        self,
-        temperature: Optional[float] = None,
-        max_output_tokens: Optional[int] = None,
-        top_p: Optional[float] = None,
-        top_k: Optional[int] = None,
-        response_mime_type: Optional[str] = None,
-        response_schema: Optional[dict] = None,
-        candidate_count: Optional[int] = None,
-        stop_sequences: Optional[list] = None,
-    ) -> None:
-        locals_ = locals()
-        for key, value in locals_.items():
-            if key != "self" and value is not None:
-                setattr(self.__class__, key, value)
-
-    @classmethod
-    def get_config(cls):
-        return {
-            k: v
-            for k, v in cls.__dict__.items()
-            if not k.startswith("__")
-            and not isinstance(
-                v,
-                (
-                    types.FunctionType,
-                    types.BuiltinFunctionType,
-                    classmethod,
-                    staticmethod,
-                ),
-            )
-            and v is not None
-        }
-
-    def get_supported_openai_params(self):
-        return [
-            "temperature",
-            "top_p",
-            "max_tokens",
-            "max_completion_tokens",
-            "stream",
-            "tools",
-            "tool_choice",
-            "functions",
-            "response_format",
-            "n",
-            "stop",
-        ]
-
-    def _map_function(self, value: List[dict]) -> List[Tools]:
-        gtool_func_declarations = []
-        googleSearchRetrieval: Optional[dict] = None
-
-        for tool in value:
-            openai_function_object: Optional[ChatCompletionToolParamFunctionChunk] = (
-                None
-            )
-            if "function" in tool:  # tools list
-                openai_function_object = ChatCompletionToolParamFunctionChunk(  # type: ignore
-                    **tool["function"]
-                )
-            elif "name" in tool:  # functions list
-                openai_function_object = ChatCompletionToolParamFunctionChunk(**tool)  # type: ignore
-
-            # check if grounding
-            if tool.get("googleSearchRetrieval", None) is not None:
-                googleSearchRetrieval = tool["googleSearchRetrieval"]
-            elif openai_function_object is not None:
-                gtool_func_declaration = FunctionDeclaration(
-                    name=openai_function_object["name"],
-                    description=openai_function_object.get("description", ""),
-                    parameters=openai_function_object.get("parameters", {}),
-                )
-                gtool_func_declarations.append(gtool_func_declaration)
-            else:
-                # assume it's a provider-specific param
-                verbose_logger.warning(
-                    "Invalid tool={}. Use `litellm.set_verbose` or `litellm --detailed_debug` to see raw request."
-                )
-
-        _tools = Tools(
-            function_declarations=gtool_func_declarations,
-        )
-        if googleSearchRetrieval is not None:
-            _tools["googleSearchRetrieval"] = googleSearchRetrieval
-        return [_tools]
-
-    def map_tool_choice_values(
-        self, model: str, tool_choice: Union[str, dict]
-    ) -> Optional[ToolConfig]:
-        if tool_choice == "none":
-            return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="NONE"))
-        elif tool_choice == "required":
-            return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="ANY"))
-        elif tool_choice == "auto":
-            return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="AUTO"))
-        elif isinstance(tool_choice, dict):
-            # only supported for anthropic + mistral models - https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ToolChoice.html
-            name = tool_choice.get("function", {}).get("name", "")
-            return ToolConfig(
-                functionCallingConfig=FunctionCallingConfig(
-                    mode="ANY", allowed_function_names=[name]
-                )
-            )
-        else:
-            raise litellm.utils.UnsupportedParamsError(
-                message="VertexAI doesn't support tool_choice={}. Supported tool_choice values=['auto', 'required', json object]. To drop it from the call, set `litellm.drop_params = True.".format(
-                    tool_choice
-                ),
-                status_code=400,
-            )
-
-    def map_openai_params(
-        self,
-        model: str,
-        non_default_params: dict,
-        optional_params: dict,
-    ):
-        for param, value in non_default_params.items():
-            if param == "temperature":
-                optional_params["temperature"] = value
-            if param == "top_p":
-                optional_params["top_p"] = value
-            if (
-                param == "stream" and value is True
-            ):  # sending stream = False, can cause it to get passed unchecked and raise issues
-                optional_params["stream"] = value
-            if param == "n":
-                optional_params["candidate_count"] = value
-            if param == "stop":
-                if isinstance(value, str):
-                    optional_params["stop_sequences"] = [value]
-                elif isinstance(value, list):
-                    optional_params["stop_sequences"] = value
-            if param == "max_tokens" or param == "max_completion_tokens":
-                optional_params["max_output_tokens"] = value
-            if param == "response_format":  # type: ignore
-                if value["type"] == "json_object":  # type: ignore
-                    optional_params["response_mime_type"] = "application/json"
-                elif value["type"] == "text":  # type: ignore
-                    optional_params["response_mime_type"] = "text/plain"
-                if "response_schema" in value:  # type: ignore
-                    optional_params["response_mime_type"] = "application/json"
-                    optional_params["response_schema"] = value["response_schema"]  # type: ignore
-                elif value["type"] == "json_schema":  # type: ignore
-                    if "json_schema" in value and "schema" in value["json_schema"]:  # type: ignore
-                        optional_params["response_mime_type"] = "application/json"
-                        optional_params["response_schema"] = value["json_schema"]["schema"]  # type: ignore
-            if (param == "tools" or param == "functions") and isinstance(value, list):
-                optional_params["tools"] = self._map_function(value=value)
-                optional_params["litellm_param_is_function_call"] = (
-                    True if param == "functions" else False
-                )
-            if param == "tool_choice" and (
-                isinstance(value, str) or isinstance(value, dict)
-            ):
-                _tool_choice_value = self.map_tool_choice_values(
-                    model=model, tool_choice=value  # type: ignore
-                )
-                if _tool_choice_value is not None:
-                    optional_params["tool_choice"] = _tool_choice_value
-        return optional_params
-
-    def get_mapped_special_auth_params(self) -> dict:
-        """
-        Common auth params across bedrock/vertex_ai/azure/watsonx
-        """
-        return {"project": "vertex_project", "region_name": "vertex_location"}
-
-    def map_special_auth_params(self, non_default_params: dict, optional_params: dict):
-        mapped_params = self.get_mapped_special_auth_params()
-
-        for param, value in non_default_params.items():
-            if param in mapped_params:
-                optional_params[mapped_params[param]] = value
-        return optional_params
-
-    def get_flagged_finish_reasons(self) -> Dict[str, str]:
-        """
-        Return Dictionary of finish reasons which indicate response was flagged
-
-        and what it means
-        """
-        return {
-            "SAFETY": "The token generation was stopped as the response was flagged for safety reasons. NOTE: When streaming the Candidate.content will be empty if content filters blocked the output.",
-            "RECITATION": "The token generation was stopped as the response was flagged for unauthorized citations.",
-            "BLOCKLIST": "The token generation was stopped as the response was flagged for the terms which are included from the terminology blocklist.",
-            "PROHIBITED_CONTENT": "The token generation was stopped as the response was flagged for the prohibited contents.",
-            "SPII": "The token generation was stopped as the response was flagged for Sensitive Personally Identifiable Information (SPII) contents.",
-        }
-
-
 class VertexGeminiConfig:
     """
     Reference: https://cloud.google.com/vertex-ai/docs/generative-ai/chat/test-chat-prompts

@@ -752,6 +525,108 @@ class VertexGeminiConfig:
         return exception_string


+class GoogleAIStudioGeminiConfig(
+    VertexGeminiConfig
+):  # key diff from VertexAI - 'frequency_penalty' and 'presence_penalty' not supported
+    """
+    Reference: https://ai.google.dev/api/rest/v1beta/GenerationConfig
+
+    The class `GoogleAIStudioGeminiConfig` provides configuration for the Google AI Studio's Gemini API interface. Below are the parameters:
+
+    - `temperature` (float): This controls the degree of randomness in token selection.
+
+    - `max_output_tokens` (integer): This sets the limitation for the maximum amount of token in the text output. In this case, the default value is 256.
+
+    - `top_p` (float): The tokens are selected from the most probable to the least probable until the sum of their probabilities equals the `top_p` value. Default is 0.95.
+
+    - `top_k` (integer): The value of `top_k` determines how many of the most probable tokens are considered in the selection. For example, a `top_k` of 1 means the selected token is the most probable among all tokens. The default value is 40.
+
+    - `response_mime_type` (str): The MIME type of the response. The default value is 'text/plain'. Other values - `application/json`.
+
+    - `response_schema` (dict): Optional. Output response schema of the generated candidate text when response mime type can have schema. Schema can be objects, primitives or arrays and is a subset of OpenAPI schema. If set, a compatible response_mime_type must also be set. Compatible mimetypes: application/json: Schema for JSON response.
+
+    - `candidate_count` (int): Number of generated responses to return.
+
+    - `stop_sequences` (List[str]): The set of character sequences (up to 5) that will stop output generation. If specified, the API will stop at the first appearance of a stop sequence. The stop sequence will not be included as part of the response.
+
+    Note: Please make sure to modify the default parameters as required for your use case.
+    """
+
+    temperature: Optional[float] = None
+    max_output_tokens: Optional[int] = None
+    top_p: Optional[float] = None
+    top_k: Optional[int] = None
+    response_mime_type: Optional[str] = None
+    response_schema: Optional[dict] = None
+    candidate_count: Optional[int] = None
+    stop_sequences: Optional[list] = None
+
+    def __init__(
+        self,
+        temperature: Optional[float] = None,
+        max_output_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+        top_k: Optional[int] = None,
+        response_mime_type: Optional[str] = None,
+        response_schema: Optional[dict] = None,
+        candidate_count: Optional[int] = None,
+        stop_sequences: Optional[list] = None,
+    ) -> None:
+        locals_ = locals()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return {
+            k: v
+            for k, v in cls.__dict__.items()
+            if not k.startswith("__")
+            and not isinstance(
+                v,
+                (
+                    types.FunctionType,
+                    types.BuiltinFunctionType,
+                    classmethod,
+                    staticmethod,
+                ),
+            )
+            and v is not None
+        }
+
+    def get_supported_openai_params(self):
+        return [
+            "temperature",
+            "top_p",
+            "max_tokens",
+            "max_completion_tokens",
+            "stream",
+            "tools",
+            "tool_choice",
+            "functions",
+            "response_format",
+            "n",
+            "stop",
+        ]
+
+    def map_openai_params(
+        self,
+        model: str,
+        non_default_params: Dict,
+        optional_params: Dict,
+        drop_params: bool,
+    ):
+        # drop frequency_penalty and presence_penalty
+        if "frequency_penalty" in non_default_params:
+            del non_default_params["frequency_penalty"]
+        if "presence_penalty" in non_default_params:
+            del non_default_params["presence_penalty"]
+        return super().map_openai_params(
+            model, non_default_params, optional_params, drop_params
+        )
+
+
 async def make_call(
     client: Optional[AsyncHTTPHandler],
     api_base: str,
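Note: since `GoogleAIStudioGeminiConfig` now subclasses `VertexGeminiConfig`, its `map_openai_params` only strips the two penalty params Google AI Studio rejects and then defers to the shared Vertex mapping. A hedged sketch of what that means for callers, assuming the class is importable from the top-level `litellm` package as elsewhere in this diff (the parameter values are illustrative):

```python
import litellm

config = litellm.GoogleAIStudioGeminiConfig()

mapped = config.map_openai_params(
    model="gemini-1.5-flash",
    non_default_params={"temperature": 0.4, "frequency_penalty": 0.2},
    optional_params={},
    drop_params=True,
)

# frequency_penalty is removed before the shared VertexGeminiConfig mapping runs,
# so it never reaches the Google AI Studio request body
print(mapped)
```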
@@ -44,7 +44,11 @@ class VertexAIAi21Config:
         return litellm.OpenAIConfig().get_supported_openai_params(model="gpt-3.5-turbo")

     def map_openai_params(
-        self, non_default_params: dict, optional_params: dict, model: str
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
     ):
         if "max_completion_tokens" in non_default_params:
             non_default_params["max_tokens"] = non_default_params.pop(

@@ -54,4 +58,5 @@ class VertexAIAi21Config:
             non_default_params=non_default_params,
             optional_params=optional_params,
             model=model,
+            drop_params=drop_params,
         )
@@ -50,7 +50,11 @@ class VertexAILlama3Config:
         return litellm.OpenAIConfig().get_supported_openai_params(model="gpt-3.5-turbo")

     def map_openai_params(
-        self, non_default_params: dict, optional_params: dict, model: str
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
     ):
         if "max_completion_tokens" in non_default_params:
             non_default_params["max_tokens"] = non_default_params.pop(

@@ -60,4 +64,5 @@ class VertexAILlama3Config:
             non_default_params=non_default_params,
             optional_params=optional_params,
             model=model,
+            drop_params=drop_params,
         )
@@ -31,15 +31,21 @@ model_list:
   - model_name: "anthropic/*"
     litellm_params:
       model: "anthropic/*"
-  - model_name: "openai/*"
+  - model_name: "*"
     litellm_params:
       model: "openai/*"
   - model_name: "fireworks_ai/*"
     litellm_params:
       model: "fireworks_ai/*"
       configurable_clientside_auth_params: ["api_base"]
+  - model_name: "gemini-flash-experimental"
+    litellm_params:
+      model: "vertex_ai/gemini-flash-experimental"

 litellm_settings:
-  success_callback: ["langfuse"]
-  cache: true
+  success_callback: ["langfuse", "prometheus"]
+  failure_callback: ["prometheus"]
+
+general_settings:
+  proxy_budget_rescheduler_min_time: 1
+  proxy_budget_rescheduler_max_time: 1
@@ -1,5 +1,5 @@
 import os
-from typing import Optional, Union
+from typing import Any, Optional, Union

 import httpx

@@ -34,7 +34,7 @@ def init_rds_client(
     # Iterate over parameters and update if needed
     for i, param in enumerate(params_to_check):
         if param and param.startswith("os.environ/"):
-            params_to_check[i] = get_secret(param)
+            params_to_check[i] = get_secret(param)  # type: ignore
     # Assign updated values back to parameters
     (
         aws_access_key_id,

@@ -62,13 +62,13 @@ def init_rds_client(
     import boto3

     if isinstance(timeout, float):
-        config = boto3.session.Config(connect_timeout=timeout, read_timeout=timeout)
+        config = boto3.session.Config(connect_timeout=timeout, read_timeout=timeout)  # type: ignore
     elif isinstance(timeout, httpx.Timeout):
-        config = boto3.session.Config(
+        config = boto3.session.Config(  # type: ignore
             connect_timeout=timeout.connect, read_timeout=timeout.read
         )
     else:
-        config = boto3.session.Config()
+        config = boto3.session.Config()  # type: ignore

     ### CHECK STS ###
     if (

@@ -105,6 +105,7 @@ def init_rds_client(
             region_name=region_name,
             config=config,
         )
+
     elif aws_role_name is not None and aws_session_name is not None:
         # use sts if role name passed in
         sts_client = boto3.client(

@@ -144,6 +145,7 @@ def init_rds_client(
             region_name=region_name,
             config=config,
         )
+
     else:
         # aws_access_key_id is None, assume user is trying to auth using env variables
         # boto3 automatically reads env variables

@@ -157,11 +159,14 @@ def init_rds_client(
     return client


-def generate_iam_auth_token(db_host, db_port, db_user) -> str:
+def generate_iam_auth_token(
+    db_host, db_port, db_user, client: Optional[Any] = None
+) -> str:
     from urllib.parse import quote

     import boto3

+    if client is None:
         boto_client = init_rds_client(
             aws_region_name=os.getenv("AWS_REGION_NAME"),
             aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),

@@ -173,9 +178,12 @@ def generate_iam_auth_token(db_host, db_port, db_user) -> str:
                 "AWS_WEB_IDENTITY_TOKEN", os.getenv("AWS_WEB_IDENTITY_TOKEN_FILE")
             ),
         )
+    else:
+        boto_client = client

     token = boto_client.generate_db_auth_token(
         DBHostname=db_host, Port=db_port, DBUsername=db_user
     )
     cleaned_token = quote(token, safe="")

     return cleaned_token
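Note: `generate_iam_auth_token` now accepts an optional pre-built boto3 RDS client, which is what allows the proxy to re-use one client when it refreshes expiring IAM tokens. A hedged usage sketch (the region, host, and user values are placeholders):

```python
import boto3
from litellm.proxy.auth.rds_iam_token import generate_iam_auth_token

# re-use one RDS client instead of rebuilding it on every token refresh
rds_client = boto3.client("rds", region_name="us-west-2")  # placeholder region

token = generate_iam_auth_token(
    db_host="my-db.cluster-xyz.us-west-2.rds.amazonaws.com",  # placeholder host
    db_port="5432",
    db_user="litellm_proxy",                                  # placeholder user
    client=rds_client,                                        # new optional argument
)
print(token[:20], "...")
```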
litellm/proxy/db/prisma_client.py (new file, 106 lines)

@@ -0,0 +1,106 @@
+import asyncio
+import os
+import urllib
+import urllib.parse
+from datetime import datetime, timedelta
+from typing import Any, Callable, Optional
+
+
+class PrismaWrapper:
+    def __init__(self, original_prisma: Any, iam_token_db_auth: bool):
+        self._original_prisma = original_prisma
+        self.iam_token_db_auth = iam_token_db_auth
+
+    def is_token_expired(self, token_url: Optional[str]) -> bool:
+        if token_url is None:
+            return True
+        # Decode the token URL to handle URL-encoded characters
+        decoded_url = urllib.parse.unquote(token_url)
+
+        # Parse the token URL
+        parsed_url = urllib.parse.urlparse(decoded_url)
+
+        # Parse the query parameters from the path component (if they exist there)
+        query_params = urllib.parse.parse_qs(parsed_url.query)
+
+        # Get expiration time from the query parameters
+        expires = query_params.get("X-Amz-Expires", [None])[0]
+        if expires is None:
+            raise ValueError("X-Amz-Expires parameter is missing or invalid.")
+
+        expires_int = int(expires)
+
+        # Get the token's creation time from the X-Amz-Date parameter
+        token_time_str = query_params.get("X-Amz-Date", [""])[0]
+        if not token_time_str:
+            raise ValueError("X-Amz-Date parameter is missing or invalid.")
+
+        # Ensure the token time string is parsed correctly
+        try:
+            token_time = datetime.strptime(token_time_str, "%Y%m%dT%H%M%SZ")
+        except ValueError as e:
+            raise ValueError(f"Invalid X-Amz-Date format: {e}")
+
+        # Calculate the expiration time
+        expiration_time = token_time + timedelta(seconds=expires_int)
+
+        # Current time in UTC
+        current_time = datetime.utcnow()
+
+        # Check if the token is expired
+        return current_time > expiration_time
+
+    def get_rds_iam_token(self) -> Optional[str]:
+        if self.iam_token_db_auth:
+            from litellm.proxy.auth.rds_iam_token import generate_iam_auth_token
+
+            db_host = os.getenv("DATABASE_HOST")
+            db_port = os.getenv("DATABASE_PORT")
+            db_user = os.getenv("DATABASE_USER")
+            db_name = os.getenv("DATABASE_NAME")
+            db_schema = os.getenv("DATABASE_SCHEMA")
+
+            token = generate_iam_auth_token(
+                db_host=db_host, db_port=db_port, db_user=db_user
+            )
+
+            # print(f"token: {token}")
+            _db_url = f"postgresql://{db_user}:{token}@{db_host}:{db_port}/{db_name}"
+            if db_schema:
+                _db_url += f"?schema={db_schema}"
+
+            os.environ["DATABASE_URL"] = _db_url
+            return _db_url
+        return None
+
+    async def recreate_prisma_client(
+        self, new_db_url: str, http_client: Optional[Any] = None
+    ):
+        from prisma import Prisma  # type: ignore
+
+        if http_client is not None:
+            self._original_prisma = Prisma(http=http_client)
+        else:
+            self._original_prisma = Prisma()
+
+        await self._original_prisma.connect()
+
+    def __getattr__(self, name: str):
+        original_attr = getattr(self._original_prisma, name)
+        if self.iam_token_db_auth:
+            db_url = os.getenv("DATABASE_URL")
+            if self.is_token_expired(db_url):
+                db_url = self.get_rds_iam_token()
+                loop = asyncio.get_event_loop()
+
+                if db_url:
+                    if loop.is_running():
+                        asyncio.run_coroutine_threadsafe(
+                            self.recreate_prisma_client(db_url), loop
+                        )
+                    else:
+                        asyncio.run(self.recreate_prisma_client(db_url))
+                else:
+                    raise ValueError("Failed to get RDS IAM token")
+
+        return original_attr
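Note: the expiry check in `PrismaWrapper.is_token_expired` works purely off the presigned token URL's `X-Amz-Date` and `X-Amz-Expires` query parameters. A small self-contained sketch of that calculation with a fabricated example URL (the host and timestamps below are dummies):

```python
import urllib.parse
from datetime import datetime, timedelta

# dummy presigned-style URL; only the two query params matter for the check
token_url = (
    "https://my-db.example.us-west-2.rds.amazonaws.com:5432/"
    "?Action=connect&X-Amz-Date=20240925T170000Z&X-Amz-Expires=900"
)

query = urllib.parse.parse_qs(urllib.parse.urlparse(token_url).query)
issued_at = datetime.strptime(query["X-Amz-Date"][0], "%Y%m%dT%H%M%SZ")
lifetime = timedelta(seconds=int(query["X-Amz-Expires"][0]))

expired = datetime.utcnow() > issued_at + lifetime
print("token expired:", expired)
```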
@@ -40,7 +40,7 @@ def append_query_params(url, params) -> str:
     parsed_query.update(params)
     encoded_query = urlparse.urlencode(parsed_query, doseq=True)
     modified_url = urlparse.urlunparse(parsed_url._replace(query=encoded_query))
-    return modified_url
+    return modified_url  # type: ignore


 def run_ollama_serve():

@@ -287,7 +287,7 @@ def run_server(
         save_worker_config,
     )
     if version == True:
-        pkg_version = importlib.metadata.version("litellm")
+        pkg_version = importlib.metadata.version("litellm")  # type: ignore
        click.echo(f"\nLiteLLM: Current Version = {pkg_version}\n")
        return
    if model and "ollama" in model and api_base is None:

@@ -338,14 +338,14 @@ def run_server(
        futures = []
        start_time = time.time()
        # Make concurrent calls
-       with concurrent.futures.ThreadPoolExecutor(
+       with concurrent.futures.ThreadPoolExecutor(  # type: ignore
            max_workers=concurrent_calls
        ) as executor:
            for _ in range(concurrent_calls):
                futures.append(executor.submit(_make_openai_completion))

        # Wait for all futures to complete
-       concurrent.futures.wait(futures)
+       concurrent.futures.wait(futures)  # type: ignore

        # Summarize the results
        successful_calls = 0

@@ -476,6 +476,7 @@ def run_server(
                _db_url += f"?schema={db_schema}"

            os.environ["DATABASE_URL"] = _db_url
+           os.environ["IAM_TOKEN_DB_AUTH"] = "True"

        ### DECRYPT ENV VAR ###

@@ -600,8 +601,9 @@ def run_server(
                    0, os.path.abspath("../..")
                )  # Adds the parent directory to the system path - for litellm local dev
                import litellm
+               from litellm import get_secret_str

-               database_url = litellm.get_secret(database_url, default_value=None)
+               database_url = get_secret_str(database_url, default_value=None)
                os.chdir(original_dir)
                if database_url is not None and isinstance(database_url, str):
                    os.environ["DATABASE_URL"] = database_url

@@ -650,6 +652,8 @@ def run_server(
                        subprocess.run(["prisma", "db", "push", "--accept-data-loss"])
                        break  # Exit the loop if the subprocess succeeds
                    except subprocess.CalledProcessError as e:
+                       import time
+
                        print(f"Error: {e}")  # noqa
                        time.sleep(random.randrange(start=1, stop=5))
                    finally:

@@ -728,12 +732,16 @@ def run_server(

    def load_config(self):
        # note: This Loads the gunicorn config - has nothing to do with LiteLLM Proxy config
+       if self.cfg is not None:
            config = {
                key: value
                for key, value in self.options.items()
                if key in self.cfg.settings and value is not None
            }
+       else:
+           config = {}
        for key, value in config.items():
+           if self.cfg is not None:
                self.cfg.set(key.lower(), value)

    def load(self):
@@ -65,11 +65,13 @@ from litellm.proxy.db.create_views import (
     create_missing_views,
     should_create_missing_views,
 )
+from litellm.proxy.db.prisma_client import PrismaWrapper
 from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck
 from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
 from litellm.proxy.hooks.parallel_request_limiter import (
     _PROXY_MaxParallelRequestsHandler,
 )
+from litellm.secret_managers.main import str_to_bool
 from litellm.types.utils import CallTypes, LoggedLiteLLMParams

 if TYPE_CHECKING:

@@ -1017,6 +1019,9 @@ class PrismaClient:
         )
         ## init logging object
         self.proxy_logging_obj = proxy_logging_obj
+        self.iam_token_db_auth: Optional[bool] = str_to_bool(
+            os.getenv("IAM_TOKEN_DB_AUTH")
+        )
         try:
             from prisma import Prisma  # type: ignore
         except Exception as e:

@@ -1043,9 +1048,23 @@ class PrismaClient:
         from prisma import Prisma  # type: ignore

         verbose_proxy_logger.debug("Connecting Prisma Client to DB..")
         if http_client is not None:
-            self.db = Prisma(http=http_client)
+            self.db = PrismaWrapper(
+                original_prisma=Prisma(http=http_client),
+                iam_token_db_auth=(
+                    self.iam_token_db_auth
+                    if self.iam_token_db_auth is not None
+                    else False
+                ),
+            )
         else:
-            self.db = Prisma()  # Client to connect to Prisma db
+            self.db = PrismaWrapper(
+                original_prisma=Prisma(),
+                iam_token_db_auth=(
+                    self.iam_token_db_auth
+                    if self.iam_token_db_auth is not None
+                    else False
+                ),
+            )  # Client to connect to Prisma db
         verbose_proxy_logger.debug("Success - Connected Prisma Client to DB")

     def hash_token(self, token: str):

@@ -1141,9 +1160,9 @@ class PrismaClient:
                     "LiteLLM_VerificationTokenView Created in DB!"
                 )
             else:
-                should_create_views = await should_create_missing_views(db=self.db)
+                should_create_views = await should_create_missing_views(db=self.db.db)  # type: ignore
                 if should_create_views:
-                    await create_missing_views(db=self.db)
+                    await create_missing_views(db=self.db)  # type: ignore
                 else:
                     # don't block execution if these views are missing
                     # Convert lists to sets for efficient difference calculation
@@ -29,7 +29,7 @@ def _is_base64(s):
     return False


-def str_to_bool(value: str) -> Optional[bool]:
+def str_to_bool(value: Optional[str]) -> Optional[bool]:
     """
     Converts a string to a boolean if it's a recognized boolean string.
     Returns None if the string is not a recognized boolean value.

@@ -37,6 +37,9 @@ def str_to_bool(value: str) -> Optional[bool]:
     :param value: The string to be checked.
     :return: True or False if the string is a recognized boolean, otherwise None.
     """
+    if value is None:
+        return None
+
     true_values = {"true"}
     false_values = {"false"}
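Note: with the widened signature, `str_to_bool` can be fed `os.getenv(...)` directly, which is exactly how the proxy reads `IAM_TOKEN_DB_AUTH` in the hunk above; an unset variable now yields `None` instead of raising. A short hedged sketch (assuming the function recognizes the `"True"` value the proxy CLI sets):

```python
import os
from litellm.secret_managers.main import str_to_bool

# unset env var -> os.getenv returns None -> str_to_bool now returns None
print(str_to_bool(os.getenv("IAM_TOKEN_DB_AUTH")))  # None when the var is unset

os.environ["IAM_TOKEN_DB_AUTH"] = "True"  # same value proxy_cli.py sets
print(str_to_bool(os.getenv("IAM_TOKEN_DB_AUTH")))
```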
@ -968,3 +968,259 @@ def test_aaalangfuse_dynamic_logging():
|
||||||
)
|
)
|
||||||
|
|
||||||
langfuse_client.get_trace(id=trace_id)
|
langfuse_client.get_trace(id=trace_id)
|
||||||
|
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
generation_params = {
|
||||||
|
"name": "litellm-acompletion",
|
||||||
|
"id": "time-10-35-32-316778_chatcmpl-ABQDEzVJS8fziPdvkeTA3tnQaxeMX",
|
||||||
|
"start_time": datetime.datetime(2024, 9, 25, 10, 35, 32, 316778),
|
||||||
|
"end_time": datetime.datetime(2024, 9, 25, 10, 35, 32, 897141),
|
||||||
|
"model": "gpt-4o",
|
||||||
|
"model_parameters": {
|
||||||
|
"stream": False,
|
||||||
|
"max_retries": 0,
|
||||||
|
"extra_body": "{}",
|
||||||
|
"system_fingerprint": "fp_52a7f40b0b",
|
||||||
|
},
|
||||||
|
"input": {
|
||||||
|
"messages": [
|
||||||
|
{"content": "<>", "role": "system"},
|
||||||
|
{"content": "<>", "role": "user"},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"output": {
|
||||||
|
"content": "Hello! It looks like your message might have been sent by accident. How can I assist you today?",
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": None,
|
||||||
|
"function_call": None,
|
||||||
|
},
|
||||||
|
"usage": {"prompt_tokens": 13, "completion_tokens": 21, "total_cost": 0.00038},
|
||||||
|
"metadata": {
|
||||||
|
"prompt": {
|
||||||
|
"name": "conversational-service-answer_question_restricted_reply",
|
||||||
|
"version": 9,
|
||||||
|
"config": {},
|
||||||
|
"labels": ["latest", "staging", "production"],
|
||||||
|
"tags": ["conversational-service"],
|
||||||
|
"prompt": [
|
                {"role": "system", "content": "<>"},
                {"role": "user", "content": "{{text}}"},
            ],
        },
        "requester_metadata": {
            "session_id": "e953a71f-e129-4cf5-ad11-ad18245022f1",
            "trace_name": "jess",
            "tags": ["conversational-service", "generative-ai-engine", "staging"],
            "prompt": {
                "name": "conversational-service-answer_question_restricted_reply",
                "version": 9,
                "config": {},
                "labels": ["latest", "staging", "production"],
                "tags": ["conversational-service"],
                "prompt": [
                    {"role": "system", "content": "<>"},
                    {"role": "user", "content": "{{text}}"},
                ],
            },
        },
        "user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
        "litellm_api_version": "0.0.0",
        "user_api_key_user_id": "default_user_id",
        "user_api_key_spend": 0.0,
        "user_api_key_metadata": {},
        "requester_ip_address": "127.0.0.1",
        "model_group": "gpt-4o",
        "model_group_size": 0,
        "deployment": "gpt-4o",
        "model_info": {
            "id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4",
            "db_model": False,
        },
        "hidden_params": {
            "headers": {
                "date": "Wed, 25 Sep 2024 17:35:32 GMT",
                "content-type": "application/json",
                "transfer-encoding": "chunked",
                "connection": "keep-alive",
                "access-control-expose-headers": "X-Request-ID",
                "openai-organization": "reliablekeystest",
                "openai-processing-ms": "329",
                "openai-version": "2020-10-01",
                "strict-transport-security": "max-age=31536000; includeSubDomains; preload",
                "x-ratelimit-limit-requests": "10000",
                "x-ratelimit-limit-tokens": "30000000",
                "x-ratelimit-remaining-requests": "9999",
                "x-ratelimit-remaining-tokens": "29999980",
                "x-ratelimit-reset-requests": "6ms",
                "x-ratelimit-reset-tokens": "0s",
                "x-request-id": "req_fdff3bfa11c391545d2042d46473214f",
                "cf-cache-status": "DYNAMIC",
                "set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
                "x-content-type-options": "nosniff",
                "server": "cloudflare",
                "cf-ray": "8c8cc573becb232c-SJC",
                "content-encoding": "gzip",
                "alt-svc": 'h3=":443"; ma=86400',
            },
            "additional_headers": {
                "llm_provider-date": "Wed, 25 Sep 2024 17:35:32 GMT",
                "llm_provider-content-type": "application/json",
                "llm_provider-transfer-encoding": "chunked",
                "llm_provider-connection": "keep-alive",
                "llm_provider-access-control-expose-headers": "X-Request-ID",
                "llm_provider-openai-organization": "reliablekeystest",
                "llm_provider-openai-processing-ms": "329",
                "llm_provider-openai-version": "2020-10-01",
                "llm_provider-strict-transport-security": "max-age=31536000; includeSubDomains; preload",
                "llm_provider-x-ratelimit-limit-requests": "10000",
                "llm_provider-x-ratelimit-limit-tokens": "30000000",
                "llm_provider-x-ratelimit-remaining-requests": "9999",
                "llm_provider-x-ratelimit-remaining-tokens": "29999980",
                "llm_provider-x-ratelimit-reset-requests": "6ms",
                "llm_provider-x-ratelimit-reset-tokens": "0s",
                "llm_provider-x-request-id": "req_fdff3bfa11c391545d2042d46473214f",
                "llm_provider-cf-cache-status": "DYNAMIC",
                "llm_provider-set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
                "llm_provider-x-content-type-options": "nosniff",
                "llm_provider-server": "cloudflare",
                "llm_provider-cf-ray": "8c8cc573becb232c-SJC",
                "llm_provider-content-encoding": "gzip",
                "llm_provider-alt-svc": 'h3=":443"; ma=86400',
            },
            "litellm_call_id": "1fa31658-20af-40b5-9ac9-60fd7b5ad98c",
            "model_id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4",
            "api_base": "https://api.openai.com",
            "optional_params": {
                "stream": False,
                "max_retries": 0,
                "extra_body": {},
            },
            "response_cost": 0.00038,
        },
        "litellm_response_cost": 0.00038,
        "api_base": "https://api.openai.com/v1/",
        "cache_hit": False,
    },
    "level": "DEFAULT",
    "version": None,
}


@pytest.mark.parametrize(
    "prompt",
    [
        [
            {"role": "system", "content": "<>"},
            {"role": "user", "content": "{{text}}"},
        ],
        "hello world",
    ],
)
def test_langfuse_prompt_type(prompt):

    from litellm.integrations.langfuse import _add_prompt_to_generation_params

    clean_metadata = {
        "prompt": {
            "name": "conversational-service-answer_question_restricted_reply",
            "version": 9,
            "config": {},
            "labels": ["latest", "staging", "production"],
            "tags": ["conversational-service"],
            "prompt": prompt,
        },
        "requester_metadata": {
            "session_id": "e953a71f-e129-4cf5-ad11-ad18245022f1",
            "trace_name": "jess",
            "tags": ["conversational-service", "generative-ai-engine", "staging"],
            "prompt": {
                "name": "conversational-service-answer_question_restricted_reply",
                "version": 9,
                "config": {},
                "labels": ["latest", "staging", "production"],
                "tags": ["conversational-service"],
                "prompt": [
                    {"role": "system", "content": "<>"},
                    {"role": "user", "content": "{{text}}"},
                ],
            },
        },
        "user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
        "litellm_api_version": "0.0.0",
        "user_api_key_user_id": "default_user_id",
        "user_api_key_spend": 0.0,
        "user_api_key_metadata": {},
        "requester_ip_address": "127.0.0.1",
        "model_group": "gpt-4o",
        "model_group_size": 0,
        "deployment": "gpt-4o",
        "model_info": {
            "id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4",
            "db_model": False,
        },
        "hidden_params": {
            "headers": {
                "date": "Wed, 25 Sep 2024 17:35:32 GMT",
                "content-type": "application/json",
                "transfer-encoding": "chunked",
                "connection": "keep-alive",
                "access-control-expose-headers": "X-Request-ID",
                "openai-organization": "reliablekeystest",
                "openai-processing-ms": "329",
                "openai-version": "2020-10-01",
                "strict-transport-security": "max-age=31536000; includeSubDomains; preload",
                "x-ratelimit-limit-requests": "10000",
                "x-ratelimit-limit-tokens": "30000000",
                "x-ratelimit-remaining-requests": "9999",
                "x-ratelimit-remaining-tokens": "29999980",
                "x-ratelimit-reset-requests": "6ms",
                "x-ratelimit-reset-tokens": "0s",
                "x-request-id": "req_fdff3bfa11c391545d2042d46473214f",
                "cf-cache-status": "DYNAMIC",
                "set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
                "x-content-type-options": "nosniff",
                "server": "cloudflare",
                "cf-ray": "8c8cc573becb232c-SJC",
                "content-encoding": "gzip",
                "alt-svc": 'h3=":443"; ma=86400',
            },
            "additional_headers": {
                "llm_provider-date": "Wed, 25 Sep 2024 17:35:32 GMT",
                "llm_provider-content-type": "application/json",
                "llm_provider-transfer-encoding": "chunked",
                "llm_provider-connection": "keep-alive",
                "llm_provider-access-control-expose-headers": "X-Request-ID",
                "llm_provider-openai-organization": "reliablekeystest",
                "llm_provider-openai-processing-ms": "329",
                "llm_provider-openai-version": "2020-10-01",
                "llm_provider-strict-transport-security": "max-age=31536000; includeSubDomains; preload",
                "llm_provider-x-ratelimit-limit-requests": "10000",
                "llm_provider-x-ratelimit-limit-tokens": "30000000",
                "llm_provider-x-ratelimit-remaining-requests": "9999",
                "llm_provider-x-ratelimit-remaining-tokens": "29999980",
                "llm_provider-x-ratelimit-reset-requests": "6ms",
                "llm_provider-x-ratelimit-reset-tokens": "0s",
                "llm_provider-x-request-id": "req_fdff3bfa11c391545d2042d46473214f",
                "llm_provider-cf-cache-status": "DYNAMIC",
                "llm_provider-set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
                "llm_provider-x-content-type-options": "nosniff",
                "llm_provider-server": "cloudflare",
                "llm_provider-cf-ray": "8c8cc573becb232c-SJC",
                "llm_provider-content-encoding": "gzip",
                "llm_provider-alt-svc": 'h3=":443"; ma=86400',
            },
            "litellm_call_id": "1fa31658-20af-40b5-9ac9-60fd7b5ad98c",
            "model_id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4",
            "api_base": "https://api.openai.com",
            "optional_params": {"stream": False, "max_retries": 0, "extra_body": {}},
            "response_cost": 0.00038,
        },
        "litellm_response_cost": 0.00038,
        "api_base": "https://api.openai.com/v1/",
        "cache_hit": False,
    }
    _add_prompt_to_generation_params(
        generation_params=generation_params, clean_metadata=clean_metadata
    )
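
The parametrized test above only checks that `_add_prompt_to_generation_params` accepts both a chat-style prompt (a list of role/content dicts) and a plain-text prompt without raising. A minimal follow-up assertion that could sit at the end of the test body is sketched below; it assumes the helper returns the updated `generation_params` with the resolved Langfuse prompt attached under a "prompt" key, which is an assumption about its return shape rather than something this diff asserts.

    # Sketch only (assumption): the helper returns the updated generation_params
    # dict with the resolved Langfuse prompt object stored under "prompt".
    result = _add_prompt_to_generation_params(
        generation_params=generation_params, clean_metadata=clean_metadata
    )
    assert isinstance(result, dict)
    assert "prompt" in result  # hypothetical check; adjust to the real return shape
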
@@ -153,6 +153,7 @@ class GenerationConfig(TypedDict, total=False):
     presence_penalty: float
     frequency_penalty: float
     response_mime_type: Literal["text/plain", "application/json"]
+    response_schema: dict
     seed: int
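
The new `response_schema` field slots into the existing `GenerationConfig` TypedDict alongside `response_mime_type`. Below is a small, self-contained sketch of how a caller might populate it to request schema-constrained JSON output; the TypedDict is re-declared here with only the fields visible in the hunk above, and the schema contents are purely illustrative.

from typing import Literal, TypedDict


class GenerationConfig(TypedDict, total=False):
    # Only the fields visible in the hunk above are reproduced here.
    presence_penalty: float
    frequency_penalty: float
    response_mime_type: Literal["text/plain", "application/json"]
    response_schema: dict
    seed: int


# Request JSON output that conforms to a schema (illustrative values).
config: GenerationConfig = {
    "response_mime_type": "application/json",
    "response_schema": {
        "type": "object",
        "properties": {"answer": {"type": "string"}},
        "required": ["answer"],
    },
    "seed": 42,
}
print(config["response_schema"]["required"])  # ["answer"]
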
@@ -3239,8 +3239,15 @@ def get_optional_params(
             non_default_params=non_default_params,
             optional_params=optional_params,
             model=model,
+            drop_params=(
+                drop_params
+                if drop_params is not None and isinstance(drop_params, bool)
+                else False
+            ),
         )
-    elif custom_llm_provider == "vertex_ai_beta":
+    elif custom_llm_provider == "vertex_ai_beta" or (
+        custom_llm_provider == "vertex_ai" and "gemini" in model
+    ):
         supported_params = get_supported_openai_params(
             model=model, custom_llm_provider=custom_llm_provider
         )
@@ -3277,6 +3284,11 @@ def get_optional_params(
             non_default_params=non_default_params,
             optional_params=optional_params,
             model=model,
+            drop_params=(
+                drop_params
+                if drop_params is not None and isinstance(drop_params, bool)
+                else False
+            ),
         )
     elif custom_llm_provider == "vertex_ai" and model in litellm.vertex_mistral_models:
         supported_params = get_supported_openai_params(
@@ -3301,6 +3313,11 @@ def get_optional_params(
             non_default_params=non_default_params,
             optional_params=optional_params,
             model=model,
+            drop_params=(
+                drop_params
+                if drop_params is not None and isinstance(drop_params, bool)
+                else False
+            ),
         )
     elif custom_llm_provider == "sagemaker":
         ## check if unsupported param passed in
@@ -3710,6 +3727,7 @@ def get_optional_params(
             non_default_params=non_default_params,
             optional_params=optional_params,
             model=model,
+            drop_params=drop_params,
         )
     elif custom_llm_provider == "openrouter":
         supported_params = get_supported_openai_params(
@@ -3818,6 +3836,7 @@ def get_optional_params(
             non_default_params=non_default_params,
             optional_params=optional_params,
             model=model,
+            drop_params=drop_params,
         )
     elif custom_llm_provider == "azure":
         supported_params = get_supported_openai_params(
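
The same `drop_params` coercion expression is repeated at several of the call sites above. For clarity, here is a stand-alone sketch of that normalization logic with a few checks; the PR itself inlines the expression rather than introducing a helper like this.

from typing import Optional


def normalize_drop_params(drop_params: Optional[bool]) -> bool:
    """Coerce drop_params to a strict bool, defaulting to False.

    Mirrors the inline expression used in the hunks above.
    """
    return (
        drop_params
        if drop_params is not None and isinstance(drop_params, bool)
        else False
    )


assert normalize_drop_params(None) is False
assert normalize_drop_params(True) is True
assert normalize_drop_params(False) is False
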
package-lock.json (generated file, 56 changed lines)

@@ -5,12 +5,53 @@
   "packages": {
     "": {
       "dependencies": {
+        "prisma": "^5.17.0",
         "react-copy-to-clipboard": "^5.1.0"
       },
       "devDependencies": {
         "@types/react-copy-to-clipboard": "^5.0.7"
       }
     },
+    "node_modules/@prisma/debug": {
+      "version": "5.17.0",
+      "resolved": "https://registry.npmjs.org/@prisma/debug/-/debug-5.17.0.tgz",
+      "integrity": "sha512-l7+AteR3P8FXiYyo496zkuoiJ5r9jLQEdUuxIxNCN1ud8rdbH3GTxm+f+dCyaSv9l9WY+29L9czaVRXz9mULfg=="
+    },
+    "node_modules/@prisma/engines": {
+      "version": "5.17.0",
+      "resolved": "https://registry.npmjs.org/@prisma/engines/-/engines-5.17.0.tgz",
+      "integrity": "sha512-+r+Nf+JP210Jur+/X8SIPLtz+uW9YA4QO5IXA+KcSOBe/shT47bCcRMTYCbOESw3FFYFTwe7vU6KTWHKPiwvtg==",
+      "hasInstallScript": true,
+      "dependencies": {
+        "@prisma/debug": "5.17.0",
+        "@prisma/engines-version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
+        "@prisma/fetch-engine": "5.17.0",
+        "@prisma/get-platform": "5.17.0"
+      }
+    },
+    "node_modules/@prisma/engines-version": {
+      "version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
+      "resolved": "https://registry.npmjs.org/@prisma/engines-version/-/engines-version-5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053.tgz",
+      "integrity": "sha512-tUuxZZysZDcrk5oaNOdrBnnkoTtmNQPkzINFDjz7eG6vcs9AVDmA/F6K5Plsb2aQc/l5M2EnFqn3htng9FA4hg=="
+    },
+    "node_modules/@prisma/fetch-engine": {
+      "version": "5.17.0",
+      "resolved": "https://registry.npmjs.org/@prisma/fetch-engine/-/fetch-engine-5.17.0.tgz",
+      "integrity": "sha512-ESxiOaHuC488ilLPnrv/tM2KrPhQB5TRris/IeIV4ZvUuKeaicCl4Xj/JCQeG9IlxqOgf1cCg5h5vAzlewN91Q==",
+      "dependencies": {
+        "@prisma/debug": "5.17.0",
+        "@prisma/engines-version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
+        "@prisma/get-platform": "5.17.0"
+      }
+    },
+    "node_modules/@prisma/get-platform": {
+      "version": "5.17.0",
+      "resolved": "https://registry.npmjs.org/@prisma/get-platform/-/get-platform-5.17.0.tgz",
+      "integrity": "sha512-UlDgbRozCP1rfJ5Tlkf3Cnftb6srGrEQ4Nm3og+1Se2gWmCZ0hmPIi+tQikGDUVLlvOWx3Gyi9LzgRP+HTXV9w==",
+      "dependencies": {
+        "@prisma/debug": "5.17.0"
+      }
+    },
     "node_modules/@types/prop-types": {
       "version": "15.7.12",
       "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.12.tgz",
@@ -74,6 +115,21 @@
         "node": ">=0.10.0"
       }
     },
+    "node_modules/prisma": {
+      "version": "5.17.0",
+      "resolved": "https://registry.npmjs.org/prisma/-/prisma-5.17.0.tgz",
+      "integrity": "sha512-m4UWkN5lBE6yevqeOxEvmepnL5cNPEjzMw2IqDB59AcEV6w7D8vGljDLd1gPFH+W6gUxw9x7/RmN5dCS/WTPxA==",
+      "hasInstallScript": true,
+      "dependencies": {
+        "@prisma/engines": "5.17.0"
+      },
+      "bin": {
+        "prisma": "build/index.js"
+      },
+      "engines": {
+        "node": ">=16.13"
+      }
+    },
     "node_modules/prop-types": {
       "version": "15.8.1",
       "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz",

@@ -1,5 +1,6 @@
 {
   "dependencies": {
+    "prisma": "^5.17.0",
     "react-copy-to-clipboard": "^5.1.0"
   },
   "devDependencies": {

@@ -141,12 +141,12 @@ def test_all_model_configs():
         "max_completion_tokens" in VertexAILlama3Config().get_supported_openai_params()
     )
     assert VertexAILlama3Config().map_openai_params(
-        {"max_completion_tokens": 10}, {}, "llama3"
+        {"max_completion_tokens": 10}, {}, "llama3", drop_params=False
     ) == {"max_tokens": 10}

     assert "max_completion_tokens" in VertexAIAi21Config().get_supported_openai_params()
     assert VertexAIAi21Config().map_openai_params(
-        {"max_completion_tokens": 10}, {}, "llama3"
+        {"max_completion_tokens": 10}, {}, "llama3", drop_params=False
     ) == {"max_tokens": 10}

     from litellm.llms.fireworks_ai.chat.fireworks_ai_transformation import (
@@ -332,6 +332,7 @@ def test_all_model_configs():
         model="gemini-1.0-pro",
         non_default_params={"max_completion_tokens": 10},
         optional_params={},
+        drop_params=False,
     ) == {"max_output_tokens": 10}

     assert "max_completion_tokens" in VertexGeminiConfig().get_supported_openai_params()
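
These assertions exercise the provider config classes directly; the same mapping is also reachable through `get_optional_params`, which passes `drop_params` through to the provider config mapping in the hunks further up. A hedged sketch of that path is below: the model and provider values are chosen for illustration, and the exact output is an expectation based on the assertion above rather than something this hunk verifies.

from litellm.utils import get_optional_params

# max_completion_tokens should be translated to the provider's native field,
# e.g. max_output_tokens for Gemini on Vertex AI (per the assertion above).
params = get_optional_params(
    model="gemini-1.0-pro",
    custom_llm_provider="vertex_ai_beta",
    max_completion_tokens=10,
)
print(params)  # expected to include {"max_output_tokens": 10}
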
@@ -600,3 +600,35 @@ def test_o1_model_params():
     )
     assert optional_params["seed"] == 10
     assert optional_params["user"] == "John"
+
+
+@pytest.mark.parametrize(
+    "temperature, expected_error",
+    [(0.2, True), (1, False)],
+)
+def test_o1_model_temperature_params(temperature, expected_error):
+    if expected_error:
+        with pytest.raises(litellm.UnsupportedParamsError):
+            get_optional_params(
+                model="o1-preview-2024-09-12",
+                custom_llm_provider="openai",
+                temperature=temperature,
+            )
+    else:
+        get_optional_params(
+            model="o1-preview-2024-09-12",
+            custom_llm_provider="openai",
+            temperature=temperature,
+        )
+
+
+def test_unmapped_gemini_model_params():
+    """
+    Test if unmapped gemini model optional params are translated correctly
+    """
+    optional_params = get_optional_params(
+        model="gemini-new-model",
+        custom_llm_provider="vertex_ai",
+        stop="stop_word",
+    )
+    assert optional_params["stop_sequences"] == ["stop_word"]
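
Taken together, the two new tests document a pair of translation rules: o1 models reject any `temperature` other than `1` with `litellm.UnsupportedParamsError`, and a gemini model name that has no explicit mapping still gets OpenAI-style params translated on `vertex_ai` (`stop` becomes `stop_sequences`). The usage sketch below simply mirrors the calls made in the tests above.

import litellm
from litellm.utils import get_optional_params

# Rule 1: o1 models only accept the default temperature of 1.
try:
    get_optional_params(
        model="o1-preview-2024-09-12",
        custom_llm_provider="openai",
        temperature=0.2,
    )
except litellm.UnsupportedParamsError as e:
    print(f"temperature rejected as expected: {e}")

# Rule 2: an unmapped gemini model still gets OpenAI params translated.
params = get_optional_params(
    model="gemini-new-model",
    custom_llm_provider="vertex_ai",
    stop="stop_word",
)
assert params["stop_sequences"] == ["stop_word"]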