From a1d9e96b310770838bab03fa9a967940d4a45a72 Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Thu, 26 Sep 2024 16:41:44 -0700 Subject: [PATCH] LiteLLM Minor Fixes & Improvements (09/25/2024) (#5893) * fix(langfuse.py): support new langfuse prompt_chat class init params * fix(langfuse.py): handle new init values on prompt chat + prompt text templates fixes error caused during langfuse logging * docs(openai_compatible.md): clarify `openai/` handles correct routing for `/v1/completions` route Fixes https://github.com/BerriAI/litellm/issues/5876 * fix(utils.py): handle unmapped gemini model optional param translation Fixes https://github.com/BerriAI/litellm/issues/5888 * fix(o1_transformation.py): fix o-1 validation, to not raise error if temperature=1 Fixes https://github.com/BerriAI/litellm/issues/5884 * fix(prisma_client.py): refresh iam token Fixes https://github.com/BerriAI/litellm/issues/5896 * fix: pass drop params where required * fix(utils.py): pass drop_params correctly * fix(types/vertex_ai.py): fix generation config * test(test_max_completion_tokens.py): fix test * fix(vertex_and_google_ai_studio_gemini.py): fix map openai params --- .../docs/providers/openai_compatible.md | 2 +- litellm/integrations/langfuse.py | 37 +- .../llms/OpenAI/chat/gpt_transformation.py | 13 +- litellm/llms/OpenAI/chat/o1_transformation.py | 28 +- litellm/llms/OpenAI/openai.py | 8 +- .../gemini/transformation.py | 20 +- .../vertex_and_google_ai_studio_gemini.py | 329 ++++++------------ .../ai21/transformation.py | 7 +- .../llama3/transformation.py | 7 +- litellm/proxy/_new_secret_config.yaml | 14 +- litellm/proxy/auth/rds_iam_token.py | 42 ++- litellm/proxy/db/prisma_client.py | 106 ++++++ litellm/proxy/proxy_cli.py | 30 +- litellm/proxy/utils.py | 27 +- litellm/secret_managers/main.py | 5 +- litellm/tests/test_alangfuse.py | 256 ++++++++++++++ litellm/types/llms/vertex_ai.py | 1 + litellm/utils.py | 21 +- package-lock.json | 56 +++ package.json | 1 + .../test_max_completion_tokens.py | 5 +- tests/llm_translation/test_optional_params.py | 32 ++ 22 files changed, 755 insertions(+), 292 deletions(-) create mode 100644 litellm/proxy/db/prisma_client.py diff --git a/docs/my-website/docs/providers/openai_compatible.md b/docs/my-website/docs/providers/openai_compatible.md index f6225d716..c7f9bf6f4 100644 --- a/docs/my-website/docs/providers/openai_compatible.md +++ b/docs/my-website/docs/providers/openai_compatible.md @@ -7,7 +7,7 @@ To call models hosted behind an openai proxy, make 2 changes: 1. For `/chat/completions`: Put `openai/` in front of your model name, so litellm knows you're trying to call an openai `/chat/completions` endpoint. -2. For `/completions`: Put `text-completion-openai/` in front of your model name, so litellm knows you're trying to call an openai `/completions` endpoint. +2. For `/completions`: Put `text-completion-openai/` in front of your model name, so litellm knows you're trying to call an openai `/completions` endpoint. [NOT REQUIRED for `openai/` endpoints called via `/v1/completions` route]. 2. **Do NOT** add anything additional to the base url e.g. `/v1/embedding`. LiteLLM uses the openai-client to make these calls, and that automatically adds the relevant endpoints. 
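The doc change above pins down the routing rule: with the `openai/` prefix, calls made via the proxy's `/v1/completions` route are routed to the correct upstream endpoint, so the `text-completion-openai/` prefix is only needed when targeting `/completions` directly. A minimal sketch of the two call patterns, assuming a hypothetical OpenAI-compatible server; the base URL, API key, and model name below are placeholders, not values taken from this patch.

```python
import litellm

# Chat completions: the `openai/` prefix routes to the /chat/completions
# endpoint on the custom base URL.
chat_response = litellm.completion(
    model="openai/my-hosted-model",            # placeholder model name
    messages=[{"role": "user", "content": "Hello"}],
    api_base="http://localhost:8000/v1",       # placeholder OpenAI-compatible server
    api_key="sk-placeholder",
)

# Text completions: `text-completion-openai/` targets the /completions
# endpoint. Per the note above, it is not required when an `openai/` model
# is called via the /v1/completions route.
text_response = litellm.text_completion(
    model="text-completion-openai/my-hosted-model",
    prompt="Hello",
    api_base="http://localhost:8000/v1",
    api_key="sk-placeholder",
)
```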
diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py index b2a084da3..0819cc703 100644 --- a/litellm/integrations/langfuse.py +++ b/litellm/integrations/langfuse.py @@ -1,6 +1,7 @@ #### What this does #### # On success, logs events to Langfuse import copy +import inspect import os import traceback @@ -676,21 +677,37 @@ def _add_prompt_to_generation_params( elif "version" in user_prompt and "prompt" in user_prompt: # prompts if isinstance(user_prompt["prompt"], str): - _prompt_obj = Prompt_Text( - name=user_prompt["name"], - prompt=user_prompt["prompt"], - version=user_prompt["version"], - config=user_prompt.get("config", None), + prompt_text_params = getattr( + Prompt_Text, "model_fields", Prompt_Text.__fields__ ) + _data = { + "name": user_prompt["name"], + "prompt": user_prompt["prompt"], + "version": user_prompt["version"], + "config": user_prompt.get("config", None), + } + if "labels" in prompt_text_params and "tags" in prompt_text_params: + _data["labels"] = user_prompt.get("labels", []) or [] + _data["tags"] = user_prompt.get("tags", []) or [] + _prompt_obj = Prompt_Text(**_data) # type: ignore generation_params["prompt"] = TextPromptClient(prompt=_prompt_obj) elif isinstance(user_prompt["prompt"], list): - _prompt_obj = Prompt_Chat( - name=user_prompt["name"], - prompt=user_prompt["prompt"], - version=user_prompt["version"], - config=user_prompt.get("config", None), + prompt_chat_params = getattr( + Prompt_Chat, "model_fields", Prompt_Chat.__fields__ ) + _data = { + "name": user_prompt["name"], + "prompt": user_prompt["prompt"], + "version": user_prompt["version"], + "config": user_prompt.get("config", None), + } + if "labels" in prompt_chat_params and "tags" in prompt_chat_params: + _data["labels"] = user_prompt.get("labels", []) or [] + _data["tags"] = user_prompt.get("tags", []) or [] + + _prompt_obj = Prompt_Chat(**_data) # type: ignore + generation_params["prompt"] = ChatPromptClient(prompt=_prompt_obj) else: verbose_logger.error( diff --git a/litellm/llms/OpenAI/chat/gpt_transformation.py b/litellm/llms/OpenAI/chat/gpt_transformation.py index 4ff4790c9..6331322bf 100644 --- a/litellm/llms/OpenAI/chat/gpt_transformation.py +++ b/litellm/llms/OpenAI/chat/gpt_transformation.py @@ -125,7 +125,11 @@ class OpenAIGPTConfig: return base_params + model_specific_params def _map_openai_params( - self, non_default_params: dict, optional_params: dict, model: str + self, + non_default_params: dict, + optional_params: dict, + model: str, + drop_params: bool, ) -> dict: supported_openai_params = self.get_supported_openai_params(model) for param, value in non_default_params.items(): @@ -134,10 +138,15 @@ class OpenAIGPTConfig: return optional_params def map_openai_params( - self, non_default_params: dict, optional_params: dict, model: str + self, + non_default_params: dict, + optional_params: dict, + model: str, + drop_params: bool, ) -> dict: return self._map_openai_params( non_default_params=non_default_params, optional_params=optional_params, model=model, + drop_params=drop_params, ) diff --git a/litellm/llms/OpenAI/chat/o1_transformation.py b/litellm/llms/OpenAI/chat/o1_transformation.py index 200097f67..c7581ae27 100644 --- a/litellm/llms/OpenAI/chat/o1_transformation.py +++ b/litellm/llms/OpenAI/chat/o1_transformation.py @@ -57,7 +57,6 @@ class OpenAIO1Config(OpenAIGPTConfig): "parallel_tool_calls", "function_call", "functions", - "temperature", "top_p", "n", "presence_penalty", @@ -73,13 +72,36 @@ class OpenAIO1Config(OpenAIGPTConfig): ] def 
map_openai_params( - self, non_default_params: dict, optional_params: dict, model: str + self, + non_default_params: dict, + optional_params: dict, + model: str, + drop_params: bool, ): if "max_tokens" in non_default_params: optional_params["max_completion_tokens"] = non_default_params.pop( "max_tokens" ) - return super()._map_openai_params(non_default_params, optional_params, model) + if "temperature" in non_default_params: + temperature_value: Optional[float] = non_default_params.pop("temperature") + if temperature_value is not None: + if temperature_value == 0 or temperature_value == 1: + optional_params["temperature"] = temperature_value + else: + ## UNSUPPORTED TEMPERATURE VALUE + if litellm.drop_params is True or drop_params is True: + pass + else: + raise litellm.utils.UnsupportedParamsError( + message="O-1 doesn't support temperature={}. To drop unsupported openai params from the call, set `litellm.drop_params = True`".format( + temperature_value + ), + status_code=400, + ) + + return super()._map_openai_params( + non_default_params, optional_params, model, drop_params + ) def is_model_o1_reasoning_model(self, model: str) -> bool: if model in litellm.open_ai_chat_completion_models and "o1" in model: diff --git a/litellm/llms/OpenAI/openai.py b/litellm/llms/OpenAI/openai.py index aafb14bd1..5df5b1132 100644 --- a/litellm/llms/OpenAI/openai.py +++ b/litellm/llms/OpenAI/openai.py @@ -413,7 +413,11 @@ class OpenAIConfig: return optional_params def map_openai_params( - self, non_default_params: dict, optional_params: dict, model: str + self, + non_default_params: dict, + optional_params: dict, + model: str, + drop_params: bool, ) -> dict: """ """ if litellm.OpenAIO1Config().is_model_o1_reasoning_model(model=model): @@ -421,11 +425,13 @@ class OpenAIConfig: non_default_params=non_default_params, optional_params=optional_params, model=model, + drop_params=drop_params, ) return litellm.OpenAIGPTConfig().map_openai_params( non_default_params=non_default_params, optional_params=optional_params, model=model, + drop_params=drop_params, ) diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/transformation.py b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/transformation.py index 78874d544..075c0d169 100644 --- a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/transformation.py +++ b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/transformation.py @@ -22,7 +22,7 @@ from litellm.types.llms.vertex_ai import ( Tools, ) -from ..common_utils import get_supports_system_message, get_supports_response_schema +from ..common_utils import get_supports_response_schema, get_supports_system_message from ..vertex_ai_non_gemini import _gemini_convert_messages_with_history @@ -73,8 +73,14 @@ def _transform_request_body( safety_settings: Optional[List[SafetSettingsConfig]] = optional_params.pop( "safety_settings", None ) # type: ignore + config_fields = GenerationConfig.__annotations__.keys() + + filtered_params = { + k: v for k, v in optional_params.items() if k in config_fields + } + generation_config: Optional[GenerationConfig] = GenerationConfig( - **optional_params + **filtered_params ) data = RequestBody(contents=content) if system_instructions is not None: @@ -104,7 +110,7 @@ def sync_transform_request_body( timeout: Optional[Union[float, httpx.Timeout]], extra_headers: Optional[dict], optional_params: dict, - logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, # type: ignore custom_llm_provider: 
Literal["vertex_ai", "vertex_ai_beta", "gemini"], litellm_params: dict, ) -> RequestBody: @@ -146,7 +152,7 @@ async def async_transform_request_body( timeout: Optional[Union[float, httpx.Timeout]], extra_headers: Optional[dict], optional_params: dict, - logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, # type: ignore custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"], litellm_params: dict, ) -> RequestBody: @@ -199,6 +205,7 @@ def _transform_system_message( if supports_system_message is True: for idx, message in enumerate(messages): if message["role"] == "system": + _system_content_block: Optional[PartType] = None if isinstance(message["content"], str): _system_content_block = PartType(text=message["content"]) elif isinstance(message["content"], list): @@ -206,8 +213,9 @@ def _transform_system_message( for content in message["content"]: system_text += content.get("text") or "" _system_content_block = PartType(text=system_text) - system_content_blocks.append(_system_content_block) - system_prompt_indices.append(idx) + if _system_content_block is not None: + system_content_blocks.append(_system_content_block) + system_prompt_indices.append(idx) if len(system_prompt_indices) > 0: for idx in reversed(system_prompt_indices): messages.pop(idx) diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py index 35a7e8337..78bd51003 100644 --- a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py +++ b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py @@ -252,233 +252,6 @@ class VertexAIConfig: ] -class GoogleAIStudioGeminiConfig: # key diff from VertexAI - 'frequency_penalty' and 'presence_penalty' not supported - """ - Reference: https://ai.google.dev/api/rest/v1beta/GenerationConfig - - The class `GoogleAIStudioGeminiConfig` provides configuration for the Google AI Studio's Gemini API interface. Below are the parameters: - - - `temperature` (float): This controls the degree of randomness in token selection. - - - `max_output_tokens` (integer): This sets the limitation for the maximum amount of token in the text output. In this case, the default value is 256. - - - `top_p` (float): The tokens are selected from the most probable to the least probable until the sum of their probabilities equals the `top_p` value. Default is 0.95. - - - `top_k` (integer): The value of `top_k` determines how many of the most probable tokens are considered in the selection. For example, a `top_k` of 1 means the selected token is the most probable among all tokens. The default value is 40. - - - `response_mime_type` (str): The MIME type of the response. The default value is 'text/plain'. Other values - `application/json`. - - - `response_schema` (dict): Optional. Output response schema of the generated candidate text when response mime type can have schema. Schema can be objects, primitives or arrays and is a subset of OpenAPI schema. If set, a compatible response_mime_type must also be set. Compatible mimetypes: application/json: Schema for JSON response. - - - `candidate_count` (int): Number of generated responses to return. - - - `stop_sequences` (List[str]): The set of character sequences (up to 5) that will stop output generation. If specified, the API will stop at the first appearance of a stop sequence. 
The stop sequence will not be included as part of the response. - - Note: Please make sure to modify the default parameters as required for your use case. - """ - - temperature: Optional[float] = None - max_output_tokens: Optional[int] = None - top_p: Optional[float] = None - top_k: Optional[int] = None - response_mime_type: Optional[str] = None - response_schema: Optional[dict] = None - candidate_count: Optional[int] = None - stop_sequences: Optional[list] = None - - def __init__( - self, - temperature: Optional[float] = None, - max_output_tokens: Optional[int] = None, - top_p: Optional[float] = None, - top_k: Optional[int] = None, - response_mime_type: Optional[str] = None, - response_schema: Optional[dict] = None, - candidate_count: Optional[int] = None, - stop_sequences: Optional[list] = None, - ) -> None: - locals_ = locals() - for key, value in locals_.items(): - if key != "self" and value is not None: - setattr(self.__class__, key, value) - - @classmethod - def get_config(cls): - return { - k: v - for k, v in cls.__dict__.items() - if not k.startswith("__") - and not isinstance( - v, - ( - types.FunctionType, - types.BuiltinFunctionType, - classmethod, - staticmethod, - ), - ) - and v is not None - } - - def get_supported_openai_params(self): - return [ - "temperature", - "top_p", - "max_tokens", - "max_completion_tokens", - "stream", - "tools", - "tool_choice", - "functions", - "response_format", - "n", - "stop", - ] - - def _map_function(self, value: List[dict]) -> List[Tools]: - gtool_func_declarations = [] - googleSearchRetrieval: Optional[dict] = None - - for tool in value: - openai_function_object: Optional[ChatCompletionToolParamFunctionChunk] = ( - None - ) - if "function" in tool: # tools list - openai_function_object = ChatCompletionToolParamFunctionChunk( # type: ignore - **tool["function"] - ) - elif "name" in tool: # functions list - openai_function_object = ChatCompletionToolParamFunctionChunk(**tool) # type: ignore - - # check if grounding - if tool.get("googleSearchRetrieval", None) is not None: - googleSearchRetrieval = tool["googleSearchRetrieval"] - elif openai_function_object is not None: - gtool_func_declaration = FunctionDeclaration( - name=openai_function_object["name"], - description=openai_function_object.get("description", ""), - parameters=openai_function_object.get("parameters", {}), - ) - gtool_func_declarations.append(gtool_func_declaration) - else: - # assume it's a provider-specific param - verbose_logger.warning( - "Invalid tool={}. Use `litellm.set_verbose` or `litellm --detailed_debug` to see raw request." 
- ) - - _tools = Tools( - function_declarations=gtool_func_declarations, - ) - if googleSearchRetrieval is not None: - _tools["googleSearchRetrieval"] = googleSearchRetrieval - return [_tools] - - def map_tool_choice_values( - self, model: str, tool_choice: Union[str, dict] - ) -> Optional[ToolConfig]: - if tool_choice == "none": - return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="NONE")) - elif tool_choice == "required": - return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="ANY")) - elif tool_choice == "auto": - return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="AUTO")) - elif isinstance(tool_choice, dict): - # only supported for anthropic + mistral models - https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ToolChoice.html - name = tool_choice.get("function", {}).get("name", "") - return ToolConfig( - functionCallingConfig=FunctionCallingConfig( - mode="ANY", allowed_function_names=[name] - ) - ) - else: - raise litellm.utils.UnsupportedParamsError( - message="VertexAI doesn't support tool_choice={}. Supported tool_choice values=['auto', 'required', json object]. To drop it from the call, set `litellm.drop_params = True.".format( - tool_choice - ), - status_code=400, - ) - - def map_openai_params( - self, - model: str, - non_default_params: dict, - optional_params: dict, - ): - for param, value in non_default_params.items(): - if param == "temperature": - optional_params["temperature"] = value - if param == "top_p": - optional_params["top_p"] = value - if ( - param == "stream" and value is True - ): # sending stream = False, can cause it to get passed unchecked and raise issues - optional_params["stream"] = value - if param == "n": - optional_params["candidate_count"] = value - if param == "stop": - if isinstance(value, str): - optional_params["stop_sequences"] = [value] - elif isinstance(value, list): - optional_params["stop_sequences"] = value - if param == "max_tokens" or param == "max_completion_tokens": - optional_params["max_output_tokens"] = value - if param == "response_format": # type: ignore - if value["type"] == "json_object": # type: ignore - if value["type"] == "json_object": # type: ignore - optional_params["response_mime_type"] = "application/json" - elif value["type"] == "text": # type: ignore - optional_params["response_mime_type"] = "text/plain" - if "response_schema" in value: # type: ignore - optional_params["response_mime_type"] = "application/json" - optional_params["response_schema"] = value["response_schema"] # type: ignore - elif value["type"] == "json_schema": # type: ignore - if "json_schema" in value and "schema" in value["json_schema"]: # type: ignore - optional_params["response_mime_type"] = "application/json" - optional_params["response_schema"] = value["json_schema"]["schema"] # type: ignore - if (param == "tools" or param == "functions") and isinstance(value, list): - optional_params["tools"] = self._map_function(value=value) - optional_params["litellm_param_is_function_call"] = ( - True if param == "functions" else False - ) - if param == "tool_choice" and ( - isinstance(value, str) or isinstance(value, dict) - ): - _tool_choice_value = self.map_tool_choice_values( - model=model, tool_choice=value # type: ignore - ) - if _tool_choice_value is not None: - optional_params["tool_choice"] = _tool_choice_value - return optional_params - - def get_mapped_special_auth_params(self) -> dict: - """ - Common auth params across bedrock/vertex_ai/azure/watsonx - """ - return {"project": 
"vertex_project", "region_name": "vertex_location"} - - def map_special_auth_params(self, non_default_params: dict, optional_params: dict): - mapped_params = self.get_mapped_special_auth_params() - - for param, value in non_default_params.items(): - if param in mapped_params: - optional_params[mapped_params[param]] = value - return optional_params - - def get_flagged_finish_reasons(self) -> Dict[str, str]: - """ - Return Dictionary of finish reasons which indicate response was flagged - - and what it means - """ - return { - "SAFETY": "The token generation was stopped as the response was flagged for safety reasons. NOTE: When streaming the Candidate.content will be empty if content filters blocked the output.", - "RECITATION": "The token generation was stopped as the response was flagged for unauthorized citations.", - "BLOCKLIST": "The token generation was stopped as the response was flagged for the terms which are included from the terminology blocklist.", - "PROHIBITED_CONTENT": "The token generation was stopped as the response was flagged for the prohibited contents.", - "SPII": "The token generation was stopped as the response was flagged for Sensitive Personally Identifiable Information (SPII) contents.", - } - - class VertexGeminiConfig: """ Reference: https://cloud.google.com/vertex-ai/docs/generative-ai/chat/test-chat-prompts @@ -752,6 +525,108 @@ class VertexGeminiConfig: return exception_string +class GoogleAIStudioGeminiConfig( + VertexGeminiConfig +): # key diff from VertexAI - 'frequency_penalty' and 'presence_penalty' not supported + """ + Reference: https://ai.google.dev/api/rest/v1beta/GenerationConfig + + The class `GoogleAIStudioGeminiConfig` provides configuration for the Google AI Studio's Gemini API interface. Below are the parameters: + + - `temperature` (float): This controls the degree of randomness in token selection. + + - `max_output_tokens` (integer): This sets the limitation for the maximum amount of token in the text output. In this case, the default value is 256. + + - `top_p` (float): The tokens are selected from the most probable to the least probable until the sum of their probabilities equals the `top_p` value. Default is 0.95. + + - `top_k` (integer): The value of `top_k` determines how many of the most probable tokens are considered in the selection. For example, a `top_k` of 1 means the selected token is the most probable among all tokens. The default value is 40. + + - `response_mime_type` (str): The MIME type of the response. The default value is 'text/plain'. Other values - `application/json`. + + - `response_schema` (dict): Optional. Output response schema of the generated candidate text when response mime type can have schema. Schema can be objects, primitives or arrays and is a subset of OpenAPI schema. If set, a compatible response_mime_type must also be set. Compatible mimetypes: application/json: Schema for JSON response. + + - `candidate_count` (int): Number of generated responses to return. + + - `stop_sequences` (List[str]): The set of character sequences (up to 5) that will stop output generation. If specified, the API will stop at the first appearance of a stop sequence. The stop sequence will not be included as part of the response. + + Note: Please make sure to modify the default parameters as required for your use case. 
+ """ + + temperature: Optional[float] = None + max_output_tokens: Optional[int] = None + top_p: Optional[float] = None + top_k: Optional[int] = None + response_mime_type: Optional[str] = None + response_schema: Optional[dict] = None + candidate_count: Optional[int] = None + stop_sequences: Optional[list] = None + + def __init__( + self, + temperature: Optional[float] = None, + max_output_tokens: Optional[int] = None, + top_p: Optional[float] = None, + top_k: Optional[int] = None, + response_mime_type: Optional[str] = None, + response_schema: Optional[dict] = None, + candidate_count: Optional[int] = None, + stop_sequences: Optional[list] = None, + ) -> None: + locals_ = locals() + for key, value in locals_.items(): + if key != "self" and value is not None: + setattr(self.__class__, key, value) + + @classmethod + def get_config(cls): + return { + k: v + for k, v in cls.__dict__.items() + if not k.startswith("__") + and not isinstance( + v, + ( + types.FunctionType, + types.BuiltinFunctionType, + classmethod, + staticmethod, + ), + ) + and v is not None + } + + def get_supported_openai_params(self): + return [ + "temperature", + "top_p", + "max_tokens", + "max_completion_tokens", + "stream", + "tools", + "tool_choice", + "functions", + "response_format", + "n", + "stop", + ] + + def map_openai_params( + self, + model: str, + non_default_params: Dict, + optional_params: Dict, + drop_params: bool, + ): + # drop frequency_penalty and presence_penalty + if "frequency_penalty" in non_default_params: + del non_default_params["frequency_penalty"] + if "presence_penalty" in non_default_params: + del non_default_params["presence_penalty"] + return super().map_openai_params( + model, non_default_params, optional_params, drop_params + ) + + async def make_call( client: Optional[AsyncHTTPHandler], api_base: str, diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/ai21/transformation.py b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/ai21/transformation.py index 2d9d6076e..cb3364445 100644 --- a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/ai21/transformation.py +++ b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/ai21/transformation.py @@ -44,7 +44,11 @@ class VertexAIAi21Config: return litellm.OpenAIConfig().get_supported_openai_params(model="gpt-3.5-turbo") def map_openai_params( - self, non_default_params: dict, optional_params: dict, model: str + self, + non_default_params: dict, + optional_params: dict, + model: str, + drop_params: bool, ): if "max_completion_tokens" in non_default_params: non_default_params["max_tokens"] = non_default_params.pop( @@ -54,4 +58,5 @@ class VertexAIAi21Config: non_default_params=non_default_params, optional_params=optional_params, model=model, + drop_params=drop_params, ) diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/llama3/transformation.py b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/llama3/transformation.py index 683e0ff8e..2170a9241 100644 --- a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/llama3/transformation.py +++ b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/llama3/transformation.py @@ -50,7 +50,11 @@ class VertexAILlama3Config: return litellm.OpenAIConfig().get_supported_openai_params(model="gpt-3.5-turbo") def map_openai_params( - self, non_default_params: dict, optional_params: dict, model: str + self, + non_default_params: dict, + optional_params: 
dict, + model: str, + drop_params: bool, ): if "max_completion_tokens" in non_default_params: non_default_params["max_tokens"] = non_default_params.pop( @@ -60,4 +64,5 @@ class VertexAILlama3Config: non_default_params=non_default_params, optional_params=optional_params, model=model, + drop_params=drop_params, ) diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 8cc73b050..c52972be0 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -31,15 +31,21 @@ model_list: - model_name: "anthropic/*" litellm_params: model: "anthropic/*" - - model_name: "openai/*" + - model_name: "*" litellm_params: model: "openai/*" - model_name: "fireworks_ai/*" litellm_params: model: "fireworks_ai/*" configurable_clientside_auth_params: ["api_base"] - + - model_name: "gemini-flash-experimental" + litellm_params: + model: "vertex_ai/gemini-flash-experimental" litellm_settings: - success_callback: ["langfuse"] - cache: true \ No newline at end of file + success_callback: ["langfuse", "prometheus"] + failure_callback: ["prometheus"] + +general_settings: + proxy_budget_rescheduler_min_time: 1 + proxy_budget_rescheduler_max_time: 1 \ No newline at end of file diff --git a/litellm/proxy/auth/rds_iam_token.py b/litellm/proxy/auth/rds_iam_token.py index f83621584..474a9cac2 100644 --- a/litellm/proxy/auth/rds_iam_token.py +++ b/litellm/proxy/auth/rds_iam_token.py @@ -1,5 +1,5 @@ import os -from typing import Optional, Union +from typing import Any, Optional, Union import httpx @@ -34,7 +34,7 @@ def init_rds_client( # Iterate over parameters and update if needed for i, param in enumerate(params_to_check): if param and param.startswith("os.environ/"): - params_to_check[i] = get_secret(param) + params_to_check[i] = get_secret(param) # type: ignore # Assign updated values back to parameters ( aws_access_key_id, @@ -62,13 +62,13 @@ def init_rds_client( import boto3 if isinstance(timeout, float): - config = boto3.session.Config(connect_timeout=timeout, read_timeout=timeout) + config = boto3.session.Config(connect_timeout=timeout, read_timeout=timeout) # type: ignore elif isinstance(timeout, httpx.Timeout): - config = boto3.session.Config( + config = boto3.session.Config( # type: ignore connect_timeout=timeout.connect, read_timeout=timeout.read ) else: - config = boto3.session.Config() + config = boto3.session.Config() # type: ignore ### CHECK STS ### if ( @@ -105,6 +105,7 @@ def init_rds_client( region_name=region_name, config=config, ) + elif aws_role_name is not None and aws_session_name is not None: # use sts if role name passed in sts_client = boto3.client( @@ -144,6 +145,7 @@ def init_rds_client( region_name=region_name, config=config, ) + else: # aws_access_key_id is None, assume user is trying to auth using env variables # boto3 automatically reads env variables @@ -157,25 +159,31 @@ def init_rds_client( return client -def generate_iam_auth_token(db_host, db_port, db_user) -> str: +def generate_iam_auth_token( + db_host, db_port, db_user, client: Optional[Any] = None +) -> str: from urllib.parse import quote import boto3 - boto_client = init_rds_client( - aws_region_name=os.getenv("AWS_REGION_NAME"), - aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), - aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), - aws_session_name=os.getenv("AWS_SESSION_NAME"), - aws_profile_name=os.getenv("AWS_PROFILE_NAME"), - aws_role_name=os.getenv("AWS_ROLE_NAME", os.getenv("AWS_ROLE_ARN")), - aws_web_identity_token=os.getenv( - 
"AWS_WEB_IDENTITY_TOKEN", os.getenv("AWS_WEB_IDENTITY_TOKEN_FILE") - ), - ) + if client is None: + boto_client = init_rds_client( + aws_region_name=os.getenv("AWS_REGION_NAME"), + aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), + aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), + aws_session_name=os.getenv("AWS_SESSION_NAME"), + aws_profile_name=os.getenv("AWS_PROFILE_NAME"), + aws_role_name=os.getenv("AWS_ROLE_NAME", os.getenv("AWS_ROLE_ARN")), + aws_web_identity_token=os.getenv( + "AWS_WEB_IDENTITY_TOKEN", os.getenv("AWS_WEB_IDENTITY_TOKEN_FILE") + ), + ) + else: + boto_client = client token = boto_client.generate_db_auth_token( DBHostname=db_host, Port=db_port, DBUsername=db_user ) cleaned_token = quote(token, safe="") + return cleaned_token diff --git a/litellm/proxy/db/prisma_client.py b/litellm/proxy/db/prisma_client.py new file mode 100644 index 000000000..5e7fc4f79 --- /dev/null +++ b/litellm/proxy/db/prisma_client.py @@ -0,0 +1,106 @@ +import asyncio +import os +import urllib +import urllib.parse +from datetime import datetime, timedelta +from typing import Any, Callable, Optional + + +class PrismaWrapper: + def __init__(self, original_prisma: Any, iam_token_db_auth: bool): + self._original_prisma = original_prisma + self.iam_token_db_auth = iam_token_db_auth + + def is_token_expired(self, token_url: Optional[str]) -> bool: + if token_url is None: + return True + # Decode the token URL to handle URL-encoded characters + decoded_url = urllib.parse.unquote(token_url) + + # Parse the token URL + parsed_url = urllib.parse.urlparse(decoded_url) + + # Parse the query parameters from the path component (if they exist there) + query_params = urllib.parse.parse_qs(parsed_url.query) + + # Get expiration time from the query parameters + expires = query_params.get("X-Amz-Expires", [None])[0] + if expires is None: + raise ValueError("X-Amz-Expires parameter is missing or invalid.") + + expires_int = int(expires) + + # Get the token's creation time from the X-Amz-Date parameter + token_time_str = query_params.get("X-Amz-Date", [""])[0] + if not token_time_str: + raise ValueError("X-Amz-Date parameter is missing or invalid.") + + # Ensure the token time string is parsed correctly + try: + token_time = datetime.strptime(token_time_str, "%Y%m%dT%H%M%SZ") + except ValueError as e: + raise ValueError(f"Invalid X-Amz-Date format: {e}") + + # Calculate the expiration time + expiration_time = token_time + timedelta(seconds=expires_int) + + # Current time in UTC + current_time = datetime.utcnow() + + # Check if the token is expired + return current_time > expiration_time + + def get_rds_iam_token(self) -> Optional[str]: + if self.iam_token_db_auth: + from litellm.proxy.auth.rds_iam_token import generate_iam_auth_token + + db_host = os.getenv("DATABASE_HOST") + db_port = os.getenv("DATABASE_PORT") + db_user = os.getenv("DATABASE_USER") + db_name = os.getenv("DATABASE_NAME") + db_schema = os.getenv("DATABASE_SCHEMA") + + token = generate_iam_auth_token( + db_host=db_host, db_port=db_port, db_user=db_user + ) + + # print(f"token: {token}") + _db_url = f"postgresql://{db_user}:{token}@{db_host}:{db_port}/{db_name}" + if db_schema: + _db_url += f"?schema={db_schema}" + + os.environ["DATABASE_URL"] = _db_url + return _db_url + return None + + async def recreate_prisma_client( + self, new_db_url: str, http_client: Optional[Any] = None + ): + from prisma import Prisma # type: ignore + + if http_client is not None: + self._original_prisma = Prisma(http=http_client) + else: + self._original_prisma = 
Prisma() + + await self._original_prisma.connect() + + def __getattr__(self, name: str): + original_attr = getattr(self._original_prisma, name) + if self.iam_token_db_auth: + db_url = os.getenv("DATABASE_URL") + if self.is_token_expired(db_url): + db_url = self.get_rds_iam_token() + loop = asyncio.get_event_loop() + + if db_url: + if loop.is_running(): + asyncio.run_coroutine_threadsafe( + self.recreate_prisma_client(db_url), loop + ) + else: + asyncio.run(self.recreate_prisma_client(db_url)) + else: + raise ValueError("Failed to get RDS IAM token") + + return original_attr diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index 7da9fe479..9003b885e 100644 --- a/litellm/proxy/proxy_cli.py +++ b/litellm/proxy/proxy_cli.py @@ -40,7 +40,7 @@ def append_query_params(url, params) -> str: parsed_query.update(params) encoded_query = urlparse.urlencode(parsed_query, doseq=True) modified_url = urlparse.urlunparse(parsed_url._replace(query=encoded_query)) - return modified_url + return modified_url # type: ignore def run_ollama_serve(): @@ -287,7 +287,7 @@ def run_server( save_worker_config, ) if version == True: - pkg_version = importlib.metadata.version("litellm") + pkg_version = importlib.metadata.version("litellm") # type: ignore click.echo(f"\nLiteLLM: Current Version = {pkg_version}\n") return if model and "ollama" in model and api_base is None: @@ -338,14 +338,14 @@ def run_server( futures = [] start_time = time.time() # Make concurrent calls - with concurrent.futures.ThreadPoolExecutor( + with concurrent.futures.ThreadPoolExecutor( # type: ignore max_workers=concurrent_calls ) as executor: for _ in range(concurrent_calls): futures.append(executor.submit(_make_openai_completion)) # Wait for all futures to complete - concurrent.futures.wait(futures) + concurrent.futures.wait(futures) # type: ignore # Summarize the results successful_calls = 0 @@ -476,6 +476,7 @@ def run_server( _db_url += f"?schema={db_schema}" os.environ["DATABASE_URL"] = _db_url + os.environ["IAM_TOKEN_DB_AUTH"] = "True" ### DECRYPT ENV VAR ### @@ -600,8 +601,9 @@ def run_server( 0, os.path.abspath("../..") ) # Adds the parent directory to the system path - for litellm local dev import litellm + from litellm import get_secret_str - database_url = litellm.get_secret(database_url, default_value=None) + database_url = get_secret_str(database_url, default_value=None) os.chdir(original_dir) if database_url is not None and isinstance(database_url, str): os.environ["DATABASE_URL"] = database_url @@ -650,6 +652,8 @@ def run_server( subprocess.run(["prisma", "db", "push", "--accept-data-loss"]) break # Exit the loop if the subprocess succeeds except subprocess.CalledProcessError as e: + import time + print(f"Error: {e}") # noqa time.sleep(random.randrange(start=1, stop=5)) finally: @@ -728,13 +732,17 @@ def run_server( def load_config(self): # note: This Loads the gunicorn config - has nothing to do with LiteLLM Proxy config - config = { - key: value - for key, value in self.options.items() - if key in self.cfg.settings and value is not None - } + if self.cfg is not None: + config = { + key: value + for key, value in self.options.items() + if key in self.cfg.settings and value is not None + } + else: + config = {} for key, value in config.items(): - self.cfg.set(key.lower(), value) + if self.cfg is not None: + self.cfg.set(key.lower(), value) def load(self): # gunicorn app function diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 80eea79fb..18361bca1 100644 --- a/litellm/proxy/utils.py +++ 
b/litellm/proxy/utils.py @@ -65,11 +65,13 @@ from litellm.proxy.db.create_views import ( create_missing_views, should_create_missing_views, ) +from litellm.proxy.db.prisma_client import PrismaWrapper from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter from litellm.proxy.hooks.parallel_request_limiter import ( _PROXY_MaxParallelRequestsHandler, ) +from litellm.secret_managers.main import str_to_bool from litellm.types.utils import CallTypes, LoggedLiteLLMParams if TYPE_CHECKING: @@ -1017,6 +1019,9 @@ class PrismaClient: ) ## init logging object self.proxy_logging_obj = proxy_logging_obj + self.iam_token_db_auth: Optional[bool] = str_to_bool( + os.getenv("IAM_TOKEN_DB_AUTH") + ) try: from prisma import Prisma # type: ignore except Exception as e: @@ -1043,9 +1048,23 @@ class PrismaClient: from prisma import Prisma # type: ignore verbose_proxy_logger.debug("Connecting Prisma Client to DB..") if http_client is not None: - self.db = Prisma(http=http_client) + self.db = PrismaWrapper( + original_prisma=Prisma(http=http_client), + iam_token_db_auth=( + self.iam_token_db_auth + if self.iam_token_db_auth is not None + else False + ), + ) else: - self.db = Prisma() # Client to connect to Prisma db + self.db = PrismaWrapper( + original_prisma=Prisma(), + iam_token_db_auth=( + self.iam_token_db_auth + if self.iam_token_db_auth is not None + else False + ), + ) # Client to connect to Prisma db verbose_proxy_logger.debug("Success - Connected Prisma Client to DB") def hash_token(self, token: str): @@ -1141,9 +1160,9 @@ class PrismaClient: "LiteLLM_VerificationTokenView Created in DB!" ) else: - should_create_views = await should_create_missing_views(db=self.db) + should_create_views = await should_create_missing_views(db=self.db.db) # type: ignore if should_create_views: - await create_missing_views(db=self.db) + await create_missing_views(db=self.db) # type: ignore else: # don't block execution if these views are missing # Convert lists to sets for efficient difference calculation diff --git a/litellm/secret_managers/main.py b/litellm/secret_managers/main.py index e98140768..ec277a6ae 100644 --- a/litellm/secret_managers/main.py +++ b/litellm/secret_managers/main.py @@ -29,7 +29,7 @@ def _is_base64(s): return False -def str_to_bool(value: str) -> Optional[bool]: +def str_to_bool(value: Optional[str]) -> Optional[bool]: """ Converts a string to a boolean if it's a recognized boolean string. Returns None if the string is not a recognized boolean value. @@ -37,6 +37,9 @@ def str_to_bool(value: str) -> Optional[bool]: :param value: The string to be checked. :return: True or False if the string is a recognized boolean, otherwise None. 
""" + if value is None: + return None + true_values = {"true"} false_values = {"false"} diff --git a/litellm/tests/test_alangfuse.py b/litellm/tests/test_alangfuse.py index 903b01411..fa1260637 100644 --- a/litellm/tests/test_alangfuse.py +++ b/litellm/tests/test_alangfuse.py @@ -968,3 +968,259 @@ def test_aaalangfuse_dynamic_logging(): ) langfuse_client.get_trace(id=trace_id) + + +import datetime + +generation_params = { + "name": "litellm-acompletion", + "id": "time-10-35-32-316778_chatcmpl-ABQDEzVJS8fziPdvkeTA3tnQaxeMX", + "start_time": datetime.datetime(2024, 9, 25, 10, 35, 32, 316778), + "end_time": datetime.datetime(2024, 9, 25, 10, 35, 32, 897141), + "model": "gpt-4o", + "model_parameters": { + "stream": False, + "max_retries": 0, + "extra_body": "{}", + "system_fingerprint": "fp_52a7f40b0b", + }, + "input": { + "messages": [ + {"content": "<>", "role": "system"}, + {"content": "<>", "role": "user"}, + ] + }, + "output": { + "content": "Hello! It looks like your message might have been sent by accident. How can I assist you today?", + "role": "assistant", + "tool_calls": None, + "function_call": None, + }, + "usage": {"prompt_tokens": 13, "completion_tokens": 21, "total_cost": 0.00038}, + "metadata": { + "prompt": { + "name": "conversational-service-answer_question_restricted_reply", + "version": 9, + "config": {}, + "labels": ["latest", "staging", "production"], + "tags": ["conversational-service"], + "prompt": [ + {"role": "system", "content": "<>"}, + {"role": "user", "content": "{{text}}"}, + ], + }, + "requester_metadata": { + "session_id": "e953a71f-e129-4cf5-ad11-ad18245022f1", + "trace_name": "jess", + "tags": ["conversational-service", "generative-ai-engine", "staging"], + "prompt": { + "name": "conversational-service-answer_question_restricted_reply", + "version": 9, + "config": {}, + "labels": ["latest", "staging", "production"], + "tags": ["conversational-service"], + "prompt": [ + {"role": "system", "content": "<>"}, + {"role": "user", "content": "{{text}}"}, + ], + }, + }, + "user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b", + "litellm_api_version": "0.0.0", + "user_api_key_user_id": "default_user_id", + "user_api_key_spend": 0.0, + "user_api_key_metadata": {}, + "requester_ip_address": "127.0.0.1", + "model_group": "gpt-4o", + "model_group_size": 0, + "deployment": "gpt-4o", + "model_info": { + "id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4", + "db_model": False, + }, + "hidden_params": { + "headers": { + "date": "Wed, 25 Sep 2024 17:35:32 GMT", + "content-type": "application/json", + "transfer-encoding": "chunked", + "connection": "keep-alive", + "access-control-expose-headers": "X-Request-ID", + "openai-organization": "reliablekeystest", + "openai-processing-ms": "329", + "openai-version": "2020-10-01", + "strict-transport-security": "max-age=31536000; includeSubDomains; preload", + "x-ratelimit-limit-requests": "10000", + "x-ratelimit-limit-tokens": "30000000", + "x-ratelimit-remaining-requests": "9999", + "x-ratelimit-remaining-tokens": "29999980", + "x-ratelimit-reset-requests": "6ms", + "x-ratelimit-reset-tokens": "0s", + "x-request-id": "req_fdff3bfa11c391545d2042d46473214f", + "cf-cache-status": "DYNAMIC", + "set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, 
_cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None", + "x-content-type-options": "nosniff", + "server": "cloudflare", + "cf-ray": "8c8cc573becb232c-SJC", + "content-encoding": "gzip", + "alt-svc": 'h3=":443"; ma=86400', + }, + "additional_headers": { + "llm_provider-date": "Wed, 25 Sep 2024 17:35:32 GMT", + "llm_provider-content-type": "application/json", + "llm_provider-transfer-encoding": "chunked", + "llm_provider-connection": "keep-alive", + "llm_provider-access-control-expose-headers": "X-Request-ID", + "llm_provider-openai-organization": "reliablekeystest", + "llm_provider-openai-processing-ms": "329", + "llm_provider-openai-version": "2020-10-01", + "llm_provider-strict-transport-security": "max-age=31536000; includeSubDomains; preload", + "llm_provider-x-ratelimit-limit-requests": "10000", + "llm_provider-x-ratelimit-limit-tokens": "30000000", + "llm_provider-x-ratelimit-remaining-requests": "9999", + "llm_provider-x-ratelimit-remaining-tokens": "29999980", + "llm_provider-x-ratelimit-reset-requests": "6ms", + "llm_provider-x-ratelimit-reset-tokens": "0s", + "llm_provider-x-request-id": "req_fdff3bfa11c391545d2042d46473214f", + "llm_provider-cf-cache-status": "DYNAMIC", + "llm_provider-set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None", + "llm_provider-x-content-type-options": "nosniff", + "llm_provider-server": "cloudflare", + "llm_provider-cf-ray": "8c8cc573becb232c-SJC", + "llm_provider-content-encoding": "gzip", + "llm_provider-alt-svc": 'h3=":443"; ma=86400', + }, + "litellm_call_id": "1fa31658-20af-40b5-9ac9-60fd7b5ad98c", + "model_id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4", + "api_base": "https://api.openai.com", + "optional_params": { + "stream": False, + "max_retries": 0, + "extra_body": {}, + }, + "response_cost": 0.00038, + }, + "litellm_response_cost": 0.00038, + "api_base": "https://api.openai.com/v1/", + "cache_hit": False, + }, + "level": "DEFAULT", + "version": None, +} + + +@pytest.mark.parametrize( + "prompt", + [ + [ + {"role": "system", "content": "<>"}, + {"role": "user", "content": "{{text}}"}, + ], + "hello world", + ], +) +def test_langfuse_prompt_type(prompt): + + from litellm.integrations.langfuse import _add_prompt_to_generation_params + + clean_metadata = { + "prompt": { + "name": "conversational-service-answer_question_restricted_reply", + "version": 9, + "config": {}, + "labels": ["latest", "staging", "production"], + "tags": ["conversational-service"], + "prompt": prompt, + }, + "requester_metadata": { + "session_id": "e953a71f-e129-4cf5-ad11-ad18245022f1", + "trace_name": "jess", + "tags": ["conversational-service", "generative-ai-engine", "staging"], + "prompt": { + "name": "conversational-service-answer_question_restricted_reply", + "version": 9, + "config": {}, + "labels": ["latest", "staging", "production"], + "tags": ["conversational-service"], + "prompt": [ + {"role": "system", "content": "<>"}, + {"role": "user", "content": "{{text}}"}, + ], + }, + }, + "user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b", + 
"litellm_api_version": "0.0.0", + "user_api_key_user_id": "default_user_id", + "user_api_key_spend": 0.0, + "user_api_key_metadata": {}, + "requester_ip_address": "127.0.0.1", + "model_group": "gpt-4o", + "model_group_size": 0, + "deployment": "gpt-4o", + "model_info": { + "id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4", + "db_model": False, + }, + "hidden_params": { + "headers": { + "date": "Wed, 25 Sep 2024 17:35:32 GMT", + "content-type": "application/json", + "transfer-encoding": "chunked", + "connection": "keep-alive", + "access-control-expose-headers": "X-Request-ID", + "openai-organization": "reliablekeystest", + "openai-processing-ms": "329", + "openai-version": "2020-10-01", + "strict-transport-security": "max-age=31536000; includeSubDomains; preload", + "x-ratelimit-limit-requests": "10000", + "x-ratelimit-limit-tokens": "30000000", + "x-ratelimit-remaining-requests": "9999", + "x-ratelimit-remaining-tokens": "29999980", + "x-ratelimit-reset-requests": "6ms", + "x-ratelimit-reset-tokens": "0s", + "x-request-id": "req_fdff3bfa11c391545d2042d46473214f", + "cf-cache-status": "DYNAMIC", + "set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None", + "x-content-type-options": "nosniff", + "server": "cloudflare", + "cf-ray": "8c8cc573becb232c-SJC", + "content-encoding": "gzip", + "alt-svc": 'h3=":443"; ma=86400', + }, + "additional_headers": { + "llm_provider-date": "Wed, 25 Sep 2024 17:35:32 GMT", + "llm_provider-content-type": "application/json", + "llm_provider-transfer-encoding": "chunked", + "llm_provider-connection": "keep-alive", + "llm_provider-access-control-expose-headers": "X-Request-ID", + "llm_provider-openai-organization": "reliablekeystest", + "llm_provider-openai-processing-ms": "329", + "llm_provider-openai-version": "2020-10-01", + "llm_provider-strict-transport-security": "max-age=31536000; includeSubDomains; preload", + "llm_provider-x-ratelimit-limit-requests": "10000", + "llm_provider-x-ratelimit-limit-tokens": "30000000", + "llm_provider-x-ratelimit-remaining-requests": "9999", + "llm_provider-x-ratelimit-remaining-tokens": "29999980", + "llm_provider-x-ratelimit-reset-requests": "6ms", + "llm_provider-x-ratelimit-reset-tokens": "0s", + "llm_provider-x-request-id": "req_fdff3bfa11c391545d2042d46473214f", + "llm_provider-cf-cache-status": "DYNAMIC", + "llm_provider-set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None", + "llm_provider-x-content-type-options": "nosniff", + "llm_provider-server": "cloudflare", + "llm_provider-cf-ray": "8c8cc573becb232c-SJC", + "llm_provider-content-encoding": "gzip", + "llm_provider-alt-svc": 'h3=":443"; ma=86400', + }, + "litellm_call_id": "1fa31658-20af-40b5-9ac9-60fd7b5ad98c", + "model_id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4", + "api_base": 
"https://api.openai.com", + "optional_params": {"stream": False, "max_retries": 0, "extra_body": {}}, + "response_cost": 0.00038, + }, + "litellm_response_cost": 0.00038, + "api_base": "https://api.openai.com/v1/", + "cache_hit": False, + } + _add_prompt_to_generation_params( + generation_params=generation_params, clean_metadata=clean_metadata + ) diff --git a/litellm/types/llms/vertex_ai.py b/litellm/types/llms/vertex_ai.py index 0637933e2..145aaa359 100644 --- a/litellm/types/llms/vertex_ai.py +++ b/litellm/types/llms/vertex_ai.py @@ -153,6 +153,7 @@ class GenerationConfig(TypedDict, total=False): presence_penalty: float frequency_penalty: float response_mime_type: Literal["text/plain", "application/json"] + response_schema: dict seed: int diff --git a/litellm/utils.py b/litellm/utils.py index 31150111f..cce70c6f8 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -3239,8 +3239,15 @@ def get_optional_params( non_default_params=non_default_params, optional_params=optional_params, model=model, + drop_params=( + drop_params + if drop_params is not None and isinstance(drop_params, bool) + else False + ), ) - elif custom_llm_provider == "vertex_ai_beta": + elif custom_llm_provider == "vertex_ai_beta" or ( + custom_llm_provider == "vertex_ai" and "gemini" in model + ): supported_params = get_supported_openai_params( model=model, custom_llm_provider=custom_llm_provider ) @@ -3277,6 +3284,11 @@ def get_optional_params( non_default_params=non_default_params, optional_params=optional_params, model=model, + drop_params=( + drop_params + if drop_params is not None and isinstance(drop_params, bool) + else False + ), ) elif custom_llm_provider == "vertex_ai" and model in litellm.vertex_mistral_models: supported_params = get_supported_openai_params( @@ -3301,6 +3313,11 @@ def get_optional_params( non_default_params=non_default_params, optional_params=optional_params, model=model, + drop_params=( + drop_params + if drop_params is not None and isinstance(drop_params, bool) + else False + ), ) elif custom_llm_provider == "sagemaker": ## check if unsupported param passed in @@ -3710,6 +3727,7 @@ def get_optional_params( non_default_params=non_default_params, optional_params=optional_params, model=model, + drop_params=drop_params, ) elif custom_llm_provider == "openrouter": supported_params = get_supported_openai_params( @@ -3818,6 +3836,7 @@ def get_optional_params( non_default_params=non_default_params, optional_params=optional_params, model=model, + drop_params=drop_params, ) elif custom_llm_provider == "azure": supported_params = get_supported_openai_params( diff --git a/package-lock.json b/package-lock.json index c9559251b..2856be614 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5,12 +5,53 @@ "packages": { "": { "dependencies": { + "prisma": "^5.17.0", "react-copy-to-clipboard": "^5.1.0" }, "devDependencies": { "@types/react-copy-to-clipboard": "^5.0.7" } }, + "node_modules/@prisma/debug": { + "version": "5.17.0", + "resolved": "https://registry.npmjs.org/@prisma/debug/-/debug-5.17.0.tgz", + "integrity": "sha512-l7+AteR3P8FXiYyo496zkuoiJ5r9jLQEdUuxIxNCN1ud8rdbH3GTxm+f+dCyaSv9l9WY+29L9czaVRXz9mULfg==" + }, + "node_modules/@prisma/engines": { + "version": "5.17.0", + "resolved": "https://registry.npmjs.org/@prisma/engines/-/engines-5.17.0.tgz", + "integrity": "sha512-+r+Nf+JP210Jur+/X8SIPLtz+uW9YA4QO5IXA+KcSOBe/shT47bCcRMTYCbOESw3FFYFTwe7vU6KTWHKPiwvtg==", + "hasInstallScript": true, + "dependencies": { + "@prisma/debug": "5.17.0", + "@prisma/engines-version": 
"5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053", + "@prisma/fetch-engine": "5.17.0", + "@prisma/get-platform": "5.17.0" + } + }, + "node_modules/@prisma/engines-version": { + "version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053", + "resolved": "https://registry.npmjs.org/@prisma/engines-version/-/engines-version-5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053.tgz", + "integrity": "sha512-tUuxZZysZDcrk5oaNOdrBnnkoTtmNQPkzINFDjz7eG6vcs9AVDmA/F6K5Plsb2aQc/l5M2EnFqn3htng9FA4hg==" + }, + "node_modules/@prisma/fetch-engine": { + "version": "5.17.0", + "resolved": "https://registry.npmjs.org/@prisma/fetch-engine/-/fetch-engine-5.17.0.tgz", + "integrity": "sha512-ESxiOaHuC488ilLPnrv/tM2KrPhQB5TRris/IeIV4ZvUuKeaicCl4Xj/JCQeG9IlxqOgf1cCg5h5vAzlewN91Q==", + "dependencies": { + "@prisma/debug": "5.17.0", + "@prisma/engines-version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053", + "@prisma/get-platform": "5.17.0" + } + }, + "node_modules/@prisma/get-platform": { + "version": "5.17.0", + "resolved": "https://registry.npmjs.org/@prisma/get-platform/-/get-platform-5.17.0.tgz", + "integrity": "sha512-UlDgbRozCP1rfJ5Tlkf3Cnftb6srGrEQ4Nm3og+1Se2gWmCZ0hmPIi+tQikGDUVLlvOWx3Gyi9LzgRP+HTXV9w==", + "dependencies": { + "@prisma/debug": "5.17.0" + } + }, "node_modules/@types/prop-types": { "version": "15.7.12", "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.12.tgz", @@ -74,6 +115,21 @@ "node": ">=0.10.0" } }, + "node_modules/prisma": { + "version": "5.17.0", + "resolved": "https://registry.npmjs.org/prisma/-/prisma-5.17.0.tgz", + "integrity": "sha512-m4UWkN5lBE6yevqeOxEvmepnL5cNPEjzMw2IqDB59AcEV6w7D8vGljDLd1gPFH+W6gUxw9x7/RmN5dCS/WTPxA==", + "hasInstallScript": true, + "dependencies": { + "@prisma/engines": "5.17.0" + }, + "bin": { + "prisma": "build/index.js" + }, + "engines": { + "node": ">=16.13" + } + }, "node_modules/prop-types": { "version": "15.8.1", "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz", diff --git a/package.json b/package.json index 8c12b767d..849d94f08 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,6 @@ { "dependencies": { + "prisma": "^5.17.0", "react-copy-to-clipboard": "^5.1.0" }, "devDependencies": { diff --git a/tests/llm_translation/test_max_completion_tokens.py b/tests/llm_translation/test_max_completion_tokens.py index 6d5eb8e3c..2c61404e3 100644 --- a/tests/llm_translation/test_max_completion_tokens.py +++ b/tests/llm_translation/test_max_completion_tokens.py @@ -141,12 +141,12 @@ def test_all_model_configs(): "max_completion_tokens" in VertexAILlama3Config().get_supported_openai_params() ) assert VertexAILlama3Config().map_openai_params( - {"max_completion_tokens": 10}, {}, "llama3" + {"max_completion_tokens": 10}, {}, "llama3", drop_params=False ) == {"max_tokens": 10} assert "max_completion_tokens" in VertexAIAi21Config().get_supported_openai_params() assert VertexAIAi21Config().map_openai_params( - {"max_completion_tokens": 10}, {}, "llama3" + {"max_completion_tokens": 10}, {}, "llama3", drop_params=False ) == {"max_tokens": 10} from litellm.llms.fireworks_ai.chat.fireworks_ai_transformation import ( @@ -332,6 +332,7 @@ def test_all_model_configs(): model="gemini-1.0-pro", non_default_params={"max_completion_tokens": 10}, optional_params={}, + drop_params=False, ) == {"max_output_tokens": 10} assert "max_completion_tokens" in VertexGeminiConfig().get_supported_openai_params() diff --git a/tests/llm_translation/test_optional_params.py b/tests/llm_translation/test_optional_params.py index 
1250dbe24..e22e724d0 100644 --- a/tests/llm_translation/test_optional_params.py +++ b/tests/llm_translation/test_optional_params.py @@ -600,3 +600,35 @@ def test_o1_model_params(): ) assert optional_params["seed"] == 10 assert optional_params["user"] == "John" + + +@pytest.mark.parametrize( + "temperature, expected_error", + [(0.2, True), (1, False)], +) +def test_o1_model_temperature_params(temperature, expected_error): + if expected_error: + with pytest.raises(litellm.UnsupportedParamsError): + get_optional_params( + model="o1-preview-2024-09-12", + custom_llm_provider="openai", + temperature=temperature, + ) + else: + get_optional_params( + model="o1-preview-2024-09-12", + custom_llm_provider="openai", + temperature=temperature, + ) + + +def test_unmapped_gemini_model_params(): + """ + Test if unmapped gemini model optional params are translated correctly + """ + optional_params = get_optional_params( + model="gemini-new-model", + custom_llm_provider="vertex_ai", + stop="stop_word", + ) + assert optional_params["stop_sequences"] == ["stop_word"]
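The new `test_o1_model_temperature_params` test above covers the tightened o1 temperature validation in this patch: values other than 0 or 1 raise `UnsupportedParamsError` unless param dropping is enabled. Below is a minimal sketch of the opt-out path, assuming `get_optional_params` accepts a `drop_params` keyword as the changes in `litellm/utils.py` suggest; the model name is illustrative.

```python
import litellm
from litellm.utils import get_optional_params

# Without drop_params, an o1 temperature outside {0, 1} raises
# litellm.UnsupportedParamsError (see test_o1_model_temperature_params).

# With per-call drop_params (or the global `litellm.drop_params = True`),
# the unsupported temperature is dropped instead of raising.
optional_params = get_optional_params(
    model="o1-preview-2024-09-12",
    custom_llm_provider="openai",
    temperature=0.2,      # unsupported for o1
    drop_params=True,
)
assert "temperature" not in optional_params
```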