diff --git a/litellm/integrations/email_templates/templates.py b/litellm/integrations/email_templates/templates.py new file mode 100644 index 000000000..7029e8ce1 --- /dev/null +++ b/litellm/integrations/email_templates/templates.py @@ -0,0 +1,62 @@ +""" +Email Templates used by the LiteLLM Email Service in slack_alerting.py +""" + +KEY_CREATED_EMAIL_TEMPLATE = """ + LiteLLM Logo + +

Hi {recipient_email},
+ + I'm happy to provide you with an OpenAI Proxy API Key, loaded with ${key_budget} per month.

+ + + Key:

{key_token}

+
+ +

Usage Example

+ + Detailed Documentation on Usage with OpenAI Python SDK, Langchain, LlamaIndex, Curl + +
+
+                    import openai
+                    client = openai.OpenAI(
+                        api_key="{key_token}",
+                        base_url={{base_url}}
+                    )
+
+                    response = client.chat.completions.create(
+                        model="gpt-3.5-turbo", # model to send to the proxy
+                        messages = [
+                            {{
+                                "role": "user",
+                                "content": "this is a test request, write a short poem"
+                            }}
+                        ]
+                    )
+
+                    
+ + + If you have any questions, please send an email to {email_support_contact}

+ + Best,
+ The LiteLLM team
+""" + + +USER_INVITED_EMAIL_TEMPLATE = """ + LiteLLM Logo + +

Hi {recipient_email},
+ + You were invited to use OpenAI Proxy API for team {team_name}

+ + Get Started here

+ + + If you have any questions, please send an email to {email_support_contact}

+ + Best,
+ The LiteLLM team
+""" diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py index 49a8d0e2c..8c271c684 100644 --- a/litellm/integrations/slack_alerting.py +++ b/litellm/integrations/slack_alerting.py @@ -18,6 +18,7 @@ from litellm.proxy._types import WebhookEvent import random from typing import TypedDict from openai import APIError +from .email_templates.templates import * import litellm.types from litellm.types.router import LiteLLM_Params @@ -1190,105 +1191,106 @@ Model Info: raise ValueError( f"Trying to Customize Email Alerting\n {CommonProxyErrors.not_premium_user.value}" ) + return - async def send_key_created_email(self, webhook_event: WebhookEvent) -> bool: - from litellm.proxy.utils import send_email + async def send_key_created_or_user_invited_email( + self, webhook_event: WebhookEvent + ) -> bool: + try: + from litellm.proxy.utils import send_email - if self.alerting is None or "email" not in self.alerting: - # do nothing if user does not want email alerts + if self.alerting is None or "email" not in self.alerting: + # do nothing if user does not want email alerts + return False + from litellm.proxy.proxy_server import premium_user, prisma_client + + email_logo_url = os.getenv("SMTP_SENDER_LOGO", None) + email_support_contact = os.getenv("EMAIL_SUPPORT_CONTACT", None) + await self._check_if_using_premium_email_feature( + premium_user, email_logo_url, email_support_contact + ) + if email_logo_url is None: + email_logo_url = LITELLM_LOGO_URL + if email_support_contact is None: + email_support_contact = LITELLM_SUPPORT_CONTACT + + event_name = webhook_event.event_message + recipient_email = webhook_event.user_email + recipient_user_id = webhook_event.user_id + if ( + recipient_email is None + and recipient_user_id is not None + and prisma_client is not None + ): + user_row = await prisma_client.db.litellm_usertable.find_unique( + where={"user_id": recipient_user_id} + ) + + if user_row is not None: + recipient_email = 
user_row.user_email + + key_name = webhook_event.key_alias + key_token = webhook_event.token + key_budget = webhook_event.max_budget + base_url = os.getenv("PROXY_BASE_URL", "http://0.0.0.0:4000") + + email_html_content = "Alert from LiteLLM Server" + if recipient_email is None: + verbose_proxy_logger.error( + "Trying to send email alert to no recipient", + extra=webhook_event.dict(), + ) + + if webhook_event.event == "key_created": + email_html_content = KEY_CREATED_EMAIL_TEMPLATE.format( + email_logo_url=email_logo_url, + recipient_email=recipient_email, + key_budget=key_budget, + key_token=key_token, + base_url=base_url, + email_support_contact=email_support_contact, + ) + elif webhook_event.event == "internal_user_created": + # GET TEAM NAME + team_id = webhook_event.team_id + team_name = "Default Team" + if team_id is not None and prisma_client is not None: + team_row = await prisma_client.db.litellm_teamtable.find_unique( + where={"team_id": team_id} + ) + if team_row is not None: + team_name = team_row.team_alias or "-" + email_html_content = USER_INVITED_EMAIL_TEMPLATE.format( + email_logo_url=email_logo_url, + recipient_email=recipient_email, + team_name=team_name, + base_url=base_url, + email_support_contact=email_support_contact, + ) + else: + verbose_proxy_logger.error( + "Trying to send email alert on unknown webhook event", + extra=webhook_event.model_dump(), + ) + + payload = webhook_event.model_dump_json() + email_event = { + "to": recipient_email, + "subject": f"LiteLLM: {event_name}", + "html": email_html_content, + } + + response = await send_email( + receiver_email=email_event["to"], + subject=email_event["subject"], + html=email_event["html"], + ) + + return True + + except Exception as e: + verbose_proxy_logger.error("Error sending email alert %s", str(e)) return False - from litellm.proxy.proxy_server import premium_user, prisma_client - - email_logo_url = os.getenv("SMTP_SENDER_LOGO", None) - email_support_contact = 
os.getenv("EMAIL_SUPPORT_CONTACT", None) - await self._check_if_using_premium_email_feature( - premium_user, email_logo_url, email_support_contact - ) - if email_logo_url is None: - email_logo_url = LITELLM_LOGO_URL - if email_support_contact is None: - email_support_contact = LITELLM_SUPPORT_CONTACT - - event_name = webhook_event.event_message - recipient_email = webhook_event.user_email - recipient_user_id = webhook_event.user_id - if ( - recipient_email is None - and recipient_user_id is not None - and prisma_client is not None - ): - user_row = await prisma_client.db.litellm_usertable.find_unique( - where={"user_id": recipient_user_id} - ) - - if user_row is not None: - recipient_email = user_row.user_email - - key_name = webhook_event.key_alias - key_token = webhook_event.token - key_budget = webhook_event.max_budget - - email_html_content = "Alert from LiteLLM Server" - if recipient_email is None: - verbose_proxy_logger.error( - "Trying to send email alert to no recipient", extra=webhook_event.dict() - ) - email_html_content = f""" - LiteLLM Logo - -

Hi {recipient_email},
- - I'm happy to provide you with an OpenAI Proxy API Key, loaded with ${key_budget} per month.

- - - Key:

{key_token}

-
- -

Usage Example

- - Detailed Documentation on Usage with OpenAI Python SDK, Langchain, LlamaIndex, Curl - -
-
-            import openai
-            client = openai.OpenAI(
-                api_key="{key_token}",
-                base_url={os.getenv("PROXY_BASE_URL", "http://0.0.0.0:4000")}
-            )
-
-            response = client.chat.completions.create(
-                model="gpt-3.5-turbo", # model to send to the proxy
-                messages = [
-                    {{
-                        "role": "user",
-                        "content": "this is a test request, write a short poem"
-                    }}
-                ]
-            )
-
-            
- - - If you have any questions, please send an email to {email_support_contact}

- - Best,
- The LiteLLM team
- """ - - payload = webhook_event.model_dump_json() - email_event = { - "to": recipient_email, - "subject": f"LiteLLM: {event_name}", - "html": email_html_content, - } - - response = await send_email( - receiver_email=email_event["to"], - subject=email_event["subject"], - html=email_event["html"], - ) - - return False async def send_email_alert_using_smtp(self, webhook_event: WebhookEvent) -> bool: """ diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index cce9ae409..1daeea6a4 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -380,6 +380,18 @@ "output_cost_per_second": 0.0001, "litellm_provider": "azure" }, + "azure/gpt-4o": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, "azure/gpt-4-turbo-2024-04-09": { "max_tokens": 4096, "max_input_tokens": 128000, diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index 6df6b4fe4..b86df4e9b 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -407,7 +407,9 @@ class ProxyChatCompletionRequest(LiteLLMBase): deployment_id: Optional[str] = None request_timeout: Optional[int] = None - model_config = ConfigDict(extra="allow") # allow params not defined here, these fall in litellm.completion(**kwargs) + model_config = ConfigDict( + extra="allow" + ) # allow params not defined here, these fall in litellm.completion(**kwargs) class ModelInfoDelete(LiteLLMBase): @@ -508,6 +510,7 @@ class GenerateKeyRequest(GenerateRequestBase): ) # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {} model_config = ConfigDict(protected_namespaces=()) + send_invite_email: Optional[bool] = None class 
GenerateKeyResponse(GenerateKeyRequest): @@ -579,10 +582,24 @@ class NewUserRequest(GenerateKeyRequest): auto_create_key: bool = ( True # flag used for returning a key as part of the /user/new response ) + send_invite_email: Optional[bool] = None class NewUserResponse(GenerateKeyResponse): max_budget: Optional[float] = None + user_email: Optional[str] = None + user_role: Optional[ + Literal[ + LitellmUserRoles.PROXY_ADMIN, + LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY, + LitellmUserRoles.INTERNAL_USER, + LitellmUserRoles.INTERNAL_USER_VIEW_ONLY, + LitellmUserRoles.TEAM, + LitellmUserRoles.CUSTOMER, + ] + ] = None + teams: Optional[list] = None + organization_id: Optional[str] = None class UpdateUserRequest(GenerateRequestBase): @@ -918,7 +935,9 @@ class KeyManagementSettings(LiteLLMBase): class TeamDefaultSettings(LiteLLMBase): team_id: str - model_config = ConfigDict(extra="allow") # allow params not defined here, these fall in litellm.completion(**kwargs) + model_config = ConfigDict( + extra="allow" + ) # allow params not defined here, these fall in litellm.completion(**kwargs) class DynamoDBArgs(LiteLLMBase): @@ -1112,6 +1131,7 @@ class LiteLLM_VerificationToken(LiteLLMBase): model_config = ConfigDict(protected_namespaces=()) + class LiteLLM_VerificationTokenView(LiteLLM_VerificationToken): """ Combined view of litellm verification token + litellm team table (select values) @@ -1285,6 +1305,7 @@ class WebhookEvent(CallInfo): "threshold_crossed", "projected_limit_exceeded", "key_created", + "internal_user_created", "spend_tracked", ] event_group: Literal["internal_user", "key", "team", "proxy", "customer"] diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index ff7343c3a..1d6306d0c 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -21,7 +21,7 @@ model_list: general_settings: master_key: sk-1234 - alerting: ["slack"] + alerting: ["slack", "email"] litellm_settings: callbacks: 
custom_callbacks1.proxy_handler_instance \ No newline at end of file diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 4759ea8c4..cd89b8a5b 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -3217,6 +3217,9 @@ def _duration_in_seconds(duration: str): async def generate_key_helper_fn( + request_type: Literal[ + "user", "key" + ], # identifies if this request is from /user/new or /key/generate duration: Optional[str], models: list, aliases: dict, @@ -3248,6 +3251,7 @@ async def generate_key_helper_fn( teams: Optional[list] = None, organization_id: Optional[str] = None, table_name: Optional[Literal["key", "user"]] = None, + send_invite_email: Optional[bool] = None, ): global prisma_client, custom_db_client, user_api_key_cache, litellm_proxy_admin_name, premium_user @@ -3282,7 +3286,7 @@ async def generate_key_helper_fn( permissions_json = json.dumps(permissions) metadata_json = json.dumps(metadata) model_max_budget_json = json.dumps(model_max_budget) - user_role = user_role or "app_user" + user_role = user_role tpm_limit = tpm_limit rpm_limit = rpm_limit allowed_cache_controls = allowed_cache_controls @@ -3348,7 +3352,7 @@ async def generate_key_helper_fn( "get_spend_routes" in saved_token["permissions"] and premium_user != True ): - raise Exception( + raise ValueError( "get_spend_routes permission is only available for LiteLLM Enterprise users" ) @@ -3405,6 +3409,10 @@ async def generate_key_helper_fn( # Add budget related info in key_data - this ensures it's returned key_data["budget_id"] = budget_id + + if request_type == "user": + # if this is a /user/new request update the key_data with user_data fields + key_data.update(user_data) return key_data @@ -3752,6 +3760,7 @@ async def startup_event(): ) asyncio.create_task( generate_key_helper_fn( + request_type="user", duration=None, models=[], aliases={}, @@ -3774,6 +3783,7 @@ async def startup_event(): # add proxy budget to db in the user table 
asyncio.create_task( generate_key_helper_fn( + request_type="user", user_id=litellm_proxy_budget_name, duration=None, models=[], @@ -3796,7 +3806,13 @@ async def startup_event(): if custom_db_client is not None and master_key is not None: # add master key to db await generate_key_helper_fn( - duration=None, models=[], aliases={}, config={}, spend=0, token=master_key + request_type="key", + duration=None, + models=[], + aliases={}, + config={}, + spend=0, + token=master_key, ) ### CHECK IF VIEW EXISTS ### @@ -6004,6 +6020,7 @@ async def generate_key_fn( - aliases: Optional[dict] - Any alias mappings, on top of anything in the config.yaml model list. - https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---upgradedowngrade-models - config: Optional[dict] - any key-specific configs, overrides config in config.yaml - spend: Optional[int] - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend + - send_invite_email: Optional[bool] - Whether to send an invite email to the user_id, with the generate key - max_budget: Optional[float] - Specify max budget for a given key. - max_parallel_requests: Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x. - metadata: Optional[dict] - Metadata for key, store information for key. 
Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" } @@ -6132,29 +6149,37 @@ async def generate_key_fn( if "budget_duration" in data_json: data_json["key_budget_duration"] = data_json.pop("budget_duration", None) - response = await generate_key_helper_fn(**data_json, table_name="key") + response = await generate_key_helper_fn( + request_type="key", **data_json, table_name="key" + ) response["soft_budget"] = ( data.soft_budget ) # include the user-input soft budget in the response - event = WebhookEvent( - event="key_created", - event_group="key", - event_message=f"API Key Created", - token=response.get("token", ""), - spend=response.get("spend", 0.0), - max_budget=response.get("max_budget", 0.0), - user_id=response.get("user_id", None), - team_id=response.get("team_id", "Default Team"), - key_alias=response.get("key_alias", None), - ) - # If user configured email alerting - send an Email letting their end-user know the key was created - asyncio.create_task( - proxy_logging_obj.slack_alerting_instance.send_key_created_email( - webhook_event=event, + if data.send_invite_email is True: + if "email" not in general_settings.get("alerting", []): + raise ValueError( + "Email alerting not setup on config.yaml. Please set `alerting=['email']. 
\nDocs: https://docs.litellm.ai/docs/proxy/email`" + ) + event = WebhookEvent( + event="key_created", + event_group="key", + event_message=f"API Key Created", + token=response.get("token", ""), + spend=response.get("spend", 0.0), + max_budget=response.get("max_budget", 0.0), + user_id=response.get("user_id", None), + team_id=response.get("team_id", "Default Team"), + key_alias=response.get("key_alias", None), + ) + + # If user configured email alerting - send an Email letting their end-user know the key was created + asyncio.create_task( + proxy_logging_obj.slack_alerting_instance.send_key_created_or_user_invited_email( + webhook_event=event, + ) ) - ) return GenerateKeyResponse(**response) except Exception as e: @@ -8116,6 +8141,7 @@ async def new_user(data: NewUserRequest): - teams: Optional[list] - specify a list of team id's a user belongs to. - organization_id: Optional[str] - specify the org a user belongs to. - user_email: Optional[str] - Specify a user email. + - send_invite_email: Optional[bool] - Specify if an invite email should be sent. - user_role: Optional[str] - Specify a user role - "admin", "app_owner", "app_user" - max_budget: Optional[float] - Specify max budget for a given user. - models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models) @@ -8137,7 +8163,7 @@ async def new_user(data: NewUserRequest): data_json["table_name"] = ( "user" # only create a user, don't create key if 'auto_create_key' set to False ) - response = await generate_key_helper_fn(**data_json) + response = await generate_key_helper_fn(request_type="user", **data_json) # Admin UI Logic # if team_id passed add this user to the team @@ -8152,11 +8178,42 @@ async def new_user(data: NewUserRequest): ), ) ) + + if data.send_invite_email is True: + # check if user has setup email alerting + if "email" not in general_settings.get("alerting", []): + raise ValueError( + "Email alerting not setup on config.yaml. 
Please set `alerting=['email']. \nDocs: https://docs.litellm.ai/docs/proxy/email`" + ) + + event = WebhookEvent( + event="internal_user_created", + event_group="internal_user", + event_message=f"Welcome to LiteLLM Proxy", + token=response.get("token", ""), + spend=response.get("spend", 0.0), + max_budget=response.get("max_budget", 0.0), + user_id=response.get("user_id", None), + user_email=response.get("user_email", None), + team_id=response.get("team_id", "Default Team"), + key_alias=response.get("key_alias", None), + ) + + # If user configured email alerting - send an Email letting their end-user know the key was created + asyncio.create_task( + proxy_logging_obj.slack_alerting_instance.send_key_created_or_user_invited_email( + webhook_event=event, + ) + ) + return NewUserResponse( key=response.get("token", ""), expires=response.get("expires", None), max_budget=response["max_budget"], user_id=response["user_id"], + user_role=response.get("user_role", None), + user_email=response.get("user_email", None), + teams=response.get("teams", None), team_id=response.get("team_id", None), metadata=response.get("metadata", None), models=response.get("models", None), @@ -8213,11 +8270,13 @@ async def user_auth(request: Request): if response is not None: user_id = response.user_id response = await generate_key_helper_fn( - **{"duration": "24hr", "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": user_id} # type: ignore + request_type="key", + **{"duration": "24hr", "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": user_id}, # type: ignore ) else: ### else - create new user response = await generate_key_helper_fn( - **{"duration": "24hr", "models": [], "aliases": {}, "config": {}, "spend": 0, "user_email": user_email} # type: ignore + request_type="key", + **{"duration": "24hr", "models": [], "aliases": {}, "config": {}, "spend": 0, "user_email": user_email}, # type: ignore ) base_url = os.getenv("LITELLM_HOSTED_UI", 
"https://dashboard.litellm.ai/") @@ -11709,7 +11768,8 @@ async def login(request: Request): ) if os.getenv("DATABASE_URL") is not None: response = await generate_key_helper_fn( - **{"user_role": LitellmUserRoles.PROXY_ADMIN, "duration": "2hr", "key_max_budget": 5, "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": key_user_id, "team_id": "litellm-dashboard"} # type: ignore + request_type="key", + **{"user_role": LitellmUserRoles.PROXY_ADMIN, "duration": "2hr", "key_max_budget": 5, "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": key_user_id, "team_id": "litellm-dashboard"}, # type: ignore ) else: raise ProxyException( @@ -11810,6 +11870,7 @@ async def onboarding(invite_link: str): response = await generate_key_helper_fn( **{ "user_role": LitellmUserRoles.PROXY_ADMIN, + request_type="key", "duration": "2hr", "key_max_budget": 5, "models": [], @@ -12125,8 +12186,11 @@ async def auth_callback(request: Request): verbose_proxy_logger.info( f"user_defined_values for creating ui key: {user_defined_values}" ) + + default_ui_key_values.update(user_defined_values) + default_ui_key_values["request_type"] = "key" response = await generate_key_helper_fn( - **default_ui_key_values, **user_defined_values # type: ignore + **default_ui_key_values, # type: ignore ) key = response["token"] # type: ignore user_id = response["user_id"] # type: ignore @@ -13231,7 +13295,7 @@ async def health_services_endpoint( # use create task - this can take 10 seconds. 
don't keep ui users waiting for notification to check their email asyncio.create_task( - proxy_logging_obj.slack_alerting_instance.send_key_created_email( + proxy_logging_obj.slack_alerting_instance.send_key_created_or_user_invited_email( webhook_event=webhook_event ) ) diff --git a/litellm/tests/test_key_generate_prisma.py b/litellm/tests/test_key_generate_prisma.py index c818b4f4a..148d32cd9 100644 --- a/litellm/tests/test_key_generate_prisma.py +++ b/litellm/tests/test_key_generate_prisma.py @@ -1912,7 +1912,8 @@ async def test_key_with_no_permissions(prisma_client): await litellm.proxy.proxy_server.prisma_client.connect() try: response = await generate_key_helper_fn( - **{"duration": "1hr", "key_max_budget": 0, "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": "ishaan", "team_id": "litellm-dashboard"} # type: ignore + request_type="key", + **{"duration": "1hr", "key_max_budget": 0, "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": "ishaan", "team_id": "litellm-dashboard"}, # type: ignore ) print(response) diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index cce9ae409..1daeea6a4 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -380,6 +380,18 @@ "output_cost_per_second": 0.0001, "litellm_provider": "azure" }, + "azure/gpt-4o": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, "azure/gpt-4-turbo-2024-04-09": { "max_tokens": 4096, "max_input_tokens": 128000,