Merge pull request #3112 from BerriAI/litellm_add_alert_types

[Feat] Allow user to select slack alert types to Opt In to
2024-04-18 16:21:33 -07:00 · 2024-04-18 16:21:33 -07:00 · eb04a929e6
commit eb04a929e6
parent 92630cd9da 1cda0db2ca
4 changed files with 105 additions and 5 deletions
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@ -697,6 +697,21 @@ class ConfigGeneralSettings(LiteLLMBase):
        None,
        description="List of alerting integrations. Today, just slack - `alerting: ['slack']`",
    )
    alert_types: Optional[
        List[
            Literal[
                "llm_exceptions",
                "llm_too_slow",
                "llm_requests_hanging",
                "budget_alerts",
                "db_exceptions",
            ]
        ]
    ] = Field(
        None,
        description="List of alerting types. By default it is all alerts",
    )
    alerting_threshold: Optional[int] = Field(
        None,
        description="sends alerts if requests hang for 5min+",
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@ -2283,6 +2283,7 @@ class ProxyConfig:
            proxy_logging_obj.update_values(
                alerting=general_settings.get("alerting", None),
                alerting_threshold=general_settings.get("alerting_threshold", 600),
                alert_types=general_settings.get("alert_types", None),
                redis_cache=redis_usage_cache,
            )
            ### CONNECT TO DATABASE ###
@ -2619,6 +2620,9 @@ class ProxyConfig:
            if "alerting" in _general_settings:
                general_settings["alerting"] = _general_settings["alerting"]
                proxy_logging_obj.alerting = general_settings["alerting"]
            if "alert_types" in _general_settings:
                general_settings["alert_types"] = _general_settings["alert_types"]
                proxy_logging_obj.alert_types = general_settings["alert_types"]
            # router settings
            _router_settings = config_data.get("router_settings", {})
@ -8196,10 +8200,12 @@ async def update_config(config_info: ConfigYAML):
            updated_general_settings = config_info.general_settings.dict(
                exclude_none=True
            )
-            config["general_settings"] = {
+
-                **updated_general_settings,
+            _existing_settings = config["general_settings"]
-                **config["general_settings"],
+            for k, v in updated_general_settings.items():
-            }
+                # overwrite existing settings with updated values
                _existing_settings[k] = v
            config["general_settings"] = _existing_settings
        if config_info.environment_variables is not None:
            config.setdefault("environment_variables", {})
@ -8372,7 +8378,15 @@ async def get_config():
                    )
                    _slack_env_vars[_var] = _decrypted_value
-            _data_to_return.append({"name": "slack", "variables": _slack_env_vars})
+            _alerting_types = proxy_logging_obj.alert_types
            _data_to_return.append(
                {
                    "name": "slack",
                    "variables": _slack_env_vars,
                    "alerting_types": _alerting_types,
                }
            )
        _router_settings = llm_router.get_settings()
        return {
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@ -64,12 +64,38 @@ class ProxyLogging:
        self.cache_control_check = _PROXY_CacheControlCheck()
        self.alerting: Optional[List] = None
        self.alerting_threshold: float = 300  # default to 5 min. threshold
        self.alert_types: List[
            Literal[
                "llm_exceptions",
                "llm_too_slow",
                "llm_requests_hanging",
                "budget_alerts",
                "db_exceptions",
            ]
        ] = [
            "llm_exceptions",
            "llm_too_slow",
            "llm_requests_hanging",
            "budget_alerts",
            "db_exceptions",
        ]
    def update_values(
        self,
        alerting: Optional[List],
        alerting_threshold: Optional[float],
        redis_cache: Optional[RedisCache],
        alert_types: Optional[
            List[
                Literal[
                    "llm_exceptions",
                    "llm_too_slow",
                    "llm_requests_hanging",
                    "budget_alerts",
                    "db_exceptions",
                ]
            ]
        ] = None,
    ):
        self.alerting = alerting
        if alerting_threshold is not None:
@ -78,6 +104,9 @@ class ProxyLogging:
        if redis_cache is not None:
            self.internal_usage_cache.redis_cache = redis_cache
        if alert_types is not None:
            self.alert_types = alert_types
    def _init_litellm_callbacks(self):
        print_verbose(f"INITIALIZING LITELLM CALLBACKS!")
        litellm.callbacks.append(self.max_parallel_request_limiter)
@ -213,6 +242,8 @@ class ProxyLogging:
    ):
        if self.alerting is None:
            return
        if "llm_too_slow" not in self.alert_types:
            return
        time_difference_float, model, api_base, messages = (
            self._response_taking_too_long_callback(
                kwargs=kwargs,
@ -259,6 +290,8 @@ class ProxyLogging:
        if type == "hanging_request":
            # Simulate a long-running operation that could take more than 5 minutes
            if "llm_requests_hanging" not in self.alert_types:
                return
            await asyncio.sleep(
                self.alerting_threshold
            )  # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests
@ -307,6 +340,8 @@ class ProxyLogging:
        if self.alerting is None:
            # do nothing if alerting is not switched on
            return
        if "budget_alerts" not in self.alert_types:
            return
        _id: str = "default_id"  # used for caching
        if type == "user_and_proxy_budget":
            user_info = dict(user_info)
@ -465,6 +500,8 @@ class ProxyLogging:
        Currently only logs exceptions to sentry
        """
        ### ALERTING ###
        if "db_exceptions" not in self.alert_types:
            return
        if isinstance(original_exception, HTTPException):
            if isinstance(original_exception.detail, str):
                error_message = original_exception.detail
@ -499,6 +536,8 @@ class ProxyLogging:
        """
        ### ALERTING ###
        if "llm_exceptions" not in self.alert_types:
            return
        asyncio.create_task(
            self.alerting_handler(
                message=f"LLM API call failed: {str(original_exception)}", level="High"
--- a/ui/litellm-dashboard/src/components/settings.tsx
+++ b/ui/litellm-dashboard/src/components/settings.tsx
@ -36,6 +36,7 @@ const Settings: React.FC<SettingsPageProps> = ({
  const [isModalVisible, setIsModalVisible] = useState(false);
  const [form] = Form.useForm();
  const [selectedCallback, setSelectedCallback] = useState<string | null>(null);
  const [selectedAlertValues, setSelectedAlertValues] = useState([]);
  useEffect(() => {
    if (!accessToken || !userRole || !userID) {
@ -59,6 +60,12 @@ const Settings: React.FC<SettingsPageProps> = ({
    setSelectedCallback(null);
  };
  // onChange handler for the "Alerting Types" multi-select.
  // Stores the current selection in component state; handleSaveChanges later
  // sends that state as `general_settings.alert_types` in the update payload.
  // NOTE(review): `values` is presumably the full array of selected alert-type
  // strings (Select is rendered with mode="multiple") — confirm against the
  // Select component's onChange contract.
  const handleChange = (values) => {
    setSelectedAlertValues(values);
    // Debug trace of the current selection.
    console.log('Selected values:', values);
  };
  const handleSaveChanges = (callback: any) => {
    if (!accessToken) {
      return;
@ -68,8 +75,14 @@ const Settings: React.FC<SettingsPageProps> = ({
      Object.entries(callback.variables).map(([key, value]) => [key, (document.querySelector(`input[name="${key}"]`) as HTMLInputElement)?.value || value])
    );
    console.log("updatedVariables", updatedVariables);
    console.log("updateAlertTypes", selectedAlertValues);
    const payload = {
      environment_variables: updatedVariables,
      general_settings: {
        alert_types: selectedAlertValues
      }
    };
    try {
@ -169,6 +182,25 @@ const Settings: React.FC<SettingsPageProps> = ({
  </li>
 ))}
        </ul>
        {callback.alerting_types && (
          <div>
            <Text className="mt-2">Alerting Types</Text>
            <Select
              mode="multiple"
              style={{ width: '100%' }}
              placeholder="Select Alerting Types"
              optionLabelProp="label"
              onChange={handleChange}
              defaultValue={callback.alerting_types}
            >
              {callback.alerting_types.map((type: string) => (
                <Select.Option key={type} value={type} label={type}>
                  {type}
                </Select.Option>
              ))}
            </Select>
          </div>
        )}
        <Button className="mt-2" onClick={() => handleSaveChanges(callback)}>
          Save Changes
        </Button>