diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index 4f0ffa387e..6cd7469079 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -2,14 +2,20 @@
 #### What this does ####
 # On success, log events to Prometheus
-import dotenv, os
-import requests # type: ignore
+import datetime
+import os
+import subprocess
+import sys
 import traceback
-import datetime, subprocess, sys
-import litellm, uuid
-from litellm._logging import print_verbose, verbose_logger
+import uuid
 from typing import Optional, Union
+
+import dotenv
+import requests # type: ignore
+
+import litellm
+from litellm._logging import print_verbose, verbose_logger
 
 
 class PrometheusLogger:
     # Class variables or attributes
@@ -20,6 +26,8 @@ class PrometheusLogger:
         try:
             from prometheus_client import Counter, Gauge
 
+            from litellm.proxy.proxy_server import premium_user
+
             self.litellm_llm_api_failed_requests_metric = Counter(
                 name="litellm_llm_api_failed_requests_metric",
                 documentation="Total number of failed LLM API calls via litellm",
@@ -88,6 +96,31 @@ class PrometheusLogger:
                 labelnames=["hashed_api_key", "api_key_alias"],
             )
 
+            # Litellm-Enterprise Metrics
+            if premium_user is True:
+                # Remaining Rate Limit for model
+                self.litellm_remaining_requests_metric = Gauge(
+                    "litellm_remaining_requests",
+                    "remaining requests for model, returned from LLM API Provider",
+                    labelnames=[
+                        "model_group",
+                        "api_provider",
+                        "api_base",
+                        "litellm_model_name",
+                    ],
+                )
+
+                self.litellm_remaining_tokens_metric = Gauge(
+                    "litellm_remaining_tokens",
+                    "remaining tokens for model, returned from LLM API Provider",
+                    labelnames=[
+                        "model_group",
+                        "api_provider",
+                        "api_base",
+                        "litellm_model_name",
+                    ],
+                )
+
         except Exception as e:
             print_verbose(f"Got exception on init prometheus client {str(e)}")
             raise e
@@ -104,6 +137,8 @@ class PrometheusLogger:
     ):
         try:
             # Define prometheus client
+            from litellm.proxy.proxy_server import premium_user
+
             verbose_logger.debug(
                 f"prometheus Logging - Enters logging function for model {kwargs}"
             )
@@ -199,6 +234,10 @@ class PrometheusLogger:
                 user_api_key, user_api_key_alias
             ).set(_remaining_api_key_budget)
 
+            # set x-ratelimit headers
+            if premium_user is True:
+                self.set_remaining_tokens_requests_metric(kwargs)
+
             ### FAILURE INCREMENT ###
             if "exception" in kwargs:
                 self.litellm_llm_api_failed_requests_metric.labels(
@@ -216,6 +255,58 @@ class PrometheusLogger:
             verbose_logger.debug(traceback.format_exc())
             pass
 
+    def set_remaining_tokens_requests_metric(self, request_kwargs: dict):
+        try:
+            verbose_logger.debug("setting remaining tokens requests metric")
+            _response_headers = request_kwargs.get("response_headers")
+            _litellm_params = request_kwargs.get("litellm_params", {}) or {}
+            _metadata = _litellm_params.get("metadata", {})
+            litellm_model_name = request_kwargs.get("model", None)
+            model_group = _metadata.get("model_group", None)
+            api_base = _metadata.get("api_base", None)
+            llm_provider = _litellm_params.get("custom_llm_provider", None)
+
+            remaining_requests = None
+            remaining_tokens = None
+            # OpenAI / OpenAI Compatible headers
+            if (
+                _response_headers
+                and "x-ratelimit-remaining-requests" in _response_headers
+            ):
+                remaining_requests = _response_headers["x-ratelimit-remaining-requests"]
+            if (
+                _response_headers
+                and "x-ratelimit-remaining-tokens" in _response_headers
+            ):
+                remaining_tokens = _response_headers["x-ratelimit-remaining-tokens"]
+            verbose_logger.debug(
+                f"remaining requests: {remaining_requests}, remaining tokens: {remaining_tokens}"
+            )
+
+            if remaining_requests:
+                """
+                "model_group",
+                "api_provider",
+                "api_base",
+                "litellm_model_name"
+                """
+                self.litellm_remaining_requests_metric.labels(
+                    model_group, llm_provider, api_base, litellm_model_name
+                ).set(remaining_requests)
+
+            if remaining_tokens:
+                self.litellm_remaining_tokens_metric.labels(
+                    model_group, llm_provider, api_base, litellm_model_name
+                ).set(remaining_tokens)
+
+        except Exception as e:
+            verbose_logger.error(
+                "Prometheus Error: set_remaining_tokens_requests_metric. Exception occurred - {}".format(
+                    str(e)
+                )
+            )
+            return
+
 
 def safe_get_remaining_budget(
     max_budget: Optional[float], spend: Optional[float]