From 4b7e102187bcc56163bfbfcac38fa85da0dc0e79 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Mar 2024 14:48:38 -0700 Subject: [PATCH 1/6] (v0) prometheus metric --- litellm/proxy/proxy_server.py | 36 +++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index f718e8901..b8cd1a9fa 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -983,6 +983,42 @@ def cost_tracking(): litellm.success_callback.append(_PROXY_track_cost_callback) # type: ignore +from prometheus_client import Counter, REGISTRY +from prometheus_client import make_asgi_app + +# Add prometheus asgi middleware to route /metrics requests +metrics_app = make_asgi_app() +app.mount("/metrics", metrics_app) +try: + calls_metric = Counter("calls_metric", "Measure of calls") + tokens_metric = Counter("tokens_metric", "Measure of tokens") + spend_metric = Counter("spend_metric", "Measure of spend") +except: + pass + +# # Define Prometheus counters for metrics +# Register metrics with the default registry + + +def track_prometheus_metrics( + kwargs, # kwargs to completion + completion_response, # response from completion + start_time, + end_time, # start/end time +): + global calls_metric + user: str = ("issues",) + key: str = ("sk-02Wr4IAlN3NvPXvL5JVvDA",) + model: str = ("gpt-3.5-turbo",) + budgets = 1 + tokens = 20 + # print("incrementing prometheus metrics") + calls_metric.labels(user, key, model).inc() + + +litellm.callbacks.append(track_prometheus_metrics) + + async def _PROXY_track_cost_callback( kwargs, # kwargs to completion completion_response: litellm.ModelResponse, # response from completion From d2522752a66125220c38a9c6d0da7a701251023b Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Mar 2024 15:44:36 -0700 Subject: [PATCH 2/6] (feat) prometheus metric collector --- litellm/integrations/prometheus.py | 87 ++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 litellm/integrations/prometheus.py diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py new file mode 100644 index 000000000..999191a8c --- /dev/null +++ b/litellm/integrations/prometheus.py @@ -0,0 +1,87 @@ +# used for /metrics endpoint on LiteLLM Proxy +#### What this does #### +# On success + failure, log events to Supabase + +import dotenv, os +import requests + +dotenv.load_dotenv() # Loading env variables using dotenv +import traceback +import datetime, subprocess, sys +import litellm, uuid +from litellm._logging import print_verbose, verbose_logger + + +class prometheusLogger: + # Class variables or attributes + def __init__( + self, + **kwargs, + ): + try: + verbose_logger.debug(f"in init prometheus metrics") + from prometheus_client import Counter + + self.litellm_requests_metric = Counter( + name="litellm_requests_metric", + documentation="Total number of LLM calls to litellm", + labelnames=["user", "key", "model"], + ) + + # Counter for spend + self.litellm_spend_metric = Counter( + "litellm_spend_metric", + "Total spend on LLM requests", + labelnames=["user", "key", "model"], + ) + + # Counter for total_output_tokens + self.litellm_tokens_metric = Counter( + "litellm_total_tokens", + "Total number of input + output tokens from LLM requests", + labelnames=["user", "key", "model"], + ) + except Exception as e: + print_verbose(f"Got exception on init s3 client {str(e)}") + raise e + + async def _async_log_event( + self, kwargs, response_obj, start_time, end_time, print_verbose, user_id + ): + self.log_event(kwargs, response_obj, start_time, end_time, print_verbose) + + def log_event( + self, kwargs, response_obj, start_time, end_time, user_id, print_verbose + ): + try: + # Define prometheus client + verbose_logger.debug( + f"prometheus Logging - Enters logging function for model {kwargs}" + ) + + # unpack kwargs + model = kwargs.get("model", "") + response_cost = kwargs.get("response_cost", 0.0) + litellm_params = kwargs.get("litellm_params", {}) or {} + proxy_server_request = litellm_params.get("proxy_server_request") or {} + end_user_id = proxy_server_request.get("body", {}).get("user", None) + user_api_key = litellm_params.get("metadata", {}).get("api_key", None) + tokens_used = response_obj.get("usage", {}).get("total_tokens", 0) + + print_verbose( + f"inside track_prometheus_metrics, model {model}, response_cost {response_cost}, tokens_used {tokens_used}, end_user_id {end_user_id}, user_api_key {user_api_key}" + ) + + self.litellm_requests_metric.labels(end_user_id, user_api_key, model).inc() + self.litellm_spend_metric.labels(end_user_id, user_api_key, model).inc( + response_cost + ) + self.litellm_tokens_metric.labels(end_user_id, user_api_key, model).inc( + tokens_used + ) + except Exception as e: + traceback.print_exc() + verbose_logger.debug( + f"prometheus Layer Error - {str(e)}\n{traceback.format_exc()}" + ) + pass From aa1c48045218723bb530ff45c75767acabde78d6 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Mar 2024 15:49:23 -0700 Subject: [PATCH 3/6] (feat) using prom litellm --- litellm/proxy/proxy_server.py | 45 +++++++---------------------------- 1 file changed, 9 insertions(+), 36 deletions(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index b8cd1a9fa..43606705d 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -983,42 +983,6 @@ def cost_tracking(): litellm.success_callback.append(_PROXY_track_cost_callback) # type: ignore -from prometheus_client import Counter, REGISTRY -from prometheus_client import make_asgi_app - -# Add prometheus asgi middleware to route /metrics requests -metrics_app = make_asgi_app() -app.mount("/metrics", metrics_app) -try: - calls_metric = Counter("calls_metric", "Measure of calls") - tokens_metric = Counter("tokens_metric", "Measure of tokens") - spend_metric = Counter("spend_metric", "Measure of spend") -except: - pass - -# # Define Prometheus counters for metrics -# Register metrics with the default registry - - -def track_prometheus_metrics( - kwargs, # kwargs to completion - completion_response, # response from completion - start_time, - end_time, # start/end time -): - global calls_metric - user: str = ("issues",) - key: str = ("sk-02Wr4IAlN3NvPXvL5JVvDA",) - model: str = ("gpt-3.5-turbo",) - budgets = 1 - tokens = 20 - # print("incrementing prometheus metrics") - calls_metric.labels(user, key, model).inc() - - -litellm.callbacks.append(track_prometheus_metrics) - - async def _PROXY_track_cost_callback( kwargs, # kwargs to completion completion_response: litellm.ModelResponse, # response from completion @@ -1867,6 +1831,15 @@ class ProxyConfig: # these are litellm callbacks - "langfuse", "sentry", "wandb" else: litellm.success_callback.append(callback) + if "prometheus" in callback: + verbose_proxy_logger.debug( + "Starting Prometheus Metrics on /metrics" + ) + from prometheus_client import make_asgi_app + + # Add prometheus asgi middleware to route /metrics requests + metrics_app = make_asgi_app() + app.mount("/metrics", metrics_app) print( # noqa f"{blue_color_code} Initialized Success Callbacks - {litellm.success_callback} {reset_color_code}" ) # noqa From c196186190873d0fb698092b2d0f2c24e91dde0f Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Mar 2024 17:28:33 -0700 Subject: [PATCH 4/6] (fix) add /metrics to utils.py --- litellm/integrations/prometheus.py | 6 ++--- litellm/utils.py | 35 +++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py index 999191a8c..fc8732dfd 100644 --- a/litellm/integrations/prometheus.py +++ b/litellm/integrations/prometheus.py @@ -12,7 +12,7 @@ import litellm, uuid from litellm._logging import print_verbose, verbose_logger -class prometheusLogger: +class PrometheusLogger: # Class variables or attributes def __init__( self, @@ -42,7 +42,7 @@ class prometheusLogger: labelnames=["user", "key", "model"], ) except Exception as e: - print_verbose(f"Got exception on init s3 client {str(e)}") + print_verbose(f"Got exception on init prometheus client {str(e)}") raise e async def _async_log_event( @@ -65,7 +65,7 @@ class prometheusLogger: litellm_params = kwargs.get("litellm_params", {}) or {} proxy_server_request = litellm_params.get("proxy_server_request") or {} end_user_id = proxy_server_request.get("body", {}).get("user", None) - user_api_key = litellm_params.get("metadata", {}).get("api_key", None) + user_api_key = litellm_params.get("metadata", {}).get("user_api_key", None) tokens_used = response_obj.get("usage", {}).get("total_tokens", 0) print_verbose( diff --git a/litellm/utils.py b/litellm/utils.py index 5f11eb989..bcef061d6 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -66,6 +66,7 @@ from .integrations.weights_biases import WeightsBiasesLogger from .integrations.custom_logger import CustomLogger from .integrations.langfuse import LangFuseLogger from .integrations.datadog import DataDogLogger +from .integrations.prometheus import PrometheusLogger from .integrations.dynamodb import DyanmoDBLogger from .integrations.s3 import S3Logger from .integrations.clickhouse import ClickhouseLogger @@ -123,6 +124,7 @@ weightsBiasesLogger = None customLogger = None langFuseLogger = None dataDogLogger = None +prometheusLogger = None dynamoLogger = None s3Logger = None genericAPILogger = None @@ -1502,6 +1504,35 @@ class Logging: user_id=kwargs.get("user", None), print_verbose=print_verbose, ) + if callback == "prometheus": + global prometheusLogger + verbose_logger.debug("reaches prometheus for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + verbose_logger.debug( + f"prometheus: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" + ) + if complete_streaming_response is None: + continue + else: + print_verbose( + "reaches prometheus for streaming logging!" + ) + result = kwargs["complete_streaming_response"] + prometheusLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) if callback == "generic": global genericAPILogger verbose_logger.debug("reaches langfuse for success logging!") @@ -6111,7 +6142,7 @@ def validate_environment(model: Optional[str] = None) -> dict: def set_callbacks(callback_list, function_id=None): - global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, llmonitorLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, dynamoLogger, s3Logger, dataDogLogger + global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, llmonitorLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger try: for callback in callback_list: print_verbose(f"callback: {callback}") @@ -6179,6 +6210,8 @@ def set_callbacks(callback_list, function_id=None): langFuseLogger = LangFuseLogger() elif callback == "datadog": dataDogLogger = DataDogLogger() + elif callback == "prometheus": + prometheusLogger = PrometheusLogger() elif callback == "dynamodb": dynamoLogger = DyanmoDBLogger() elif callback == "s3": From bed1c0478a13f9d609f9b975468334b309ac4637 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Mar 2024 17:30:07 -0700 Subject: [PATCH 5/6] (fix) include prom endpoint --- litellm/proxy/proxy_config.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 049cf1d3b..c399cd3de 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -14,4 +14,6 @@ general_settings: master_key: sk-1234 router_settings: set_verbose: True - debug_level: "DEBUG" \ No newline at end of file + debug_level: "DEBUG" +litellm_settings: + success_callback: ["prometheus"] \ No newline at end of file From 827a8af093b81790a4625e31a7b7ad2303ee692e Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Mar 2024 18:01:04 -0700 Subject: [PATCH 6/6] (fix) add prometheus_client to req.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 5d6b2307d..4f2282d1c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,6 +19,7 @@ async_generator==1.10.0 # for async ollama calls traceloop-sdk==0.5.3 # for open telemetry logging langfuse>=2.6.3 # for langfuse self-hosted logging datadog-api-client==2.23.0 # for datadog logging +prometheus_client==0.20.0 # for /metrics endpoint on proxy orjson==3.9.15 # fast /embedding responses apscheduler==3.10.4 # for resetting budget in background fastapi-sso==0.10.0 # admin UI, SSO