Merge pull request #2591 from BerriAI/litellm_metrics_endpoint

[Feat] /metrics endpoint for Prometheus, Grafana
Ishaan Jaff 2024-03-19 18:08:22 -07:00 committed by GitHub
commit c94bc94ad5
GPG key ID: B5690EEEBB952194
5 changed files with 134 additions and 2 deletions
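
Once the proxy is running with the prometheus callback enabled (config below), the new endpoint can be smoke-tested with a plain HTTP GET. A minimal sketch, assuming the proxy listens on the default localhost:4000:

import requests

# fetch the Prometheus exposition text from the proxy; host/port are assumptions
resp = requests.get("http://localhost:4000/metrics")
print(resp.status_code)
print(resp.text[:500])  # e.g. litellm_requests_metric{key="...",model="...",user="..."} 1.0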

litellm/integrations/prometheus.py

@@ -0,0 +1,87 @@
# used for /metrics endpoint on LiteLLM Proxy
#### What this does ####
# On success + failure, log events to Prometheus
import dotenv, os

dotenv.load_dotenv()  # Loading env variables using dotenv
import traceback

from litellm._logging import print_verbose, verbose_logger


class PrometheusLogger:
    # Class variables or attributes
    def __init__(
        self,
        **kwargs,
    ):
        try:
            verbose_logger.debug("in init prometheus metrics")
            from prometheus_client import Counter

            # Counter for total requests
            self.litellm_requests_metric = Counter(
                name="litellm_requests_metric",
                documentation="Total number of LLM calls to litellm",
                labelnames=["user", "key", "model"],
            )
            # Counter for spend
            self.litellm_spend_metric = Counter(
                "litellm_spend_metric",
                "Total spend on LLM requests",
                labelnames=["user", "key", "model"],
            )
            # Counter for total input + output tokens
            self.litellm_tokens_metric = Counter(
                "litellm_total_tokens",
                "Total number of input + output tokens from LLM requests",
                labelnames=["user", "key", "model"],
            )
        except Exception as e:
            print_verbose(f"Got exception on init prometheus client {str(e)}")
            raise e

    async def _async_log_event(
        self, kwargs, response_obj, start_time, end_time, print_verbose, user_id
    ):
        self.log_event(
            kwargs, response_obj, start_time, end_time, user_id, print_verbose
        )

    def log_event(
        self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
    ):
        try:
            # Define prometheus client
            verbose_logger.debug(
                f"prometheus Logging - Enters logging function for model {kwargs}"
            )

            # unpack kwargs
            model = kwargs.get("model", "")
            response_cost = kwargs.get("response_cost", 0.0)
            litellm_params = kwargs.get("litellm_params", {}) or {}
            proxy_server_request = litellm_params.get("proxy_server_request") or {}
            end_user_id = proxy_server_request.get("body", {}).get("user", None)
            user_api_key = litellm_params.get("metadata", {}).get("user_api_key", None)
            tokens_used = response_obj.get("usage", {}).get("total_tokens", 0)

            print_verbose(
                f"inside track_prometheus_metrics, model {model}, response_cost {response_cost}, tokens_used {tokens_used}, end_user_id {end_user_id}, user_api_key {user_api_key}"
            )

            # increment the three counters for this (user, key, model) combination
            self.litellm_requests_metric.labels(end_user_id, user_api_key, model).inc()
            self.litellm_spend_metric.labels(end_user_id, user_api_key, model).inc(
                response_cost
            )
            self.litellm_tokens_metric.labels(end_user_id, user_api_key, model).inc(
                tokens_used
            )
        except Exception as e:
            traceback.print_exc()
            verbose_logger.debug(
                f"prometheus Layer Error - {str(e)}\n{traceback.format_exc()}"
            )
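
For reference, the Counter pattern used by PrometheusLogger can be exercised standalone. A minimal sketch with made-up label values; demo_requests_metric is a hypothetical name chosen to avoid clashing with the real metric:

from prometheus_client import Counter, generate_latest

requests_metric = Counter(
    "demo_requests_metric",
    "Total number of LLM calls",
    labelnames=["user", "key", "model"],
)

# .labels(...) binds one label combination; .inc() adds to that time series
requests_metric.labels("user-1", "sk-...", "gpt-3.5-turbo").inc()
requests_metric.labels("user-1", "sk-...", "gpt-3.5-turbo").inc()

# generate_latest() renders the default registry in Prometheus text format
print(generate_latest().decode())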

litellm/proxy/proxy_config.yaml

@@ -14,4 +14,6 @@ general_settings:
  master_key: sk-1234
router_settings:
  set_verbose: True
  debug_level: "DEBUG"
litellm_settings:
  success_callback: ["prometheus"]
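
The litellm_settings block above enables the callback for the proxy; when using the litellm SDK directly, the equivalent is to set litellm.success_callback yourself. A minimal sketch (the completion call is commented out since it needs provider credentials):

import litellm

litellm.success_callback = ["prometheus"]

# every successful call now increments the litellm_* counters, e.g.:
# litellm.completion(
#     model="gpt-3.5-turbo",
#     messages=[{"role": "user", "content": "hi"}],
# )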

litellm/proxy/proxy_server.py

@@ -1872,6 +1872,15 @@ class ProxyConfig:
                        # these are litellm callbacks - "langfuse", "sentry", "wandb"
                        else:
                            litellm.success_callback.append(callback)
                            if "prometheus" in callback:
                                verbose_proxy_logger.debug(
                                    "Starting Prometheus Metrics on /metrics"
                                )
                                from prometheus_client import make_asgi_app

                                # Add prometheus asgi middleware to route /metrics requests
                                metrics_app = make_asgi_app()
                                app.mount("/metrics", metrics_app)
                    print(  # noqa
                        f"{blue_color_code} Initialized Success Callbacks - {litellm.success_callback} {reset_color_code}"
                    )  # noqa
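
The mount above is the standard prometheus_client ASGI integration, and the same pattern works in any FastAPI app. A self-contained sketch, outside the proxy codebase:

from fastapi import FastAPI
from prometheus_client import make_asgi_app

app = FastAPI()

# make_asgi_app() wraps the default registry in an ASGI app;
# mounting it serves the text exposition format at GET /metrics
app.mount("/metrics", make_asgi_app())

# run with: uvicorn this_module:app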

litellm/utils.py

@@ -66,6 +66,7 @@ from .integrations.weights_biases import WeightsBiasesLogger
from .integrations.custom_logger import CustomLogger
from .integrations.langfuse import LangFuseLogger
from .integrations.datadog import DataDogLogger
from .integrations.prometheus import PrometheusLogger
from .integrations.dynamodb import DyanmoDBLogger
from .integrations.s3 import S3Logger
from .integrations.clickhouse import ClickhouseLogger
@@ -123,6 +124,7 @@ weightsBiasesLogger = None
customLogger = None
langFuseLogger = None
dataDogLogger = None
prometheusLogger = None
dynamoLogger = None
s3Logger = None
genericAPILogger = None
@@ -1502,6 +1504,35 @@ class Logging:
                        user_id=kwargs.get("user", None),
                        print_verbose=print_verbose,
                    )
                if callback == "prometheus":
                    global prometheusLogger
                    verbose_logger.debug("reaches prometheus for success logging!")
                    kwargs = {}
                    for k, v in self.model_call_details.items():
                        if (
                            k != "original_response"
                        ):  # copy.deepcopy raises errors as this could be a coroutine
                            kwargs[k] = v
                    # only log streaming once: complete_streaming_response exists only when the stream ends
                    if self.stream:
                        verbose_logger.debug(
                            f"prometheus: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}"
                        )
                        if complete_streaming_response is None:
                            continue
                        else:
                            print_verbose("reaches prometheus for streaming logging!")
                            result = kwargs["complete_streaming_response"]
                    prometheusLogger.log_event(
                        kwargs=kwargs,
                        response_obj=result,
                        start_time=start_time,
                        end_time=end_time,
                        user_id=kwargs.get("user", None),
                        print_verbose=print_verbose,
                    )
                if callback == "generic":
                    global genericAPILogger
                    verbose_logger.debug("reaches langfuse for success logging!")
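
The streaming branch above defers logging until complete_streaming_response is populated, so a streamed request is counted once at stream end rather than once per chunk. A sketch of that guard in isolation; success_handler and log_to_prometheus are hypothetical stand-ins, not litellm's API:

# hypothetical stand-ins to illustrate the once-per-request guard
def log_to_prometheus(kwargs, result):
    print("logging once:", result)

def success_handler(kwargs, callbacks):
    for callback in callbacks:
        if callback == "prometheus":
            if kwargs.get("stream", False):
                result = kwargs.get("complete_streaming_response")
                if result is None:
                    continue  # mid-stream chunk: wait for the aggregated response
            else:
                result = kwargs.get("response_obj")
            log_to_prometheus(kwargs, result)

# per-chunk call: nothing is logged
success_handler({"stream": True}, ["prometheus"])
# final call: the aggregated response is logged exactly once
success_handler(
    {"stream": True, "complete_streaming_response": {"usage": {"total_tokens": 42}}},
    ["prometheus"],
)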
@@ -6111,7 +6142,7 @@ def validate_environment(model: Optional[str] = None) -> dict:
def set_callbacks(callback_list, function_id=None):
    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, llmonitorLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger
    try:
        for callback in callback_list:
            print_verbose(f"callback: {callback}")
@@ -6179,6 +6210,8 @@ def set_callbacks(callback_list, function_id=None):
            langFuseLogger = LangFuseLogger()
        elif callback == "datadog":
            dataDogLogger = DataDogLogger()
        elif callback == "prometheus":
            prometheusLogger = PrometheusLogger()
        elif callback == "dynamodb":
            dynamoLogger = DyanmoDBLogger()
        elif callback == "s3":

requirements.txt

@@ -18,6 +18,7 @@ google-generativeai==0.3.2 # for vertex ai calls
async_generator==1.10.0 # for async ollama calls
langfuse>=2.6.3 # for langfuse self-hosted logging
datadog-api-client==2.23.0 # for datadog logging
prometheus_client==0.20.0 # for /metrics endpoint on proxy
orjson==3.9.15 # fast /embedding responses
apscheduler==3.10.4 # for resetting budget in background
fastapi-sso==0.10.0 # admin UI, SSO
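
The pinned prometheus_client can also parse what the endpoint exposes, which makes an end-to-end check easy. A sketch assuming the proxy runs on localhost:4000:

import requests
from prometheus_client.parser import text_string_to_metric_families

# scrape the proxy and print every litellm_* sample with its labels
text = requests.get("http://localhost:4000/metrics").text
for family in text_string_to_metric_families(text):
    if family.name.startswith("litellm"):
        for sample in family.samples:
            print(sample.name, sample.labels, sample.value)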