forked from phoenix/litellm-mirror
Merge pull request #2591 from BerriAI/litellm_metrics_endpoint
[Feat] /metrics endpoint for Prometheus, Grafana
This commit is contained in:
commit
c94bc94ad5
5 changed files with 134 additions and 2 deletions
87
litellm/integrations/prometheus.py
Normal file
87
litellm/integrations/prometheus.py
Normal file
|
@ -0,0 +1,87 @@
|
|||
# used for /metrics endpoint on LiteLLM Proxy
|
||||
#### What this does ####
|
||||
# Records request count, spend, and token usage in Prometheus counters
|
||||
|
||||
import dotenv, os
|
||||
import requests
|
||||
|
||||
dotenv.load_dotenv() # Loading env variables using dotenv
|
||||
import traceback
|
||||
import datetime, subprocess, sys
|
||||
import litellm, uuid
|
||||
from litellm._logging import print_verbose, verbose_logger
|
||||
|
||||
|
||||
class PrometheusLogger:
    """Record per-request LiteLLM usage in Prometheus counters.

    Three counters are registered, each labelled by ``user``, ``key`` and
    ``model``: number of requests, spend, and total tokens.
    """

    def __init__(
        self,
        **kwargs,
    ):
        """Register the request, spend and token counters.

        ``prometheus_client`` is imported inside this method rather than at
        module level, so the import only runs when a logger is instantiated.
        ``kwargs`` is accepted but not read.

        Raises:
            Exception: any error raised by the import or by counter
                registration is reported via ``print_verbose`` and re-raised.
        """
        try:
            verbose_logger.debug("in init prometheus metrics")
            from prometheus_client import Counter

            # Counter for the number of requests
            self.litellm_requests_metric = Counter(
                name="litellm_requests_metric",
                documentation="Total number of LLM calls to litellm",
                labelnames=["user", "key", "model"],
            )

            # Counter for spend
            self.litellm_spend_metric = Counter(
                name="litellm_spend_metric",
                documentation="Total spend on LLM requests",
                labelnames=["user", "key", "model"],
            )

            # Counter for total (input + output) tokens
            self.litellm_tokens_metric = Counter(
                name="litellm_total_tokens",
                documentation="Total number of input + output tokens from LLM requests",
                labelnames=["user", "key", "model"],
            )
        except Exception as e:
            print_verbose(f"Got exception on init prometheus client {str(e)}")
            # Bare raise keeps the original traceback intact.
            raise

    async def _async_log_event(
        self, kwargs, response_obj, start_time, end_time, print_verbose, user_id
    ):
        """Async entry point; delegates to the synchronous ``log_event``.

        Arguments are forwarded by keyword because ``log_event`` orders
        ``user_id`` and ``print_verbose`` differently from this method, and
        requires both of them.
        """
        self.log_event(
            kwargs=kwargs,
            response_obj=response_obj,
            start_time=start_time,
            end_time=end_time,
            user_id=user_id,
            print_verbose=print_verbose,
        )

    def log_event(
        self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
    ):
        """Increment the request, spend and token counters for one call.

        Args:
            kwargs: call details; ``model``, ``response_cost`` and
                ``litellm_params`` (its ``proxy_server_request`` body and
                ``metadata``) are read from it.
            response_obj: response whose ``usage`` -> ``total_tokens`` value is
                added to the token counter; ``None`` counts as zero tokens.
            start_time: accepted for the callback interface; not used here.
            end_time: accepted for the callback interface; not used here.
            user_id: accepted for the callback interface; not used here. The
                ``user`` label comes from the proxy request body instead.
            print_verbose: callable used to emit the verbose trace line.

        Errors are reported but never propagated, so a metrics failure cannot
        break the request that triggered it.
        """
        try:
            verbose_logger.debug(
                f"prometheus Logging - Enters logging function for model {kwargs}"
            )

            # unpack kwargs
            # `or` fallbacks cover keys that are present but explicitly None;
            # without them the lookup chain raises and every metric for the
            # request is silently dropped by the except clause below.
            model = kwargs.get("model", "")
            response_cost = kwargs.get("response_cost", 0.0) or 0.0
            litellm_params = kwargs.get("litellm_params", {}) or {}
            proxy_server_request = litellm_params.get("proxy_server_request") or {}
            request_body = proxy_server_request.get("body", {}) or {}
            end_user_id = request_body.get("user", None)
            metadata = litellm_params.get("metadata", {}) or {}
            user_api_key = metadata.get("user_api_key", None)

            if response_obj is not None:
                usage = response_obj.get("usage", {}) or {}
                tokens_used = usage.get("total_tokens", 0) or 0
            else:
                tokens_used = 0

            print_verbose(
                f"inside track_prometheus_metrics, model {model}, response_cost {response_cost}, tokens_used {tokens_used}, end_user_id {end_user_id}, user_api_key {user_api_key}"
            )

            self.litellm_requests_metric.labels(end_user_id, user_api_key, model).inc()
            self.litellm_spend_metric.labels(end_user_id, user_api_key, model).inc(
                response_cost
            )
            self.litellm_tokens_metric.labels(end_user_id, user_api_key, model).inc(
                tokens_used
            )
        except Exception as e:
            # Best-effort logging: report the failure and carry on.
            traceback.print_exc()
            verbose_logger.debug(
                f"prometheus Layer Error - {str(e)}\n{traceback.format_exc()}"
            )
|
|
@ -14,4 +14,6 @@ general_settings:
|
|||
master_key: sk-1234
|
||||
router_settings:
|
||||
set_verbose: True
|
||||
debug_level: "DEBUG"
|
||||
debug_level: "DEBUG"
|
||||
litellm_settings:
|
||||
success_callback: ["prometheus"]
|
|
@ -1872,6 +1872,15 @@ class ProxyConfig:
|
|||
# these are litellm callbacks - "langfuse", "sentry", "wandb"
|
||||
else:
|
||||
litellm.success_callback.append(callback)
|
||||
if "prometheus" in callback:
|
||||
verbose_proxy_logger.debug(
|
||||
"Starting Prometheus Metrics on /metrics"
|
||||
)
|
||||
from prometheus_client import make_asgi_app
|
||||
|
||||
# Add prometheus asgi middleware to route /metrics requests
|
||||
metrics_app = make_asgi_app()
|
||||
app.mount("/metrics", metrics_app)
|
||||
print( # noqa
|
||||
f"{blue_color_code} Initialized Success Callbacks - {litellm.success_callback} {reset_color_code}"
|
||||
) # noqa
|
||||
|
|
|
@ -66,6 +66,7 @@ from .integrations.weights_biases import WeightsBiasesLogger
|
|||
from .integrations.custom_logger import CustomLogger
|
||||
from .integrations.langfuse import LangFuseLogger
|
||||
from .integrations.datadog import DataDogLogger
|
||||
from .integrations.prometheus import PrometheusLogger
|
||||
from .integrations.dynamodb import DyanmoDBLogger
|
||||
from .integrations.s3 import S3Logger
|
||||
from .integrations.clickhouse import ClickhouseLogger
|
||||
|
@ -123,6 +124,7 @@ weightsBiasesLogger = None
|
|||
customLogger = None
|
||||
langFuseLogger = None
|
||||
dataDogLogger = None
|
||||
prometheusLogger = None
|
||||
dynamoLogger = None
|
||||
s3Logger = None
|
||||
genericAPILogger = None
|
||||
|
@ -1502,6 +1504,35 @@ class Logging:
|
|||
user_id=kwargs.get("user", None),
|
||||
print_verbose=print_verbose,
|
||||
)
|
||||
if callback == "prometheus":
|
||||
global prometheusLogger
|
||||
verbose_logger.debug("reaches prometheus for success logging!")
|
||||
kwargs = {}
|
||||
for k, v in self.model_call_details.items():
|
||||
if (
|
||||
k != "original_response"
|
||||
): # copy.deepcopy raises errors as this could be a coroutine
|
||||
kwargs[k] = v
|
||||
# this only logs streaming once, complete_streaming_response exists i.e when stream ends
|
||||
if self.stream:
|
||||
verbose_logger.debug(
|
||||
f"prometheus: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}"
|
||||
)
|
||||
if complete_streaming_response is None:
|
||||
continue
|
||||
else:
|
||||
print_verbose(
|
||||
"reaches prometheus for streaming logging!"
|
||||
)
|
||||
result = kwargs["complete_streaming_response"]
|
||||
prometheusLogger.log_event(
|
||||
kwargs=kwargs,
|
||||
response_obj=result,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
user_id=kwargs.get("user", None),
|
||||
print_verbose=print_verbose,
|
||||
)
|
||||
if callback == "generic":
|
||||
global genericAPILogger
|
||||
verbose_logger.debug("reaches langfuse for success logging!")
|
||||
|
@ -6111,7 +6142,7 @@ def validate_environment(model: Optional[str] = None) -> dict:
|
|||
|
||||
|
||||
def set_callbacks(callback_list, function_id=None):
|
||||
global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, llmonitorLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, dynamoLogger, s3Logger, dataDogLogger
|
||||
global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, llmonitorLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger
|
||||
try:
|
||||
for callback in callback_list:
|
||||
print_verbose(f"callback: {callback}")
|
||||
|
@ -6179,6 +6210,8 @@ def set_callbacks(callback_list, function_id=None):
|
|||
langFuseLogger = LangFuseLogger()
|
||||
elif callback == "datadog":
|
||||
dataDogLogger = DataDogLogger()
|
||||
elif callback == "prometheus":
|
||||
prometheusLogger = PrometheusLogger()
|
||||
elif callback == "dynamodb":
|
||||
dynamoLogger = DyanmoDBLogger()
|
||||
elif callback == "s3":
|
||||
|
|
|
@ -18,6 +18,7 @@ google-generativeai==0.3.2 # for vertex ai calls
|
|||
async_generator==1.10.0 # for async ollama calls
|
||||
langfuse>=2.6.3 # for langfuse self-hosted logging
|
||||
datadog-api-client==2.23.0 # for datadog logging
|
||||
prometheus_client==0.20.0 # for /metrics endpoint on proxy
|
||||
orjson==3.9.15 # fast /embedding responses
|
||||
apscheduler==3.10.4 # for resetting budget in background
|
||||
fastapi-sso==0.10.0 # admin UI, SSO
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue