mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 11:43:54 +00:00
feat - prometheus log remaining headers
This commit is contained in:
parent
107876ea46
commit
86e3cae596
1 changed file with 96 additions and 5 deletions
|
@ -2,14 +2,20 @@
|
||||||
#### What this does ####
|
#### What this does ####
|
||||||
# On success, log events to Prometheus
|
# On success, log events to Prometheus
|
||||||
|
|
||||||
import dotenv, os
|
import datetime
|
||||||
import requests # type: ignore
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
import datetime, subprocess, sys
|
import uuid
|
||||||
import litellm, uuid
|
|
||||||
from litellm._logging import print_verbose, verbose_logger
|
|
||||||
from typing import Optional, Union
|
from typing import Optional, Union
|
||||||
|
|
||||||
|
import dotenv
|
||||||
|
import requests # type: ignore
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
from litellm._logging import print_verbose, verbose_logger
|
||||||
|
|
||||||
|
|
||||||
class PrometheusLogger:
|
class PrometheusLogger:
|
||||||
# Class variables or attributes
|
# Class variables or attributes
|
||||||
|
@ -20,6 +26,8 @@ class PrometheusLogger:
|
||||||
try:
|
try:
|
||||||
from prometheus_client import Counter, Gauge
|
from prometheus_client import Counter, Gauge
|
||||||
|
|
||||||
|
from litellm.proxy.proxy_server import premium_user
|
||||||
|
|
||||||
self.litellm_llm_api_failed_requests_metric = Counter(
|
self.litellm_llm_api_failed_requests_metric = Counter(
|
||||||
name="litellm_llm_api_failed_requests_metric",
|
name="litellm_llm_api_failed_requests_metric",
|
||||||
documentation="Total number of failed LLM API calls via litellm",
|
documentation="Total number of failed LLM API calls via litellm",
|
||||||
|
@ -88,6 +96,31 @@ class PrometheusLogger:
|
||||||
labelnames=["hashed_api_key", "api_key_alias"],
|
labelnames=["hashed_api_key", "api_key_alias"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Litellm-Enterprise Metrics
|
||||||
|
if premium_user is True:
|
||||||
|
# Remaining Rate Limit for model
|
||||||
|
self.litellm_remaining_requests_metric = Gauge(
|
||||||
|
"litellm_remaining_requests",
|
||||||
|
"remaining requests for model, returned from LLM API Provider",
|
||||||
|
labelnames=[
|
||||||
|
"model_group",
|
||||||
|
"api_provider",
|
||||||
|
"api_base",
|
||||||
|
"litellm_model_name",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
self.litellm_remaining_tokens_metric = Gauge(
|
||||||
|
"litellm_remaining_tokens",
|
||||||
|
"remaining tokens for model, returned from LLM API Provider",
|
||||||
|
labelnames=[
|
||||||
|
"model_group",
|
||||||
|
"api_provider",
|
||||||
|
"api_base",
|
||||||
|
"litellm_model_name",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print_verbose(f"Got exception on init prometheus client {str(e)}")
|
print_verbose(f"Got exception on init prometheus client {str(e)}")
|
||||||
raise e
|
raise e
|
||||||
|
@ -104,6 +137,8 @@ class PrometheusLogger:
|
||||||
):
|
):
|
||||||
try:
|
try:
|
||||||
# Define prometheus client
|
# Define prometheus client
|
||||||
|
from litellm.proxy.proxy_server import premium_user
|
||||||
|
|
||||||
verbose_logger.debug(
|
verbose_logger.debug(
|
||||||
f"prometheus Logging - Enters logging function for model {kwargs}"
|
f"prometheus Logging - Enters logging function for model {kwargs}"
|
||||||
)
|
)
|
||||||
|
@ -199,6 +234,10 @@ class PrometheusLogger:
|
||||||
user_api_key, user_api_key_alias
|
user_api_key, user_api_key_alias
|
||||||
).set(_remaining_api_key_budget)
|
).set(_remaining_api_key_budget)
|
||||||
|
|
||||||
|
# set x-ratelimit headers
|
||||||
|
if premium_user is True:
|
||||||
|
self.set_remaining_tokens_requests_metric(kwargs)
|
||||||
|
|
||||||
### FAILURE INCREMENT ###
|
### FAILURE INCREMENT ###
|
||||||
if "exception" in kwargs:
|
if "exception" in kwargs:
|
||||||
self.litellm_llm_api_failed_requests_metric.labels(
|
self.litellm_llm_api_failed_requests_metric.labels(
|
||||||
|
@ -216,6 +255,58 @@ class PrometheusLogger:
|
||||||
verbose_logger.debug(traceback.format_exc())
|
verbose_logger.debug(traceback.format_exc())
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def set_remaining_tokens_requests_metric(self, request_kwargs: dict):
    """
    Record the provider-reported remaining rate-limit budget on the Prometheus gauges.

    Reads the OpenAI / OpenAI-compatible ``x-ratelimit-remaining-requests`` and
    ``x-ratelimit-remaining-tokens`` entries from
    ``request_kwargs["response_headers"]`` and sets them on
    ``self.litellm_remaining_requests_metric`` and
    ``self.litellm_remaining_tokens_metric`` respectively.

    Args:
        request_kwargs: logging kwargs for the call. Keys read here are
            ``response_headers``, ``litellm_params`` (its ``metadata`` and
            ``custom_llm_provider`` entries) and ``model``.

    Returns:
        None. Any exception is logged through ``verbose_logger.error`` and
        swallowed, so a metrics failure is not propagated to the caller.
    """

    def _to_gauge_value(header_name, raw_value):
        # Convert a header value to a float for the gauge, or return None when
        # the header is absent or not numeric. Parsing here (instead of letting
        # Gauge.set() raise) keeps one malformed header from blocking the other.
        if raw_value is None:
            return None
        try:
            return float(raw_value)
        except (TypeError, ValueError):
            verbose_logger.debug(
                f"prometheus Logging - ignoring non-numeric {header_name}: {raw_value!r}"
            )
            return None

    try:
        verbose_logger.debug("setting remaining tokens requests metric")
        _response_headers = request_kwargs.get("response_headers")
        _litellm_params = request_kwargs.get("litellm_params", {}) or {}
        # `metadata` may be present but explicitly None; fall back to an empty
        # dict the same way `litellm_params` does above.
        _metadata = _litellm_params.get("metadata", {}) or {}
        litellm_model_name = request_kwargs.get("model", None)
        model_group = _metadata.get("model_group", None)
        api_base = _metadata.get("api_base", None)
        llm_provider = _litellm_params.get("custom_llm_provider", None)

        remaining_requests = None
        remaining_tokens = None
        # OpenAI / OpenAI Compatible headers
        if _response_headers:
            if "x-ratelimit-remaining-requests" in _response_headers:
                remaining_requests = _response_headers[
                    "x-ratelimit-remaining-requests"
                ]
            if "x-ratelimit-remaining-tokens" in _response_headers:
                remaining_tokens = _response_headers["x-ratelimit-remaining-tokens"]
        verbose_logger.debug(
            f"remaining requests: {remaining_requests}, remaining tokens: {remaining_tokens}"
        )

        # Label order must match the gauges' labelnames:
        # model_group, api_provider, api_base, litellm_model_name
        requests_value = _to_gauge_value(
            "x-ratelimit-remaining-requests", remaining_requests
        )
        # Compare against None rather than relying on truthiness: a remaining
        # budget of 0 is a real reading and must still reach the gauge.
        if requests_value is not None:
            self.litellm_remaining_requests_metric.labels(
                model_group, llm_provider, api_base, litellm_model_name
            ).set(requests_value)

        tokens_value = _to_gauge_value(
            "x-ratelimit-remaining-tokens", remaining_tokens
        )
        if tokens_value is not None:
            self.litellm_remaining_tokens_metric.labels(
                model_group, llm_provider, api_base, litellm_model_name
            ).set(tokens_value)

    except Exception as e:
        verbose_logger.error(
            "Prometheus Error: set_remaining_tokens_requests_metric. Exception occured - {}".format(
                str(e)
            )
        )
|
||||||
|
|
||||||
|
|
||||||
def safe_get_remaining_budget(
|
def safe_get_remaining_budget(
|
||||||
max_budget: Optional[float], spend: Optional[float]
|
max_budget: Optional[float], spend: Optional[float]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue