Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 18:54:30 +00:00
feat - prometheus log remaining headers
This commit is contained in:
parent 04a975d486
commit c37d45d556

1 changed file with 96 additions and 5 deletions
@@ -2,14 +2,20 @@
 #### What this does ####
 #    On success, log events to Prometheus
 
-import dotenv, os
-import requests  # type: ignore
-import datetime, subprocess, sys
-import litellm, uuid
-from litellm._logging import print_verbose, verbose_logger
+import datetime
+import os
+import subprocess
+import sys
+import traceback
 import uuid
 from typing import Optional, Union
+
+import dotenv
+import requests  # type: ignore
+
+import litellm
+from litellm._logging import print_verbose, verbose_logger
 
 
 class PrometheusLogger:
     # Class variables or attributes
@@ -20,6 +26,8 @@ class PrometheusLogger:
         try:
             from prometheus_client import Counter, Gauge
 
+            from litellm.proxy.proxy_server import premium_user
+
             self.litellm_llm_api_failed_requests_metric = Counter(
                 name="litellm_llm_api_failed_requests_metric",
                 documentation="Total number of failed LLM API calls via litellm",
@@ -88,6 +96,31 @@
                 labelnames=["hashed_api_key", "api_key_alias"],
             )
 
+            # Litellm-Enterprise Metrics
+            if premium_user is True:
+                # Remaining Rate Limit for model
+                self.litellm_remaining_requests_metric = Gauge(
+                    "litellm_remaining_requests",
+                    "remaining requests for model, returned from LLM API Provider",
+                    labelnames=[
+                        "model_group",
+                        "api_provider",
+                        "api_base",
+                        "litellm_model_name",
+                    ],
+                )
+
+                self.litellm_remaining_tokens_metric = Gauge(
+                    "litellm_remaining_tokens",
+                    "remaining tokens for model, returned from LLM API Provider",
+                    labelnames=[
+                        "model_group",
+                        "api_provider",
+                        "api_base",
+                        "litellm_model_name",
+                    ],
+                )
+
         except Exception as e:
             print_verbose(f"Got exception on init prometheus client {str(e)}")
             raise e
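Editorial note on the pattern above: these are standard prometheus_client Gauges. Each distinct label tuple becomes its own time series, and calling .labels(...).set(...) overwrites the previous value. A minimal, self-contained sketch of the same pattern follows; the metric name and label values here are hypothetical, chosen to avoid clashing with the real litellm_remaining_requests metric.

```python
# Sketch only: mirrors the Gauge usage added in this commit, with made-up values.
from prometheus_client import Gauge, generate_latest

remaining_requests = Gauge(
    "example_remaining_requests",  # hypothetical name; the commit uses litellm_remaining_requests
    "remaining requests for model, returned from LLM API Provider",
    labelnames=["model_group", "api_provider", "api_base", "litellm_model_name"],
)

# One time series per label tuple; set() replaces the previous sample.
remaining_requests.labels(
    "gpt-4", "openai", "https://api.openai.com", "gpt-4-0613"
).set(4999)

print(generate_latest().decode())  # exposition-format text, as served on /metrics
```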
@@ -104,6 +137,8 @@
     ):
         try:
             # Define prometheus client
+            from litellm.proxy.proxy_server import premium_user
+
             verbose_logger.debug(
                 f"prometheus Logging - Enters logging function for model {kwargs}"
             )
@@ -199,6 +234,10 @@
                     user_api_key, user_api_key_alias
                 ).set(_remaining_api_key_budget)
 
+            # set x-ratelimit headers
+            if premium_user is True:
+                self.set_remaining_tokens_requests_metric(kwargs)
+
             ### FAILURE INCREMENT ###
             if "exception" in kwargs:
                 self.litellm_llm_api_failed_requests_metric.labels(
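Editorial note: the remaining-requests and remaining-tokens gauges are only created in __init__ when premium_user is True, so this call is gated on the same condition; invoking set_remaining_tokens_requests_metric on a non-premium instance would otherwise raise AttributeError for the missing metric attributes.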
@@ -216,6 +255,58 @@
             verbose_logger.debug(traceback.format_exc())
             pass
 
+    def set_remaining_tokens_requests_metric(self, request_kwargs: dict):
+        try:
+            verbose_logger.debug("setting remaining tokens requests metric")
+            _response_headers = request_kwargs.get("response_headers")
+            _litellm_params = request_kwargs.get("litellm_params", {}) or {}
+            _metadata = _litellm_params.get("metadata", {})
+            litellm_model_name = request_kwargs.get("model", None)
+            model_group = _metadata.get("model_group", None)
+            api_base = _metadata.get("api_base", None)
+            llm_provider = _litellm_params.get("custom_llm_provider", None)
+
+            remaining_requests = None
+            remaining_tokens = None
+            # OpenAI / OpenAI Compatible headers
+            if (
+                _response_headers
+                and "x-ratelimit-remaining-requests" in _response_headers
+            ):
+                remaining_requests = _response_headers["x-ratelimit-remaining-requests"]
+            if (
+                _response_headers
+                and "x-ratelimit-remaining-tokens" in _response_headers
+            ):
+                remaining_tokens = _response_headers["x-ratelimit-remaining-tokens"]
+            verbose_logger.debug(
+                f"remaining requests: {remaining_requests}, remaining tokens: {remaining_tokens}"
+            )
+
+            if remaining_requests:
+                """
+                "model_group",
+                "api_provider",
+                "api_base",
+                "litellm_model_name"
+                """
+                self.litellm_remaining_requests_metric.labels(
+                    model_group, llm_provider, api_base, litellm_model_name
+                ).set(remaining_requests)
+
+            if remaining_tokens:
+                self.litellm_remaining_tokens_metric.labels(
+                    model_group, llm_provider, api_base, litellm_model_name
+                ).set(remaining_tokens)
+
+        except Exception as e:
+            verbose_logger.error(
+                "Prometheus Error: set_remaining_tokens_requests_metric. Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            return
+
 
 def safe_get_remaining_budget(
     max_budget: Optional[float], spend: Optional[float]
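To see the whole flow in one place, here is a self-contained sketch (not part of the commit) that feeds a hypothetical request_kwargs, shaped like the fields the new method reads, through the same header-to-gauge logic. OpenAI-style rate-limit header values arrive as strings, which Gauge.set() accepts since it coerces its argument to float.

```python
# Sketch only: simulates set_remaining_tokens_requests_metric with sample data.
from prometheus_client import Gauge, generate_latest

remaining_tokens_metric = Gauge(
    "example_remaining_tokens",  # hypothetical name; the commit uses litellm_remaining_tokens
    "remaining tokens for model, returned from LLM API Provider",
    labelnames=["model_group", "api_provider", "api_base", "litellm_model_name"],
)

# Hypothetical kwargs shaped like what the logger receives for one request.
request_kwargs = {
    "model": "gpt-4-0613",
    "litellm_params": {
        "custom_llm_provider": "openai",
        "metadata": {"model_group": "gpt-4", "api_base": "https://api.openai.com"},
    },
    "response_headers": {"x-ratelimit-remaining-tokens": "239980"},  # header values are strings
}

_litellm_params = request_kwargs.get("litellm_params", {}) or {}
_metadata = _litellm_params.get("metadata", {})
_headers = request_kwargs.get("response_headers") or {}

if "x-ratelimit-remaining-tokens" in _headers:
    remaining_tokens_metric.labels(
        _metadata.get("model_group"),
        _litellm_params.get("custom_llm_provider"),
        _metadata.get("api_base"),
        request_kwargs.get("model"),
    ).set(_headers["x-ratelimit-remaining-tokens"])  # set() coerces the string to float

print(generate_latest().decode())
```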