track litellm_request_latency_metric

This commit is contained in:
Ishaan Jaff 2024-08-22 13:58:10 -07:00
parent e2cdb00a81
commit 06a362d35f
2 changed files with 41 additions and 1 deletions

View file

@ -60,6 +60,25 @@ class PrometheusLogger(CustomLogger):
],
)
# Latency histograms; observed values are in seconds. Both metrics are keyed
# by the same pair of labels, so the label list is declared once.
# NOTE(review): litellm_call_id looks like a per-request identifier; if so,
# every request creates a new time series for each histogram -- confirm this
# label cardinality is intended.
_latency_labelnames = ["model", "litellm_call_id"]

# Time spent on the whole request as seen by LiteLLM.
self.litellm_request_latency_metric = Histogram(
"litellm_request_latency_metric",
"Total latency (seconds) for a request to LiteLLM",
labelnames=_latency_labelnames,
)

# Time spent on the call to the underlying LLM API.
self.litellm_deployment_latency_metric = Histogram(
"litellm_deployment_latency_metric",
"Total latency (seconds) for a models LLM API call",
labelnames=_latency_labelnames,
)
# Counter for spend
self.litellm_spend_metric = Counter(
"litellm_spend_metric",
@ -329,6 +348,25 @@ class PrometheusLogger(CustomLogger):
user_api_key, user_api_key_alias, model_group
).set(remaining_tokens)
# latency metrics
# Read the timestamps defensively: kwargs.get() returns None for a missing
# key, and subtracting None raises TypeError, which would abort the rest of
# this logging hook. Each histogram is therefore only observed when both of
# its endpoints are present. The local names are deliberately distinct from
# "start_time"/"end_time" so they cannot shadow variables of the enclosing
# method.
_latency_end = kwargs.get("end_time")
_latency_start = kwargs.get("start_time")
_latency_api_start = kwargs.get("api_call_start_time")
litellm_call_id = kwargs.get("litellm_call_id")

# total request latency: start_time -> end_time
if _latency_end is not None and _latency_start is not None:
    total_time: timedelta = _latency_end - _latency_start
    total_time_seconds = total_time.total_seconds()
    self.litellm_request_latency_metric.labels(model, litellm_call_id).observe(
        total_time_seconds
    )

# LLM API call latency: api_call_start_time -> end_time
if _latency_end is not None and _latency_api_start is not None:
    api_call_total_time: timedelta = _latency_end - _latency_api_start
    api_call_total_time_seconds = api_call_total_time.total_seconds()
    self.litellm_deployment_latency_metric.labels(model, litellm_call_id).observe(
        api_call_total_time_seconds
    )
# set x-ratelimit headers
if premium_user is True:
self.set_llm_deployment_success_metrics(