mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
track litellm_request_latency_metric
This commit is contained in:
parent
e2cdb00a81
commit
06a362d35f
2 changed files with 41 additions and 1 deletion
|
@ -60,6 +60,25 @@ class PrometheusLogger(CustomLogger):
|
|||
],
|
||||
)
|
||||
|
||||
# request latency metrics
|
||||
self.litellm_request_latency_metric = Histogram(
|
||||
"litellm_request_latency_metric",
|
||||
"Total latency (seconds) for a request to LiteLLM",
|
||||
labelnames=[
|
||||
"model",
|
||||
"litellm_call_id",
|
||||
],
|
||||
)
|
||||
|
||||
self.litellm_deployment_latency_metric = Histogram(
|
||||
"litellm_deployment_latency_metric",
|
||||
"Total latency (seconds) for a models LLM API call",
|
||||
labelnames=[
|
||||
"model",
|
||||
"litellm_call_id",
|
||||
],
|
||||
)
|
||||
|
||||
# Counter for spend
|
||||
self.litellm_spend_metric = Counter(
|
||||
"litellm_spend_metric",
|
||||
|
@ -329,6 +348,25 @@ class PrometheusLogger(CustomLogger):
|
|||
user_api_key, user_api_key_alias, model_group
|
||||
).set(remaining_tokens)
|
||||
|
||||
# latency metrics
|
||||
total_time: timedelta = kwargs.get("end_time") - kwargs.get("start_time")
|
||||
total_time_seconds = total_time.total_seconds()
|
||||
api_call_total_time: timedelta = kwargs.get("end_time") - kwargs.get(
|
||||
"api_call_start_time"
|
||||
)
|
||||
|
||||
api_call_total_time_seconds = api_call_total_time.total_seconds()
|
||||
|
||||
litellm_call_id = kwargs.get("litellm_call_id")
|
||||
|
||||
self.litellm_request_latency_metric.labels(model, litellm_call_id).observe(
|
||||
total_time_seconds
|
||||
)
|
||||
|
||||
self.litellm_deployment_latency_metric.labels(model, litellm_call_id).observe(
|
||||
api_call_total_time_seconds
|
||||
)
|
||||
|
||||
# set x-ratelimit headers
|
||||
if premium_user is True:
|
||||
self.set_llm_deployment_success_metrics(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue