use litellm_ prefix for new deployment metrics

This commit is contained in:
Ishaan Jaff 2024-08-14 09:08:14 -07:00
parent 4cef6df4cf
commit acadabe6c9
4 changed files with 38 additions and 38 deletions


@@ -72,15 +72,15 @@ http://localhost:4000/metrics
 | Metric Name | Description |
 |----------------------|--------------------------------------|
-| `deployment_state` | The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage. |
+| `litellm_deployment_state` | The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage. |
 | `litellm_remaining_requests_metric` | Track `x-ratelimit-remaining-requests` returned from LLM API Deployment |
 | `litellm_remaining_tokens` | Track `x-ratelimit-remaining-tokens` return from LLM API Deployment |
-| `llm_deployment_success_responses` | Total number of successful LLM API calls for deployment |
-| `llm_deployment_failure_responses` | Total number of failed LLM API calls for deployment |
-| `llm_deployment_total_requests` | Total number of LLM API calls for deployment - success + failure |
-| `llm_deployment_latency_per_output_token` | Latency per output token for deployment |
-| `llm_deployment_successful_fallbacks` | Number of successful fallback requests from primary model -> fallback model |
-| `llm_deployment_failed_fallbacks` | Number of failed fallback requests from primary model -> fallback model |
+| `litellm_deployment_success_responses` | Total number of successful LLM API calls for deployment |
+| `litellm_deployment_failure_responses` | Total number of failed LLM API calls for deployment |
+| `litellm_deployment_total_requests` | Total number of LLM API calls for deployment - success + failure |
+| `litellm_deployment_latency_per_output_token` | Latency per output token for deployment |
+| `litellm_deployment_successful_fallbacks` | Number of successful fallback requests from primary model -> fallback model |
+| `litellm_deployment_failed_fallbacks` | Number of failed fallback requests from primary model -> fallback model |
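
The renamed metrics appear directly in the proxy's Prometheus exposition. As a quick check, here is a minimal sketch (assuming a proxy running locally with Prometheus logging enabled, per the `http://localhost:4000/metrics` endpoint in the hunk header above) that lists the deployment-level metric families:

import requests  # assumes the requests package is installed

# Fetch the exposition text and print only the deployment-level
# metric families renamed in this commit.
text = requests.get("http://localhost:4000/metrics").text
for line in text.splitlines():
    if line.startswith("# HELP litellm_deployment_"):
        print(line)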


@@ -141,42 +141,42 @@ class PrometheusLogger(CustomLogger):
         ]
         # Metric for deployment state
-        self.deployment_state = Gauge(
-            "deployment_state",
+        self.litellm_deployment_state = Gauge(
+            "litellm_deployment_state",
             "LLM Deployment Analytics - The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage",
             labelnames=_logged_llm_labels,
         )
-        self.llm_deployment_success_responses = Counter(
-            name="llm_deployment_success_responses",
+        self.litellm_deployment_success_responses = Counter(
+            name="litellm_deployment_success_responses",
             documentation="LLM Deployment Analytics - Total number of successful LLM API calls via litellm",
             labelnames=_logged_llm_labels,
         )
-        self.llm_deployment_failure_responses = Counter(
-            name="llm_deployment_failure_responses",
+        self.litellm_deployment_failure_responses = Counter(
+            name="litellm_deployment_failure_responses",
             documentation="LLM Deployment Analytics - Total number of failed LLM API calls via litellm",
             labelnames=_logged_llm_labels,
         )
-        self.llm_deployment_total_requests = Counter(
-            name="llm_deployment_total_requests",
+        self.litellm_deployment_total_requests = Counter(
+            name="litellm_deployment_total_requests",
             documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure",
             labelnames=_logged_llm_labels,
         )
         # Deployment Latency tracking
-        self.llm_deployment_latency_per_output_token = Histogram(
-            name="llm_deployment_latency_per_output_token",
+        self.litellm_deployment_latency_per_output_token = Histogram(
+            name="litellm_deployment_latency_per_output_token",
             documentation="LLM Deployment Analytics - Latency per output token",
             labelnames=_logged_llm_labels,
         )
-        self.llm_deployment_successful_fallbacks = Counter(
-            "llm_deployment_successful_fallbacks",
+        self.litellm_deployment_successful_fallbacks = Counter(
+            "litellm_deployment_successful_fallbacks",
             "LLM Deployment Analytics - Number of successful fallback requests from primary model -> fallback model",
             ["primary_model", "fallback_model"],
         )
-        self.llm_deployment_failed_fallbacks = Counter(
-            "llm_deployment_failed_fallbacks",
+        self.litellm_deployment_failed_fallbacks = Counter(
+            "litellm_deployment_failed_fallbacks",
             "LLM Deployment Analytics - Number of failed fallback requests from primary model -> fallback model",
             ["primary_model", "fallback_model"],
         )
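
All of these collectors follow the same prometheus_client pattern: register once with `labelnames`, then record per request via `.labels(...).inc()`, `.set()`, or `.observe()`, as the later hunks show. Below is a standalone sketch of that pattern with the new prefix; the label values and the separate registry are illustrative, not taken from the commit.

from prometheus_client import CollectorRegistry, Counter

# Separate registry so this sketch does not collide with the default one.
registry = CollectorRegistry()

# Label names mirror the .labels(...) calls later in this diff.
_logged_llm_labels = ["litellm_model_name", "model_id", "api_base", "api_provider"]

litellm_deployment_total_requests = Counter(
    name="litellm_deployment_total_requests",
    documentation="Total number of LLM API calls via litellm - success + failure",
    labelnames=_logged_llm_labels,
    registry=registry,
)

# One call (success or failure) against a deployment bumps the counter.
litellm_deployment_total_requests.labels(
    litellm_model_name="gpt-3.5-turbo",   # hypothetical values
    model_id="deployment-123",
    api_base="https://api.openai.com",
    api_provider="openai",
).inc()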
@@ -358,14 +358,14 @@ class PrometheusLogger(CustomLogger):
             api_provider=llm_provider,
         )
-        self.llm_deployment_failure_responses.labels(
+        self.litellm_deployment_failure_responses.labels(
             litellm_model_name=litellm_model_name,
             model_id=model_id,
             api_base=api_base,
             api_provider=llm_provider,
         ).inc()
-        self.llm_deployment_total_requests.labels(
+        self.litellm_deployment_total_requests.labels(
             litellm_model_name=litellm_model_name,
             model_id=model_id,
             api_base=api_base,
@@ -438,14 +438,14 @@ class PrometheusLogger(CustomLogger):
             api_provider=llm_provider,
         )
-        self.llm_deployment_success_responses.labels(
+        self.litellm_deployment_success_responses.labels(
             litellm_model_name=litellm_model_name,
             model_id=model_id,
             api_base=api_base,
             api_provider=llm_provider,
         ).inc()
-        self.llm_deployment_total_requests.labels(
+        self.litellm_deployment_total_requests.labels(
             litellm_model_name=litellm_model_name,
             model_id=model_id,
             api_base=api_base,
@@ -475,7 +475,7 @@ class PrometheusLogger(CustomLogger):
         latency_per_token = None
         if output_tokens is not None and output_tokens > 0:
             latency_per_token = _latency_seconds / output_tokens
-        self.llm_deployment_latency_per_output_token.labels(
+        self.litellm_deployment_latency_per_output_token.labels(
             litellm_model_name=litellm_model_name,
             model_id=model_id,
             api_base=api_base,
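
The histogram records latency normalized by completion size, so deployments that emit long outputs are not penalized for taking longer overall. A self-contained sketch of the same computation (the sample values are made up):

from prometheus_client import CollectorRegistry, Histogram

registry = CollectorRegistry()
latency_per_output_token = Histogram(
    name="litellm_deployment_latency_per_output_token",
    documentation="Latency per output token",
    labelnames=["litellm_model_name", "model_id", "api_base", "api_provider"],
    registry=registry,
)

_latency_seconds = 1.2   # hypothetical end-to-end request latency
output_tokens = 30       # hypothetical completion token count
if output_tokens is not None and output_tokens > 0:
    latency_per_output_token.labels(
        litellm_model_name="gpt-3.5-turbo",
        model_id="deployment-123",
        api_base="https://api.openai.com",
        api_provider="openai",
    ).observe(_latency_seconds / output_tokens)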
@@ -497,7 +497,7 @@ class PrometheusLogger(CustomLogger):
            kwargs,
        )
        _new_model = kwargs.get("model")
-        self.llm_deployment_successful_fallbacks.labels(
+        self.litellm_deployment_successful_fallbacks.labels(
            primary_model=original_model_group, fallback_model=_new_model
        ).inc()
@@ -508,11 +508,11 @@ class PrometheusLogger(CustomLogger):
            kwargs,
        )
        _new_model = kwargs.get("model")
-        self.llm_deployment_failed_fallbacks.labels(
+        self.litellm_deployment_failed_fallbacks.labels(
            primary_model=original_model_group, fallback_model=_new_model
        ).inc()
 
-    def set_deployment_state(
+    def set_litellm_deployment_state(
         self,
         state: int,
         litellm_model_name: str,
@@ -520,7 +520,7 @@ class PrometheusLogger(CustomLogger):
         api_base: str,
         api_provider: str,
     ):
-        self.deployment_state.labels(
+        self.litellm_deployment_state.labels(
             litellm_model_name, model_id, api_base, api_provider
         ).set(state)
@@ -531,7 +531,7 @@ class PrometheusLogger(CustomLogger):
         api_base: str,
         api_provider: str,
     ):
-        self.set_deployment_state(
+        self.set_litellm_deployment_state(
             0, litellm_model_name, model_id, api_base, api_provider
         )
@@ -542,7 +542,7 @@ class PrometheusLogger(CustomLogger):
         api_base: str,
         api_provider: str,
     ):
-        self.set_deployment_state(
+        self.set_litellm_deployment_state(
             1, litellm_model_name, model_id, api_base, api_provider
         )
@@ -553,7 +553,7 @@ class PrometheusLogger(CustomLogger):
         api_base: str,
         api_provider: str,
     ):
-        self.set_deployment_state(
+        self.set_litellm_deployment_state(
             2, litellm_model_name, model_id, api_base, api_provider
         )
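
The three wrappers above all funnel into the same gauge with the documented encoding (0 = healthy, 1 = partial outage, 2 = complete outage). A compact standalone sketch of that design, using module-level functions instead of the class methods:

from prometheus_client import CollectorRegistry, Gauge

registry = CollectorRegistry()
litellm_deployment_state = Gauge(
    "litellm_deployment_state",
    "0 = healthy, 1 = partial outage, 2 = complete outage",
    labelnames=["litellm_model_name", "model_id", "api_base", "api_provider"],
    registry=registry,
)

def set_state(state: int, model_name: str, model_id: str, api_base: str, provider: str):
    # Labels are passed positionally, in the same order as labelnames.
    litellm_deployment_state.labels(model_name, model_id, api_base, provider).set(state)

def set_healthy(*args):          # state 0
    set_state(0, *args)

def set_partial_outage(*args):   # state 1
    set_state(1, *args)

def set_complete_outage(*args):  # state 2
    set_state(2, *args)

# Hypothetical deployment coordinates.
set_healthy("gpt-3.5-turbo", "deployment-123", "https://api.openai.com", "openai")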


@@ -41,8 +41,8 @@ async def get_fallback_metric_from_prometheus():
     """
     response_message = ""
     relevant_metrics = [
-        "llm_deployment_successful_fallbacks_total",
-        "llm_deployment_failed_fallbacks_total",
+        "litellm_deployment_successful_fallbacks_total",
+        "litellm_deployment_failed_fallbacks_total",
     ]
     for metric in relevant_metrics:
         response_json = await get_metric_from_prometheus(


@@ -76,6 +76,6 @@ async def test_async_prometheus_success_logging():
     print("metrics from prometheus", metrics)
     assert metrics["litellm_requests_metric_total"] == 1.0
     assert metrics["litellm_total_tokens_total"] == 30.0
-    assert metrics["llm_deployment_success_responses_total"] == 1.0
-    assert metrics["llm_deployment_total_requests_total"] == 1.0
-    assert metrics["llm_deployment_latency_per_output_token_bucket"] == 1.0
+    assert metrics["litellm_deployment_success_responses_total"] == 1.0
+    assert metrics["litellm_deployment_total_requests_total"] == 1.0
+    assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
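
The test indexes a flat metrics dict by exposed sample name (again with the `_total` and `_bucket` suffixes). One way such a mapping could be built from the exposition text; this `parse_metrics` helper is a hypothetical stand-in, not the helper the test suite actually uses:

from prometheus_client.parser import text_string_to_metric_families

def parse_metrics(exposition_text: str) -> dict:
    # Flatten every sample into {sample_name: value}; later samples with the
    # same name overwrite earlier ones, which is enough for these assertions.
    metrics = {}
    for family in text_string_to_metric_families(exposition_text):
        for sample in family.samples:
            metrics[sample.name] = sample.value
    return metrics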