From a109853d21759c78e3185c9db3d1b9cebdffe54b Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 18 Sep 2024 12:46:58 -0700
Subject: [PATCH] [Prometheus] track requested model (#5774)

* enforce prometheus as enterprise feature

* show correct error on prometheus metric when not enterprise user

* docs prometheus metrics enforced

* track requested model on prometheus

* docs prom metrics

* fix prom tracking failures
---
 docs/my-website/docs/proxy/prometheus.md |  9 +++++++++
 litellm/integrations/prometheus.py       | 13 +++++++++----
 litellm/proxy/_types.py                  |  2 +-
 litellm/proxy/proxy_config.yaml          |  4 +++-
 4 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/docs/my-website/docs/proxy/prometheus.md b/docs/my-website/docs/proxy/prometheus.md
index e44fd0df6..71af4f40d 100644
--- a/docs/my-website/docs/proxy/prometheus.md
+++ b/docs/my-website/docs/proxy/prometheus.md
@@ -74,6 +74,15 @@ Use this for tracking per [user, key, team, etc.](virtual_keys)
 
 Use this for LLM API Error monitoring and tracking remaining rate limits and token limits
 
+#### Labels Tracked for LLM API Metrics
+```json
+litellm_model_name: The name of the LLM model used by LiteLLM
+requested_model: The model sent in the request
+model_id: The model_id of the deployment. Autogenerated by LiteLLM, each deployment has a unique model_id
+api_base: The API Base of the deployment
+api_provider: The LLM API provider. Example (azure, openai, vertex_ai)
+```
+
 | Metric Name          | Description                          |
 |----------------------|--------------------------------------|
 `litellm_deployment_success_responses`  | Total number of successful LLM API calls for deployment |
diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index 0bf7079d0..5fb69a90a 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -193,17 +193,17 @@ class PrometheusLogger(CustomLogger):
         self.litellm_deployment_success_responses = Counter(
             name="litellm_deployment_success_responses",
             documentation="LLM Deployment Analytics - Total number of successful LLM API calls via litellm",
-            labelnames=_logged_llm_labels,
+            labelnames=["requested_model"] + _logged_llm_labels,
         )
         self.litellm_deployment_failure_responses = Counter(
             name="litellm_deployment_failure_responses",
             documentation="LLM Deployment Analytics - Total number of failed LLM API calls for a specific LLM deployment. exception_status is the status of the exception from the llm api",
-            labelnames=_logged_llm_labels + ["exception_status"],
+            labelnames=["requested_model", "exception_status"] + _logged_llm_labels,
         )
         self.litellm_deployment_total_requests = Counter(
             name="litellm_deployment_total_requests",
             documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure",
-            labelnames=_logged_llm_labels,
+            labelnames=["requested_model"] + _logged_llm_labels,
         )
 
         # Deployment Latency tracking
@@ -440,6 +440,7 @@ class PrometheusLogger(CustomLogger):
         _metadata = _litellm_params.get("metadata", {})
         litellm_model_name = request_kwargs.get("model", None)
         api_base = _metadata.get("api_base", None)
+        model_group = _metadata.get("model_group", None)
         if api_base is None:
             api_base = _litellm_params.get("api_base", None)
         llm_provider = _litellm_params.get("custom_llm_provider", None)
@@ -465,6 +466,7 @@ class PrometheusLogger(CustomLogger):
             api_base=api_base,
             api_provider=llm_provider,
             exception_status=exception_status_code,
+            requested_model=model_group,
         ).inc()
 
         self.litellm_deployment_total_requests.labels(
@@ -472,6 +474,7 @@ class PrometheusLogger(CustomLogger):
             model_id=model_id,
             api_base=api_base,
             api_provider=llm_provider,
+            requested_model=model_group,
         ).inc()
 
         pass
@@ -534,7 +537,7 @@ class PrometheusLogger(CustomLogger):
         """
         log these labels
-        ["litellm_model_name", "model_id", "api_base", "api_provider"]
+        ["litellm_model_name", "requested_model", "model_id", "api_base", "api_provider"]
         """
 
         self.set_deployment_healthy(
             litellm_model_name=litellm_model_name,
@@ -548,6 +551,7 @@ class PrometheusLogger(CustomLogger):
             model_id=model_id,
             api_base=api_base,
             api_provider=llm_provider,
+            requested_model=model_group,
         ).inc()
 
         self.litellm_deployment_total_requests.labels(
@@ -555,6 +559,7 @@ class PrometheusLogger(CustomLogger):
             model_id=model_id,
             api_base=api_base,
             api_provider=llm_provider,
+            requested_model=model_group,
         ).inc()
 
         # Track deployment Latency
diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index 2aeb8d7e7..57873e94a 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -1844,7 +1844,7 @@ class CommonProxyErrors(str, enum.Enum):
     db_not_connected_error = "DB not connected"
     no_llm_router = "No models configured on proxy"
     not_allowed_access = "Admin-only endpoint. Not allowed to access this."
-    not_premium_user = "You must be a LiteLLM Enterprise user to use this feature. If you have a license please set `LITELLM_LICENSE` in your env. If you want to obtain a license meet with us here: https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat"
+    not_premium_user = "You must be a LiteLLM Enterprise user to use this feature. If you have a license please set `LITELLM_LICENSE` in your env. If you want to obtain a license meet with us here: https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat. \nPricing: https://www.litellm.ai/#pricing"
 
 
 class SpendCalculateRequest(LiteLLMBase):
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 678da553a..50f395f4c 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -24,7 +24,9 @@ model_list:
 
 general_settings:
   master_key: sk-1234
+
+
+
 litellm_settings:
   success_callback: ["gcs_bucket"]
-
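
Note (not part of the patch): a minimal, self-contained sketch of what the relabeled counters look like with `prometheus_client`, the library behind the `Counter(...)` calls above. The `_logged_llm_labels` value mirrors the docstring updated in this patch; the metric chosen and the label values passed to `.labels(...)` are hypothetical.

```python
from prometheus_client import Counter, generate_latest

# Mirrors _logged_llm_labels in litellm/integrations/prometheus.py,
# per the docstring updated in this patch.
_logged_llm_labels = ["litellm_model_name", "model_id", "api_base", "api_provider"]

# Same shape as the patched counter: "requested_model" is prepended
# to the existing deployment labels.
litellm_deployment_total_requests = Counter(
    name="litellm_deployment_total_requests",
    documentation="Total number of LLM API calls via litellm - success + failure",
    labelnames=["requested_model"] + _logged_llm_labels,
)

# Every .labels(...) call must now also supply requested_model, or
# prometheus_client raises ValueError. The values below are hypothetical.
litellm_deployment_total_requests.labels(
    requested_model="gpt-4",                 # model group sent in the request
    litellm_model_name="azure/chatgpt-v-2",  # deployment's underlying model
    model_id="abc123",                       # autogenerated deployment id
    api_base="https://my-endpoint.openai.azure.com",
    api_provider="azure",
).inc()

# Exposition-format output, as a Prometheus scrape would see it.
print(generate_latest().decode())
```

Since prometheus_client appends `_total` to counter names in the exposed text format, the scraped series is `litellm_deployment_total_requests_total{...}`, and a query such as `sum by (requested_model) (rate(litellm_deployment_failure_responses_total[5m]))` can then break failures out per requested model.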