doc new prometheus metrics

2024-08-10 17:10:04 -07:00 · 2024-08-10 17:10:04 -07:00 · cc3316104f
commit cc3316104f
parent a0a1feb7da
2 changed files with 8 additions and 2 deletions
--- a/docs/my-website/docs/proxy/prometheus.md
+++ b/docs/my-website/docs/proxy/prometheus.md
@@ -75,6 +75,12 @@ http://localhost:4000/metrics
 | `deployment_state`             | The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage. |
 | `litellm_remaining_requests_metric`             | Track `x-ratelimit-remaining-requests` returned from LLM API Deployment |
 | `litellm_remaining_tokens`                | Track `x-ratelimit-remaining-tokens` return from LLM API Deployment |
+| `llm_deployment_success_responses`              | Total number of successful LLM API calls for deployment                               |
+| `llm_deployment_failure_responses`              | Total number of failed LLM API calls for deployment                                   |
+| `llm_deployment_total_requests`                 | Total number of LLM API calls for deployment - success + failure                      |
+| `llm_deployment_latency_per_output_token`       | Latency per output token for deployment                                                          |
+| `llm_deployment_successful_fallbacks`           |  Number of successful fallback requests from primary model -> fallback model        |
+| `llm_deployment_failed_fallbacks`               | Number of failed fallback requests from primary model -> fallback model            |



--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -172,12 +172,12 @@ class PrometheusLogger(CustomLogger):

                self.llm_deployment_successful_fallbacks = Counter(
                    "llm_deployment_successful_fallbacks",
-                    "LLM Deployment Analytics - Number of successful fallback workloads",
+                    "LLM Deployment Analytics - Number of successful fallback requests from primary model -> fallback model",
                    ["primary_model", "fallback_model"],
                )
                self.llm_deployment_failed_fallbacks = Counter(
                    "llm_deployment_failed_fallbacks",
-                    "LLM Deployment Analytics - Number of failed fallback workloads",
+                    "LLM Deployment Analytics - Number of failed fallback requests from primary model -> fallback model",
                    ["primary_model", "fallback_model"],
                )