diff --git a/cookbook/litellm_proxy_server/grafana_dashboard/dashboard_v2/grafana_dashboard.json b/cookbook/litellm_proxy_server/grafana_dashboard/dashboard_v2/grafana_dashboard.json new file mode 100644 index 000000000..507a0b4a1 --- /dev/null +++ b/cookbook/litellm_proxy_server/grafana_dashboard/dashboard_v2/grafana_dashboard.json @@ -0,0 +1,807 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 20, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 3, + "panels": [], + "title": "LiteLLM Proxy Level Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "bdiyc60dco54we" + }, + "description": "Total requests per second made to proxy - success + failure ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + 
"x": 0, + "y": 1 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0-76761.patch01-77040", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "bdiyc60dco54we" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(rate(litellm_proxy_total_requests_metric_total[2m]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Proxy - Requests per second (success + failure)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "bdiyc60dco54we" + }, + "description": "Failures per second by Exception Class", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + 
}, + "pluginVersion": "11.3.0-76761.patch01-77040", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "bdiyc60dco54we" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(rate(litellm_proxy_failed_requests_metric_total[2m])) by (exception_class)", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Proxy Failure Responses / Second By Exception Class", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "bdiyc60dco54we" + }, + "description": "Average Response latency (seconds)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "sum(rate(litellm_request_total_latency_metric_sum[2m]))/sum(rate(litellm_request_total_latency_metric_count[2m]))" + }, + "properties": [ + { + "id": "displayName", + "value": "Average Latency (seconds)" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "histogram_quantile(0.5, sum(rate(litellm_request_total_latency_metric_bucket[2m])) by (le))" + }, + "properties": [ + { + 
"id": "displayName", + "value": "Median Latency (seconds)" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.3.0-76761.patch01-77040", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "bdiyc60dco54we" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(rate(litellm_request_total_latency_metric_sum[2m]))/sum(rate(litellm_request_total_latency_metric_count[2m]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "bdiyc60dco54we" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(litellm_request_total_latency_metric_bucket[2m])) by (le))", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "Median latency seconds" + } + ], + "title": "Proxy - Average & Median Response Latency (seconds)", + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 7, + "panels": [], + "title": "LLM API Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "bdiyc60dco54we" + }, + "description": "x-ratelimit-remaining-requests returning from LLM APIs", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + 
"lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0-76761.patch01-77040", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "bdiyc60dco54we" + }, + "editorMode": "code", + "expr": "topk(5, sort(litellm_remaining_requests))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "x-ratelimit-remaining-requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "bdiyc60dco54we" + }, + "description": "x-ratelimit-remaining-tokens from LLM API ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + 
"color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0-76761.patch01-77040", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "bdiyc60dco54we" + }, + "editorMode": "code", + "expr": "topk(5, sort(litellm_remaining_tokens))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "x-ratelimit-remaining-tokens", + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 4, + "panels": [], + "title": "LiteLLM Metrics by Virtual Key and Team", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "bdiyc60dco54we" + }, + "description": "Requests per second by Key Alias (keys are LiteLLM Virtual Keys). 
If the key is shown as None, no alias has been set for it.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0-76761.patch01-77040", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "bdiyc60dco54we" + }, + "editorMode": "code", + "expr": "sum(rate(litellm_proxy_total_requests_metric_total[2m])) by (api_key_alias)\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Requests per second by Key Alias", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "bdiyc60dco54we" + }, + "description": "Requests per second by Team Alias. 
If the team is shown as None, no team alias has been set for it.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 27 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0-76761.patch01-77040", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "bdiyc60dco54we" + }, + "editorMode": "code", + "expr": "sum(rate(litellm_proxy_total_requests_metric_total[2m])) by (team_alias)\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Requests per second by Team Alias", + "type": "timeseries" + } + ], + "preload": false, + "schemaVersion": 40, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "LiteLLM Prod v2", + "uid": "be059pwgrlg5cf", + "version": 17, + "weekStart": "" + } \ No newline at end of file diff --git a/docs/my-website/docs/proxy/prometheus.md b/docs/my-website/docs/proxy/prometheus.md index 
06654bd69..207b1abe1 100644 --- a/docs/my-website/docs/proxy/prometheus.md +++ b/docs/my-website/docs/proxy/prometheus.md @@ -1,5 +1,6 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; +import Image from '@theme/IdealImage'; # 📈 Prometheus metrics @@ -169,12 +170,22 @@ litellm_settings: | `litellm_redis_fails` | Number of failed redis calls | | `litellm_self_latency` | Histogram latency for successful litellm api call | -## 🔥 Community Maintained Grafana Dashboards +## **🔥 LiteLLM Maintained Grafana Dashboards** -Link to Grafana Dashboards made by LiteLLM community +Link to Grafana Dashboards maintained by LiteLLM https://github.com/BerriAI/litellm/tree/main/cookbook/litellm_proxy_server/grafana_dashboard +Here is a screenshot of the metrics you can monitor with the LiteLLM Grafana Dashboard + + +<Image img={require('../../img/grafana_1.png')} /> + +<Image img={require('../../img/grafana_2.png')} /> + +<Image img={require('../../img/grafana_3.png')} /> + + ## Deprecated Metrics | Metric Name | Description | diff --git a/docs/my-website/img/grafana_1.png b/docs/my-website/img/grafana_1.png new file mode 100644 index 000000000..1bbc3be14 Binary files /dev/null and b/docs/my-website/img/grafana_1.png differ diff --git a/docs/my-website/img/grafana_2.png b/docs/my-website/img/grafana_2.png new file mode 100644 index 000000000..39e8880cc Binary files /dev/null and b/docs/my-website/img/grafana_2.png differ diff --git a/docs/my-website/img/grafana_3.png b/docs/my-website/img/grafana_3.png new file mode 100644 index 000000000..e2d5c5798 Binary files /dev/null and b/docs/my-website/img/grafana_3.png differ