From a109853d21759c78e3185c9db3d1b9cebdffe54b Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 18 Sep 2024 12:46:58 -0700
Subject: [PATCH] [Prometheus] track requested model (#5774)

* enforce prometheus as enterprise feature

* show correct error on prometheus metric when not enterprise user

* docs prometheus metrics enforced

* track requested model on prometheus

* docs prom metrics

* fix prom tracking failures
---
 docs/my-website/docs/proxy/prometheus.md |  9 +++++++++
 litellm/integrations/prometheus.py       | 13 +++++++++----
 litellm/proxy/_types.py                  |  2 +-
 litellm/proxy/proxy_config.yaml          |  4 +++-
 4 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/docs/my-website/docs/proxy/prometheus.md b/docs/my-website/docs/proxy/prometheus.md
index e44fd0df6..71af4f40d 100644
--- a/docs/my-website/docs/proxy/prometheus.md
+++ b/docs/my-website/docs/proxy/prometheus.md
@@ -74,6 +74,15 @@ Use this for tracking per [user, key, team, etc.](virtual_keys)
 
 Use this for LLM API Error monitoring and tracking remaining rate limits and token limits
 
+#### Labels Tracked for LLM API Metrics
+```json
+litellm_model_name: The name of the LLM model used by LiteLLM
+requested_model: The model sent in the request
+model_id: The model_id of the deployment. Autogenerated by LiteLLM, each deployment has a unique model_id
+api_base: The API Base of the deployment
+api_provider: The LLM API provider. Example (azure, openai, vertex_ai)
+```
+
 | Metric Name          | Description                          |
 |----------------------|--------------------------------------|
 `litellm_deployment_success_responses`  | Total number of successful LLM API calls for deployment |
diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index 0bf7079d0..5fb69a90a 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -193,17 +193,17 @@ class PrometheusLogger(CustomLogger):
         self.litellm_deployment_success_responses = Counter(
             name="litellm_deployment_success_responses",
             documentation="LLM Deployment Analytics - Total number of successful LLM API calls via litellm",
-            labelnames=_logged_llm_labels,
+            labelnames=["requested_model"] + _logged_llm_labels,
         )
         self.litellm_deployment_failure_responses = Counter(
             name="litellm_deployment_failure_responses",
             documentation="LLM Deployment Analytics - Total number of failed LLM API calls for a specific LLM deployment. exception_status is the status of the exception from the llm api",
-            labelnames=_logged_llm_labels + ["exception_status"],
+            labelnames=["requested_model", "exception_status"] + _logged_llm_labels,
         )
         self.litellm_deployment_total_requests = Counter(
             name="litellm_deployment_total_requests",
             documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure",
-            labelnames=_logged_llm_labels,
+            labelnames=["requested_model"] + _logged_llm_labels,
         )
 
         # Deployment Latency tracking
@@ -440,6 +440,7 @@ class PrometheusLogger(CustomLogger):
         _metadata = _litellm_params.get("metadata", {})
         litellm_model_name = request_kwargs.get("model", None)
         api_base = _metadata.get("api_base", None)
+        model_group = _metadata.get("model_group", None)
         if api_base is None:
             api_base = _litellm_params.get("api_base", None)
         llm_provider = _litellm_params.get("custom_llm_provider", None)
@@ -465,6 +466,7 @@ class PrometheusLogger(CustomLogger):
             api_base=api_base,
             api_provider=llm_provider,
             exception_status=exception_status_code,
+            requested_model=model_group,
         ).inc()
 
         self.litellm_deployment_total_requests.labels(
@@ -472,6 +474,7 @@ class PrometheusLogger(CustomLogger):
             model_id=model_id,
             api_base=api_base,
             api_provider=llm_provider,
+            requested_model=model_group,
         ).inc()
 
         pass
@@ -534,7 +537,7 @@ class PrometheusLogger(CustomLogger):
         """
         log these labels
-        ["litellm_model_name", "model_id", "api_base", "api_provider"]
+        ["litellm_model_name", "requested_model", "model_id", "api_base", "api_provider"]
         """
 
         self.set_deployment_healthy(
             litellm_model_name=litellm_model_name,
@@ -548,6 +551,7 @@ class PrometheusLogger(CustomLogger):
             model_id=model_id,
             api_base=api_base,
             api_provider=llm_provider,
+            requested_model=model_group,
         ).inc()
 
         self.litellm_deployment_total_requests.labels(
@@ -555,6 +559,7 @@ class PrometheusLogger(CustomLogger):
             model_id=model_id,
             api_base=api_base,
             api_provider=llm_provider,
+            requested_model=model_group,
         ).inc()
 
         # Track deployment Latency
diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index 2aeb8d7e7..57873e94a 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -1844,7 +1844,7 @@ class CommonProxyErrors(str, enum.Enum):
     db_not_connected_error = "DB not connected"
     no_llm_router = "No models configured on proxy"
     not_allowed_access = "Admin-only endpoint. Not allowed to access this."
-    not_premium_user = "You must be a LiteLLM Enterprise user to use this feature. If you have a license please set `LITELLM_LICENSE` in your env. If you want to obtain a license meet with us here: https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat"
+    not_premium_user = "You must be a LiteLLM Enterprise user to use this feature. If you have a license please set `LITELLM_LICENSE` in your env. If you want to obtain a license meet with us here: https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat. \nPricing: https://www.litellm.ai/#pricing"
 
 
 class SpendCalculateRequest(LiteLLMBase):
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 678da553a..50f395f4c 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -24,7 +24,9 @@ model_list:
 
 general_settings:
   master_key: sk-1234
+
+
+
 litellm_settings:
   success_callback: ["gcs_bucket"]
-
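
Note (not part of the patch): a minimal, self-contained sketch of what the relabeled counters look like with `prometheus_client`, the library behind the `Counter(...)` calls above. The `_logged_llm_labels` value mirrors the docstring updated in this patch; the metric chosen and the label values passed to `.labels(...)` are hypothetical.

```python
from prometheus_client import Counter, generate_latest

# Mirrors _logged_llm_labels in litellm/integrations/prometheus.py,
# per the docstring updated in this patch.
_logged_llm_labels = ["litellm_model_name", "model_id", "api_base", "api_provider"]

# Same shape as the patched counter: "requested_model" is prepended
# to the existing deployment labels.
litellm_deployment_total_requests = Counter(
    name="litellm_deployment_total_requests",
    documentation="Total number of LLM API calls via litellm - success + failure",
    labelnames=["requested_model"] + _logged_llm_labels,
)

# Every .labels(...) call must now also supply requested_model, or
# prometheus_client raises ValueError. The values below are hypothetical.
litellm_deployment_total_requests.labels(
    requested_model="gpt-4",                 # model group sent in the request
    litellm_model_name="azure/chatgpt-v-2",  # deployment's underlying model
    model_id="abc123",                       # autogenerated deployment id
    api_base="https://my-endpoint.openai.azure.com",
    api_provider="azure",
).inc()

# Exposition-format output, as a Prometheus scrape would see it.
print(generate_latest().decode())
```

Since prometheus_client appends `_total` to counter names in the exposed text format, the scraped series is `litellm_deployment_total_requests_total{...}`, and a query such as `sum by (requested_model) (rate(litellm_deployment_failure_responses_total[5m]))` can then break failures out per requested model.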