From c68cfce912783a5d131da8489c82ca60e23f8bef Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Thu, 26 Sep 2024 18:01:03 -0700
Subject: [PATCH] track api key and alias in remaining tokens metric (#5924)

---
 litellm/integrations/prometheus.py            | 28 +++++++++++++++----
 ...odel_prices_and_context_window_backup.json | 11 ++++++++
 2 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index 4ce17cc8bb..2f066949c3 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -16,6 +16,7 @@ import litellm
 from litellm._logging import print_verbose, verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth
+from litellm.types.utils import StandardLoggingPayload
 
 REQUESTED_MODEL = "requested_model"
 EXCEPTION_STATUS = "exception_status"
@@ -191,6 +192,8 @@ class PrometheusLogger(CustomLogger):
                 "api_provider",
                 "api_base",
                 "litellm_model_name",
+                "hashed_api_key",
+                "api_key_alias",
             ],
         )
 
@@ -202,6 +205,8 @@
                 "api_provider",
                 "api_base",
                 "litellm_model_name",
+                "hashed_api_key",
+                "api_key_alias",
             ],
         )
         # Get all keys
@@ -628,14 +633,15 @@
     ):
         try:
             verbose_logger.debug("setting remaining tokens requests metric")
+            standard_logging_payload: StandardLoggingPayload = request_kwargs.get(
+                "standard_logging_object", {}
+            )
+            model_group = standard_logging_payload["model_group"]
+            api_base = standard_logging_payload["api_base"]
             _response_headers = request_kwargs.get("response_headers")
             _litellm_params = request_kwargs.get("litellm_params", {}) or {}
             _metadata = _litellm_params.get("metadata", {})
             litellm_model_name = request_kwargs.get("model", None)
-            model_group = _metadata.get("model_group", None)
-            api_base = _metadata.get("api_base", None)
-            if api_base is None:
-                api_base = _litellm_params.get("api_base", None)
             llm_provider = _litellm_params.get("custom_llm_provider", None)
             _model_info = _metadata.get("model_info") or {}
             model_id = _model_info.get("id", None)
@@ -665,12 +671,22 @@
                 "litellm_model_name"
             """
             self.litellm_remaining_requests_metric.labels(
-                model_group, llm_provider, api_base, litellm_model_name
+                model_group,
+                llm_provider,
+                api_base,
+                litellm_model_name,
+                standard_logging_payload["metadata"]["user_api_key_hash"],
+                standard_logging_payload["metadata"]["user_api_key_alias"],
             ).set(remaining_requests)
 
             if remaining_tokens:
                 self.litellm_remaining_tokens_metric.labels(
-                    model_group, llm_provider, api_base, litellm_model_name
+                    model_group,
+                    llm_provider,
+                    api_base,
+                    litellm_model_name,
+                    standard_logging_payload["metadata"]["user_api_key_hash"],
+                    standard_logging_payload["metadata"]["user_api_key_alias"],
                 ).set(remaining_tokens)
 
             """
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 6804d677e6..2831f1a5cc 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -2454,6 +2454,17 @@
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models"
     },
+    "vertex_ai/meta/llama-3.2-90b-vision-instruct-maas": {
+        "max_tokens": 8192,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "vertex_ai-llama_models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas"
+    },
     "vertex_ai/mistral-large@latest": {
         "max_tokens": 8191,
         "max_input_tokens": 128000,