From e253c100f418eb49d3faafdec5408ed68b6a9ed7 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 9 Sep 2024 08:26:03 -0700
Subject: [PATCH] support using "callbacks" for prometheus

---
 litellm/proxy/common_utils/callback_utils.py | 10 ++++
 litellm/proxy/proxy_config.yaml              |  3 ++
 litellm/tests/test_prometheus.py             | 57 ++++++++++++++++++++
 3 files changed, 70 insertions(+)

diff --git a/litellm/proxy/common_utils/callback_utils.py b/litellm/proxy/common_utils/callback_utils.py
index fa976690e..4ccf61e23 100644
--- a/litellm/proxy/common_utils/callback_utils.py
+++ b/litellm/proxy/common_utils/callback_utils.py
@@ -228,6 +228,16 @@ def initialize_callbacks_on_proxy(
                 litellm.callbacks.extend(imported_list)
             else:
                 litellm.callbacks = imported_list  # type: ignore
+
+            if "prometheus" in value:
+                from litellm.proxy.proxy_server import app
+
+                verbose_proxy_logger.debug("Starting Prometheus Metrics on /metrics")
+                from prometheus_client import make_asgi_app
+
+                # Add prometheus asgi middleware to route /metrics requests
+                metrics_app = make_asgi_app()
+                app.mount("/metrics", metrics_app)
         else:
             litellm.callbacks = [
                 get_instance_fn(
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 71a356b80..b407b0d7a 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -15,6 +15,9 @@ guardrails:
       mode: "pre_call" # pre_call, during_call, post_call
       output_parse_pii: True
 
+litellm_settings:
+  callbacks: ["prometheus"]
+
 general_settings:
   master_key: sk-1234
   alerting: ["slack"]
diff --git a/litellm/tests/test_prometheus.py b/litellm/tests/test_prometheus.py
index 7574beb9d..1232130cb 100644
--- a/litellm/tests/test_prometheus.py
+++ b/litellm/tests/test_prometheus.py
@@ -79,3 +79,60 @@ async def test_async_prometheus_success_logging():
     assert metrics["litellm_deployment_success_responses_total"] == 1.0
     assert metrics["litellm_deployment_total_requests_total"] == 1.0
     assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
+
+
+@pytest.mark.asyncio()
+async def test_async_prometheus_success_logging_with_callbacks():
+    run_id = str(uuid.uuid4())
+    litellm.set_verbose = True
+    litellm.callbacks = ["prometheus"]
+
+    response = await litellm.acompletion(
+        model="claude-instant-1.2",
+        messages=[{"role": "user", "content": "what llm are u"}],
+        max_tokens=10,
+        mock_response="hi",
+        temperature=0.2,
+        metadata={
+            "id": run_id,
+            "tags": ["tag1", "tag2"],
+            "user_api_key": "6eb81e014497d89f3cc1aa9da7c2b37bda6b7fea68e4b710d33d94201e68970c",
+            "user_api_key_alias": "ishaans-prometheus-key",
+            "user_api_end_user_max_budget": None,
+            "litellm_api_version": "1.40.19",
+            "global_max_parallel_requests": None,
+            "user_api_key_user_id": "admin",
+            "user_api_key_org_id": None,
+            "user_api_key_team_id": "dbe2f686-a686-4896-864a-4c3924458709",
+            "user_api_key_team_alias": "testing-team",
+        },
+    )
+    print(response)
+    await asyncio.sleep(3)
+
+    # get prometheus logger
+    from litellm.litellm_core_utils.litellm_logging import _in_memory_loggers
+
+    for callback in _in_memory_loggers:
+        if isinstance(callback, PrometheusLogger):
+            test_prometheus_logger = callback
+
+    print("done with success request")
+
+    print(
+        "vars of test_prometheus_logger",
+        vars(test_prometheus_logger.litellm_requests_metric),
+    )
+
+    # Get the metrics
+    metrics = {}
+    for metric in REGISTRY.collect():
+        for sample in metric.samples:
+            metrics[sample.name] = sample.value
+
+    print("metrics from prometheus", metrics)
+    assert metrics["litellm_requests_metric_total"] == 1.0
+    assert metrics["litellm_total_tokens_total"] == 30.0
+    assert metrics["litellm_deployment_success_responses_total"] == 1.0
+    assert metrics["litellm_deployment_total_requests_total"] == 1.0
+    assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
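
Note: the core of this patch is the call to prometheus_client's make_asgi_app() mounted at /metrics. Below is a minimal, self-contained sketch of that same wiring on a plain FastAPI app, useful for checking the behavior outside the LiteLLM proxy. The file name sketch.py, the /ping route, and the demo_requests_total counter are hypothetical stand-ins for illustration only; they are not part of this patch or of LiteLLM's own metrics.

    # sketch.py -- hypothetical standalone demo, not part of this patch
    from fastapi import FastAPI
    from prometheus_client import Counter, make_asgi_app

    app = FastAPI()

    # Same wiring the patch adds in initialize_callbacks_on_proxy():
    # route /metrics requests to the Prometheus ASGI app.
    metrics_app = make_asgi_app()
    app.mount("/metrics", metrics_app)

    # Hypothetical counter standing in for LiteLLM's metrics
    # (e.g. litellm_requests_metric_total asserted in the test above).
    REQUESTS = Counter("demo_requests_total", "Total demo requests served")

    @app.get("/ping")
    async def ping():
        REQUESTS.inc()  # increment on each request so /metrics has data
        return {"status": "ok"}

Assuming the sketch above, running "uvicorn sketch:app", hitting /ping once, and then fetching /metrics should show demo_requests_total with a value of 1.0, which mirrors how the new test reads samples out of REGISTRY.collect().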