support using "callbacks" for prometheus

Ishaan Jaff 2024-09-09 08:26:03 -07:00
parent 3bf6589fab
commit e253c100f4
3 changed files with 70 additions and 0 deletions


@@ -228,6 +228,16 @@ def initialize_callbacks_on_proxy(
                 litellm.callbacks.extend(imported_list)
             else:
                 litellm.callbacks = imported_list  # type: ignore
+
+            if "prometheus" in value:
+                from litellm.proxy.proxy_server import app
+
+                verbose_proxy_logger.debug("Starting Prometheus Metrics on /metrics")
+                from prometheus_client import make_asgi_app
+
+                # Add prometheus asgi middleware to route /metrics requests
+                metrics_app = make_asgi_app()
+                app.mount("/metrics", metrics_app)
         else:
             litellm.callbacks = [
                 get_instance_fn(
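
Note: the added branch mounts prometheus_client's ASGI exporter onto the proxy's FastAPI app whenever "prometheus" appears in the configured callbacks. A minimal, standalone sketch of the same pattern, assuming a plain FastAPI app in place of the proxy's `app` object:

    from fastapi import FastAPI
    from prometheus_client import make_asgi_app

    app = FastAPI()

    # make_asgi_app() serves the default prometheus_client REGISTRY in the
    # Prometheus text exposition format; mounting it exposes GET /metrics.
    app.mount("/metrics", make_asgi_app())

Any metric registered against the default REGISTRY (as the test below shows PrometheusLogger's metrics are) then becomes scrapeable at /metrics.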


@@ -15,6 +15,9 @@ guardrails:
       mode: "pre_call" # pre_call, during_call, post_call
       output_parse_pii: True
 
+litellm_settings:
+  callbacks: ["prometheus"]
+
 general_settings:
   master_key: sk-1234
   alerting: ["slack"]
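
With `callbacks: ["prometheus"]` in `litellm_settings`, the branch above fires at proxy startup. A hedged smoke check, assuming the proxy runs on its default port 4000 (adjust for your deployment) and that the `requests` package is installed:

    import requests

    # The /metrics route is mounted by the proxy at startup; a successful
    # response in the Prometheus text format confirms the exporter is live.
    resp = requests.get("http://localhost:4000/metrics")
    resp.raise_for_status()
    print(resp.text[:500])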


@@ -79,3 +79,60 @@ async def test_async_prometheus_success_logging():
     assert metrics["litellm_deployment_success_responses_total"] == 1.0
     assert metrics["litellm_deployment_total_requests_total"] == 1.0
     assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
+
+
+@pytest.mark.asyncio()
+async def test_async_prometheus_success_logging_with_callbacks():
+    run_id = str(uuid.uuid4())
+    litellm.set_verbose = True
+    litellm.callbacks = ["prometheus"]
+
+    response = await litellm.acompletion(
+        model="claude-instant-1.2",
+        messages=[{"role": "user", "content": "what llm are u"}],
+        max_tokens=10,
+        mock_response="hi",
+        temperature=0.2,
+        metadata={
+            "id": run_id,
+            "tags": ["tag1", "tag2"],
+            "user_api_key": "6eb81e014497d89f3cc1aa9da7c2b37bda6b7fea68e4b710d33d94201e68970c",
+            "user_api_key_alias": "ishaans-prometheus-key",
+            "user_api_end_user_max_budget": None,
+            "litellm_api_version": "1.40.19",
+            "global_max_parallel_requests": None,
+            "user_api_key_user_id": "admin",
+            "user_api_key_org_id": None,
+            "user_api_key_team_id": "dbe2f686-a686-4896-864a-4c3924458709",
+            "user_api_key_team_alias": "testing-team",
+        },
+    )
+    print(response)
+    await asyncio.sleep(3)
+
+    # get prometheus logger
+    from litellm.litellm_core_utils.litellm_logging import _in_memory_loggers
+
+    for callback in _in_memory_loggers:
+        if isinstance(callback, PrometheusLogger):
+            test_prometheus_logger = callback
+
+    print("done with success request")
+    print(
+        "vars of test_prometheus_logger",
+        vars(test_prometheus_logger.litellm_requests_metric),
+    )
+
+    # Get the metrics
+    metrics = {}
+    for metric in REGISTRY.collect():
+        for sample in metric.samples:
+            metrics[sample.name] = sample.value
+
+    print("metrics from prometheus", metrics)
+
+    assert metrics["litellm_requests_metric_total"] == 1.0
+    assert metrics["litellm_total_tokens_total"] == 30.0
+    assert metrics["litellm_deployment_success_responses_total"] == 1.0
+    assert metrics["litellm_deployment_total_requests_total"] == 1.0
+    assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
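
The test builds a name-to-value dict by walking every sample in REGISTRY.collect(). As a side note, prometheus_client also offers REGISTRY.get_sample_value() for looking up a single sample directly; a small sketch (it returns None unless both the metric name and the exact label set match, and litellm's metrics carry labels):

    from prometheus_client import REGISTRY

    # Returns the matching sample's value, or None when no sample matches
    # the metric name and exact label set (here: no labels).
    value = REGISTRY.get_sample_value("litellm_requests_metric_total")
    print(value)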