forked from phoenix/litellm-mirror
support using "callbacks" for prometheus
parent 3bf6589fab
commit e253c100f4
3 changed files with 70 additions and 0 deletions
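In short: setting `callbacks: ["prometheus"]` under `litellm_settings` now both registers the Prometheus logging callback and mounts a `/metrics` endpoint on the proxy, as the three hunks below show.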
@@ -228,6 +228,16 @@ def initialize_callbacks_on_proxy(
             litellm.callbacks.extend(imported_list)
         else:
             litellm.callbacks = imported_list  # type: ignore
+
+        if "prometheus" in value:
+            from litellm.proxy.proxy_server import app
+
+            verbose_proxy_logger.debug("Starting Prometheus Metrics on /metrics")
+            from prometheus_client import make_asgi_app
+
+            # Add prometheus asgi middleware to route /metrics requests
+            metrics_app = make_asgi_app()
+            app.mount("/metrics", metrics_app)
     else:
         litellm.callbacks = [
             get_instance_fn(
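The heart of this hunk is `make_asgi_app()`: prometheus_client builds a standard ASGI app that serves the default registry, and FastAPI's `mount` exposes it at `/metrics`. A minimal standalone sketch of the same pattern (the app and counter names here are hypothetical, not from litellm):

from fastapi import FastAPI
from prometheus_client import Counter, make_asgi_app

demo_app = FastAPI()

# Hypothetical metric for illustration; litellm registers its own metrics
# (litellm_requests_metric, litellm_total_tokens, ...) via PrometheusLogger.
demo_counter = Counter("demo_requests_total", "Requests handled by the demo app")

@demo_app.get("/")
async def root():
    demo_counter.inc()
    return {"ok": True}

# Same pattern as the commit: mount the prometheus ASGI app so that
# GET /metrics returns every metric in the default registry.
demo_app.mount("/metrics", make_asgi_app())

Run with `uvicorn module:demo_app`; after a few requests to `/`, `/metrics` shows `demo_requests_total` climbing.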
@@ -15,6 +15,9 @@ guardrails:
       mode: "pre_call" # pre_call, during_call, post_call
       output_parse_pii: True
 
+litellm_settings:
+  callbacks: ["prometheus"]
+
 general_settings:
   master_key: sk-1234
   alerting: ["slack"]
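With that config in place, a quick way to confirm the endpoint is live is to scrape it directly. A sketch, assuming the proxy is running locally on its default port 4000:

import requests

# Hypothetical local deployment; adjust host/port to yours.
resp = requests.get("http://localhost:4000/metrics")
resp.raise_for_status()

# Prometheus text exposition format: one "name{labels} value" line per sample.
for line in resp.text.splitlines():
    if line.startswith("litellm_"):
        print(line)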
@@ -79,3 +79,60 @@ async def test_async_prometheus_success_logging():
     assert metrics["litellm_deployment_success_responses_total"] == 1.0
     assert metrics["litellm_deployment_total_requests_total"] == 1.0
     assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
+
+
+@pytest.mark.asyncio()
+async def test_async_prometheus_success_logging_with_callbacks():
+    run_id = str(uuid.uuid4())
+    litellm.set_verbose = True
+    litellm.callbacks = ["prometheus"]
+
+    response = await litellm.acompletion(
+        model="claude-instant-1.2",
+        messages=[{"role": "user", "content": "what llm are u"}],
+        max_tokens=10,
+        mock_response="hi",
+        temperature=0.2,
+        metadata={
+            "id": run_id,
+            "tags": ["tag1", "tag2"],
+            "user_api_key": "6eb81e014497d89f3cc1aa9da7c2b37bda6b7fea68e4b710d33d94201e68970c",
+            "user_api_key_alias": "ishaans-prometheus-key",
+            "user_api_end_user_max_budget": None,
+            "litellm_api_version": "1.40.19",
+            "global_max_parallel_requests": None,
+            "user_api_key_user_id": "admin",
+            "user_api_key_org_id": None,
+            "user_api_key_team_id": "dbe2f686-a686-4896-864a-4c3924458709",
+            "user_api_key_team_alias": "testing-team",
+        },
+    )
+    print(response)
+    await asyncio.sleep(3)
+
+    # get prometheus logger
+    from litellm.litellm_core_utils.litellm_logging import _in_memory_loggers
+
+    for callback in _in_memory_loggers:
+        if isinstance(callback, PrometheusLogger):
+            test_prometheus_logger = callback
+
+    print("done with success request")
+
+    print(
+        "vars of test_prometheus_logger",
+        vars(test_prometheus_logger.litellm_requests_metric),
+    )
+
+    # Get the metrics
+    metrics = {}
+    for metric in REGISTRY.collect():
+        for sample in metric.samples:
+            metrics[sample.name] = sample.value
+
+    print("metrics from prometheus", metrics)
+    assert metrics["litellm_requests_metric_total"] == 1.0
+    assert metrics["litellm_total_tokens_total"] == 30.0
+    assert metrics["litellm_deployment_success_responses_total"] == 1.0
+    assert metrics["litellm_deployment_total_requests_total"] == 1.0
+    assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
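One caveat in the test's collection loop: flattening samples into a dict keyed by `sample.name` keeps only the last sample per name, so distinct label sets collapse onto one value. A label-aware sketch of the same collection (same `REGISTRY` from prometheus_client as in the test):

from prometheus_client import REGISTRY

def collect_samples() -> dict:
    """Map (sample name, sorted label items) -> value, keeping label sets distinct."""
    samples = {}
    for metric in REGISTRY.collect():
        for sample in metric.samples:
            samples[(sample.name, tuple(sorted(sample.labels.items())))] = sample.value
    return samples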