forked from phoenix/litellm-mirror
support using "callbacks" for prometheus
This commit is contained in:
parent
3bf6589fab
commit
e253c100f4
3 changed files with 70 additions and 0 deletions
|
@@ -228,6 +228,16 @@ def initialize_callbacks_on_proxy(
|
|||
litellm.callbacks.extend(imported_list)
|
||||
else:
|
||||
litellm.callbacks = imported_list # type: ignore
|
||||
|
||||
if "prometheus" in value:
|
||||
from litellm.proxy.proxy_server import app
|
||||
|
||||
verbose_proxy_logger.debug("Starting Prometheus Metrics on /metrics")
|
||||
from prometheus_client import make_asgi_app
|
||||
|
||||
# Add prometheus asgi middleware to route /metrics requests
|
||||
metrics_app = make_asgi_app()
|
||||
app.mount("/metrics", metrics_app)
|
||||
else:
|
||||
litellm.callbacks = [
|
||||
get_instance_fn(
|
||||
|
|
|
@@ -15,6 +15,9 @@ guardrails:
|
|||
mode: "pre_call" # pre_call, during_call, post_call
|
||||
output_parse_pii: True
|
||||
|
||||
litellm_settings:
|
||||
callbacks: ["prometheus"]
|
||||
|
||||
general_settings:
|
||||
master_key: sk-1234
|
||||
alerting: ["slack"]
|
||||
|
|
|
@@ -79,3 +79,60 @@ async def test_async_prometheus_success_logging():
|
|||
assert metrics["litellm_deployment_success_responses_total"] == 1.0
|
||||
assert metrics["litellm_deployment_total_requests_total"] == 1.0
|
||||
assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
|
||||
|
||||
|
||||
@pytest.mark.asyncio()
async def test_async_prometheus_success_logging_with_callbacks():
    """Check that metrics are recorded when prometheus is enabled via ``litellm.callbacks``.

    The test sets ``litellm.callbacks = ["prometheus"]``, issues a single
    mocked ``litellm.acompletion`` call carrying key/team metadata, looks up
    the ``PrometheusLogger`` instance in ``_in_memory_loggers``, and then
    asserts on the sample values collected from the prometheus ``REGISTRY``.
    """
    run_id = str(uuid.uuid4())
    litellm.set_verbose = True
    litellm.callbacks = ["prometheus"]

    response = await litellm.acompletion(
        model="claude-instant-1.2",
        messages=[{"role": "user", "content": "what llm are u"}],
        max_tokens=10,
        mock_response="hi",
        temperature=0.2,
        metadata={
            "id": run_id,
            "tags": ["tag1", "tag2"],
            "user_api_key": "6eb81e014497d89f3cc1aa9da7c2b37bda6b7fea68e4b710d33d94201e68970c",
            "user_api_key_alias": "ishaans-prometheus-key",
            "user_api_end_user_max_budget": None,
            "litellm_api_version": "1.40.19",
            "global_max_parallel_requests": None,
            "user_api_key_user_id": "admin",
            "user_api_key_org_id": None,
            "user_api_key_team_id": "dbe2f686-a686-4896-864a-4c3924458709",
            "user_api_key_team_alias": "testing-team",
        },
    )
    print(response)
    # NOTE(review): presumably this pause lets the async success callbacks
    # finish before the metrics are read - confirm against the logging code.
    await asyncio.sleep(3)

    # get prometheus logger
    from litellm.litellm_core_utils.litellm_logging import _in_memory_loggers

    # Start from None so a missing logger produces a clear assertion failure
    # below instead of an UnboundLocalError. As before, the last matching
    # logger in the list wins.
    test_prometheus_logger = None
    for callback in _in_memory_loggers:
        if isinstance(callback, PrometheusLogger):
            test_prometheus_logger = callback

    assert (
        test_prometheus_logger is not None
    ), "no PrometheusLogger found in _in_memory_loggers"

    print("done with success request")

    print(
        "vars of test_prometheus_logger",
        vars(test_prometheus_logger.litellm_requests_metric),
    )

    # Get the metrics
    # Keyed by sample name only: when several samples share a name (e.g.
    # different label sets), the last one collected overwrites the others.
    metrics = {}
    for metric in REGISTRY.collect():
        for sample in metric.samples:
            metrics[sample.name] = sample.value

    print("metrics from prometheus", metrics)
    assert metrics["litellm_requests_metric_total"] == 1.0
    assert metrics["litellm_total_tokens_total"] == 30.0
    assert metrics["litellm_deployment_success_responses_total"] == 1.0
    assert metrics["litellm_deployment_total_requests_total"] == 1.0
    assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue