forked from phoenix/litellm-mirror
(feat proxy prometheus) track virtual key, key alias, error code, error code class on prometheus (#5968)
* track api key and team in prom latency metric
* add test for latency metric
* test prometheus success metrics for latency
* track team and key labels for deployment failures
* add test for litellm_deployment_failure_responses_total
* fix checks for premium user on prometheus
* log_success_fallback_event and log_failure_fallback_event
* log original_exception in log_success_fallback_event
* track key, team and exception status and class on fallback metrics
* use get_standard_logging_metadata
* fix import error
* track litellm_deployment_successful_fallbacks
* add test test_proxy_fallback_metrics
* add log log_success_fallback_event
* fix test prometheus
parent b817974c8e
commit 49ec40b1cb
5 changed files with 426 additions and 62 deletions
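
As context for the new labels this commit describes (hashed key, key alias, team, exception status and exception class), the following is a minimal, hypothetical sketch of how such a labeled counter can be declared with the prometheus_client library. The metric name and label names mirror the litellm_deployment_failure_responses_total lines asserted in the test diff below; the variable and helper names are illustrative assumptions, not litellm's actual implementation.

# Illustrative sketch only, not litellm's code.
# Assumes prometheus_client is installed; label names mirror the
# litellm_deployment_failure_responses_total assertions in the tests below.
from prometheus_client import Counter

deployment_failure_responses = Counter(
    "litellm_deployment_failure_responses",  # exported with a _total suffix
    "Failed LLM API calls per deployment, labeled by key, team, and exception",
    labelnames=[
        "api_base",
        "api_key_alias",
        "api_provider",
        "exception_class",
        "exception_status",
        "hashed_api_key",
        "litellm_model_name",
        "model_id",
        "requested_model",
        "team",
        "team_alias",
    ],
)


def record_deployment_failure(labels: dict) -> None:
    # Hypothetical helper: increment the counter once per failed deployment call.
    deployment_failure_responses.labels(**labels).inc()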
@@ -5,6 +5,7 @@ Unit tests for prometheus metrics
import pytest
import aiohttp
import asyncio
import uuid


async def make_bad_chat_completion_request(session, key):
@@ -23,6 +24,53 @@ async def make_bad_chat_completion_request(session, key):
    return status, response_text


async def make_good_chat_completion_request(session, key):
    url = "http://0.0.0.0:4000/chat/completions"
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }

    data = {
        "model": "fake-openai-endpoint",
        "messages": [{"role": "user", "content": f"Hello {uuid.uuid4()}"}],
        "tags": ["teamB"],
    }
    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()
        return status, response_text


async def make_chat_completion_request_with_fallback(session, key):
    url = "http://0.0.0.0:4000/chat/completions"
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }
    data = {
        "model": "fake-azure-endpoint",
        "messages": [{"role": "user", "content": "Hello"}],
        "fallbacks": ["fake-openai-endpoint"],
    }
    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()

    # make a request with a failed fallback
    data = {
        "model": "fake-azure-endpoint",
        "messages": [{"role": "user", "content": "Hello"}],
        "fallbacks": ["unknown-model"],
    }

    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()

    return


@pytest.mark.asyncio
async def test_proxy_failure_metrics():
    """
@@ -59,3 +107,77 @@ async def test_proxy_failure_metrics():
            'litellm_proxy_total_requests_metric_total{api_key_alias="None",end_user="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",team="None",team_alias="None",user="default_user_id"} 1.0'
            in metrics
        )

        assert (
            'litellm_deployment_failure_responses_total{api_base="https://exampleopenaiendpoint-production.up.railway.app",api_key_alias="None",api_provider="openai",exception_class="RateLimitError",exception_status="429",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",litellm_model_name="429",model_id="7499d31f98cd518cf54486d5a00deda6894239ce16d13543398dc8abf870b15f",requested_model="fake-azure-endpoint",team="None",team_alias="None"}'
            in metrics
        )


@pytest.mark.asyncio
async def test_proxy_success_metrics():
    """
    Make 1 good /chat/completions call to "openai/gpt-3.5-turbo"
    GET /metrics
    Assert the success metric is incremented by 1
    """

    async with aiohttp.ClientSession() as session:
        # Make a good chat completion call
        status, response_text = await make_good_chat_completion_request(
            session, "sk-1234"
        )

        # Check if the request succeeded as expected
        assert status == 200, f"Expected status 200, but got {status}"

        # Get metrics
        async with session.get("http://0.0.0.0:4000/metrics") as response:
            metrics = await response.text()

        print("/metrics", metrics)

        # Check if the success metric is present and correct
        assert (
            'litellm_request_total_latency_metric_bucket{api_key_alias="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",le="0.005",model="fake",team="None",team_alias="None"}'
            in metrics
        )

        assert (
            'litellm_llm_api_latency_metric_bucket{api_key_alias="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",le="0.005",model="fake",team="None",team_alias="None"}'
            in metrics
        )

        assert (
            'litellm_deployment_latency_per_output_token_count{api_base="https://exampleopenaiendpoint-production.up.railway.app/",api_key_alias="None",api_provider="openai",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",litellm_model_name="fake",model_id="team-b-model",team="None",team_alias="None"}'
            in metrics
        )


@pytest.mark.asyncio
async def test_proxy_fallback_metrics():
    """
    Make 1 request with a client side fallback - check metrics
    """

    async with aiohttp.ClientSession() as session:
        # Make a good chat completion call
        await make_chat_completion_request_with_fallback(session, "sk-1234")

        # Get metrics
        async with session.get("http://0.0.0.0:4000/metrics") as response:
            metrics = await response.text()

        print("/metrics", metrics)

        # Check if successful fallback metric is incremented
        assert (
            'litellm_deployment_successful_fallbacks_total{api_key_alias="None",exception_class="RateLimitError",exception_status="429",fallback_model="fake-openai-endpoint",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",team="None",team_alias="None"} 1.0'
            in metrics
        )

        # Check if failed fallback metric is incremented
        assert (
            'litellm_deployment_failed_fallbacks_total{api_key_alias="None",exception_class="RateLimitError",exception_status="429",fallback_model="unknown-model",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",team="None",team_alias="None"} 1.0'
            in metrics
        )
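
The three tests above repeat the same pattern: issue a request, GET /metrics from the proxy, then assert that a specific metric line is present. As a possible refactor (not part of this commit), that pattern could be pulled into a small helper; the name assert_metric_line_present and its wiring are assumptions for illustration only.

async def assert_metric_line_present(session, expected_line):
    # Hypothetical helper, reusing the proxy URL from the tests above:
    # fetch the Prometheus scrape output and assert the metric line is present.
    async with session.get("http://0.0.0.0:4000/metrics") as response:
        metrics = await response.text()
    assert expected_line in metrics, f"metric line not found: {expected_line}"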