Merge branch 'main' into litellm_openmeter_integration

This commit is contained in:
Krish Dholakia 2024-05-01 21:19:29 -07:00 committed by GitHub
commit fffbb73465
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
31 changed files with 227 additions and 98 deletions

View file

@ -7550,7 +7550,7 @@ async def model_metrics(
"LiteLLM_SpendLogs"
WHERE
"startTime" >= NOW() - INTERVAL '30 days'
AND "model" = $1
AND "model" = $1 AND "cache_hit" != 'True'
GROUP BY
api_base,
model,
@ -7599,7 +7599,7 @@ async def model_metrics(
for day in _daily_entries:
entry = {"date": str(day)}
for model_key, latency in _daily_entries[day].items():
entry[model_key] = round(latency, 8)
entry[model_key] = latency
response.append(entry)
return {
@ -7608,6 +7608,67 @@ async def model_metrics(
}
@router.get(
    "/model/metrics/slow_responses",
    description="View number of hanging requests per model_group",
    tags=["model management"],
    include_in_schema=False,
    dependencies=[Depends(user_api_key_auth)],
)
async def model_metrics_slow_responses(
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
    _selected_model_group: Optional[str] = "gpt-4-32k",
    startTime: Optional[datetime] = None,
    endTime: Optional[datetime] = None,
):
    """
    Count total vs. slow (hanging) requests per `api_base` for one model.

    A request counts as slow when `endTime - startTime` in "LiteLLM_SpendLogs"
    is at least the slack alerting threshold, interpreted as seconds
    (300 is used when the configured threshold is falsy).

    Parameters:
    - _selected_model_group: value matched against the "model" column.
    - startTime: lower bound on the row's "startTime"; defaults to 30 days ago.
    - endTime: upper bound on the row's "startTime"; defaults to now.

    Returns:
    The rows from the raw query (`api_base`, `total_count`, `slow_count`),
    ordered by `slow_count` descending, with each `api_base` truncated at the
    first "/openai/" segment so deployments on the same host share one label.

    Raises:
    ProxyException: if the Prisma client is not initialized.
    """
    global prisma_client, llm_router, proxy_logging_obj
    if prisma_client is None:
        raise ProxyException(
            message="Prisma Client is not initialized",
            type="internal_error",
            param="None",
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )

    # `or` binds looser than `-`, so this reads: startTime or (now - 30 days).
    # NOTE(review): datetime.now() is naive local time - confirm it matches the
    # timezone the "startTime" column is stored in.
    startTime = startTime or datetime.now() - timedelta(days=30)
    endTime = endTime or datetime.now()

    alerting_threshold = (
        proxy_logging_obj.slack_alerting_instance.alerting_threshold or 300
    )
    alerting_threshold = int(alerting_threshold)

    # The requested window is applied in the WHERE clause ($3/$4); previously
    # startTime/endTime were resolved above but never reached the query, so
    # every call aggregated the full table history.
    # NOTE(review): `"cache_hit" != 'True'` also drops rows whose cache_hit is
    # NULL (SQL three-valued logic) - confirm that is intended.
    sql_query = """
        SELECT
            api_base,
            COUNT(*) AS total_count,
            SUM(CASE
                WHEN ("endTime" - "startTime") >= (INTERVAL '1 SECOND' * CAST($1 AS INTEGER)) THEN 1
                ELSE 0
            END) AS slow_count
        FROM
            "LiteLLM_SpendLogs"
        WHERE
            "model" = $2
            AND "cache_hit" != 'True'
            AND "startTime" >= $3::timestamp
            AND "startTime" <= $4::timestamp
        GROUP BY
            api_base
        ORDER BY
            slow_count DESC;
    """

    db_response = await prisma_client.db.query_raw(
        sql_query,
        alerting_threshold,
        _selected_model_group,
        startTime,
        endTime,
    )

    if db_response is not None:
        for row in db_response:
            _api_base = row.get("api_base") or ""
            # Keep only the part before "/openai/" (e.g. the host of an
            # Azure-style URL) so the label is not deployment-specific.
            if "/openai/" in _api_base:
                _api_base = _api_base.split("/openai/")[0]
            row["api_base"] = _api_base
    return db_response
@router.get(
"/model/metrics/exceptions",
description="View number of failed requests per model on config.yaml",