forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_openmeter_integration
This commit is contained in:
commit
fffbb73465
31 changed files with 227 additions and 98 deletions
|
@ -7550,7 +7550,7 @@ async def model_metrics(
|
|||
"LiteLLM_SpendLogs"
|
||||
WHERE
|
||||
"startTime" >= NOW() - INTERVAL '30 days'
|
||||
AND "model" = $1
|
||||
AND "model" = $1 AND "cache_hit" != 'True'
|
||||
GROUP BY
|
||||
api_base,
|
||||
model,
|
||||
|
@ -7599,7 +7599,7 @@ async def model_metrics(
|
|||
for day in _daily_entries:
|
||||
entry = {"date": str(day)}
|
||||
for model_key, latency in _daily_entries[day].items():
|
||||
entry[model_key] = round(latency, 8)
|
||||
entry[model_key] = latency
|
||||
response.append(entry)
|
||||
|
||||
return {
|
||||
|
@ -7608,6 +7608,67 @@ async def model_metrics(
|
|||
}
|
||||
|
||||
|
||||
@router.get(
    "/model/metrics/slow_responses",
    description="View number of hanging requests per model_group",
    tags=["model management"],
    include_in_schema=False,
    dependencies=[Depends(user_api_key_auth)],
)
async def model_metrics_slow_responses(
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
    _selected_model_group: Optional[str] = "gpt-4-32k",
    startTime: Optional[datetime] = None,
    endTime: Optional[datetime] = None,
):
    """
    Per-api_base breakdown of slow requests for one model group.

    Returns, for each `api_base` that served `_selected_model_group`, the
    total request count and how many of those took longer than the
    configured slack-alerting threshold ("slow"), ordered by slow_count
    descending. Cache hits are excluded since they don't reflect upstream
    latency.

    Parameters:
        user_api_key_dict: injected auth context (required by the dependency).
        _selected_model_group: model name to filter on.
        startTime / endTime: window to inspect; defaults to the last 30 days.

    Raises:
        ProxyException: 500 when the Prisma client has not been initialized.
    """
    global prisma_client, llm_router, proxy_logging_obj
    if prisma_client is None:
        raise ProxyException(
            message="Prisma Client is not initialized",
            type="internal_error",
            param="None",
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
    # Default window: last 30 days up to now.
    startTime = startTime or datetime.now() - timedelta(days=30)
    endTime = endTime or datetime.now()

    # A request counts as "slow" once its duration exceeds the slack-alerting
    # threshold (seconds); fall back to 300s when no threshold is configured.
    alerting_threshold = (
        proxy_logging_obj.slack_alerting_instance.alerting_threshold or 300
    )
    alerting_threshold = int(alerting_threshold)

    # FIX: the original query ignored startTime/endTime entirely, so the
    # endpoint always scanned the full table regardless of the requested
    # window. Bind them as $3/$4 and filter on the request start time.
    sql_query = """
SELECT
    api_base,
    COUNT(*) AS total_count,
    SUM(CASE
        WHEN ("endTime" - "startTime") >= (INTERVAL '1 SECOND' * CAST($1 AS INTEGER)) THEN 1
        ELSE 0
    END) AS slow_count
FROM
    "LiteLLM_SpendLogs"
WHERE
    "model" = $2
    AND "cache_hit" != 'True'
    AND "startTime" >= $3::timestamp
    AND "startTime" <= $4::timestamp
GROUP BY
    api_base
ORDER BY
    slow_count DESC;
"""

    db_response = await prisma_client.db.query_raw(
        sql_query, alerting_threshold, _selected_model_group, startTime, endTime
    )

    if db_response is not None:
        for row in db_response:
            # Normalize Azure-style endpoints: strip the "/openai/..." suffix
            # so deployments aggregate under their base URL.
            _api_base = row.get("api_base") or ""
            if "/openai/" in _api_base:
                _api_base = _api_base.split("/openai/")[0]
            row["api_base"] = _api_base
    return db_response
|
||||
|
||||
|
||||
@router.get(
|
||||
"/model/metrics/exceptions",
|
||||
description="View number of failed requests per model on config.yaml",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue