forked from phoenix/litellm-mirror
Merge pull request #3932 from BerriAI/litellm_show_num_429_errors_ui
[Feat- admin UI] Show number of rate limit errors by deployment per day
This commit is contained in:
commit
e241b87d37
3 changed files with 518 additions and 13 deletions
|
@ -6859,6 +6859,265 @@ async def get_global_activity_model(
|
|||
)
|
||||
|
||||
|
||||
@router.get(
    "/global/activity/exceptions/deployment",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
)
async def get_global_activity_exceptions_per_deployment(
    model_group: str = fastapi.Query(
        description="Filter by model group",
    ),
    start_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time from which to start viewing spend",
    ),
    end_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time till which to view spend",
    ),
):
    """
    Get the number of rate limit (429) errors per day, grouped by deployment (api_base).

    Only the 10 deployments with the most 429 errors in the requested window
    are returned, ordered from most to fewest errors.

    Parameters:
    - model_group: model group to filter the error logs by
    - start_date: first day to include, formatted as YYYY-MM-DD
    - end_date: last day to include (inclusive), formatted as YYYY-MM-DD

    Raises:
    - HTTPException(400) when start_date / end_date are missing or malformed
    - HTTPException(500) when the database is not connected or the query fails

    Example response:
    [
        {
            "api_base": "https://azure-us-east-1.openai.azure.com/",
            "daily_data": [
                {"api_base": "...", "date": "Jan 22", "num_rate_limit_exceptions": 10},
                {"api_base": "...", "date": "Jan 23", "num_rate_limit_exceptions": 12},
            ],
            "sum_num_rate_limit_exceptions": 22,
        },
    ]
    """
    if start_date is None or end_date is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "Please provide start_date and end_date"},
        )

    # A malformed date is a client error; report it as 400 instead of letting
    # the ValueError surface as an unhandled 500.
    try:
        start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
        end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
    except ValueError:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "start_date and end_date must be in YYYY-MM-DD format"},
        )

    global prisma_client, llm_router, premium_user
    try:
        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        sql_query = """
        SELECT
            api_base,
            date_trunc('day', "startTime")::date AS date,
            COUNT(*) AS num_rate_limit_exceptions
        FROM
            "LiteLLM_ErrorLogs"
        WHERE
            "startTime" >= $1::date
            AND "startTime" < ($2::date + INTERVAL '1 day')
            AND model_group = $3
            AND status_code = '429'
        GROUP BY
            api_base,
            date_trunc('day', "startTime")
        ORDER BY
            date;
        """
        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj, model_group
        )
        if db_response is None:
            return []

        # {"<api_base>": {"daily_data": [...], "sum_num_rate_limit_exceptions": 0}}
        deployment_ui_data: dict = {}

        for row in db_response:
            _entry = deployment_ui_data.setdefault(
                row["api_base"],
                {"daily_data": [], "sum_num_rate_limit_exceptions": 0},
            )
            _entry["daily_data"].append(row)
            _entry["sum_num_rate_limit_exceptions"] += row.get(
                "num_rate_limit_exceptions", 0
            )

        # keep the 10 deployments with the most rate limit errors
        top_deployments = sorted(
            deployment_ui_data.items(),
            key=lambda x: x[1]["sum_num_rate_limit_exceptions"],
            reverse=True,
        )[:10]

        response = []
        for api_base, data in top_deployments:
            # Sort on the real date BEFORE converting it to the "%b %d" display
            # string; sorting the display strings is alphabetical and puts
            # e.g. "Feb 01" ahead of "Jan 31".
            _sort_daily_data = sorted(
                data["daily_data"],
                key=lambda x: datetime.fromisoformat(x["date"]),
            )
            for _row in _sort_daily_data:
                _row["date"] = datetime.fromisoformat(_row["date"]).strftime("%b %d")

            response.append(
                {
                    "api_base": api_base,
                    "daily_data": _sort_daily_data,
                    "sum_num_rate_limit_exceptions": data[
                        "sum_num_rate_limit_exceptions"
                    ],
                }
            )

        return response

    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail={"error": str(e)},
        )
|
||||
|
||||
|
||||
@router.get(
    "/global/activity/exceptions",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
)
async def get_global_activity_exceptions(
    model_group: str = fastapi.Query(
        description="Filter by model group",
    ),
    start_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time from which to start viewing spend",
    ),
    end_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time till which to view spend",
    ),
):
    """
    Get the number of rate limit (429) errors per day for a model group,
    summed across all of its deployments.

    Parameters:
    - model_group: model group to filter the error logs by
    - start_date: first day to include, formatted as YYYY-MM-DD
    - end_date: last day to include (inclusive), formatted as YYYY-MM-DD

    Raises:
    - HTTPException(400) when start_date / end_date are missing or malformed,
      and for any failure while querying (including a missing database)

    Example response:
    {
        "daily_data": [
            {"date": "Jan 22", "num_rate_limit_exceptions": 10},
            {"date": "Jan 23", "num_rate_limit_exceptions": 10},
        ],
        "sum_num_rate_limit_exceptions": 20,
    }
    """
    if start_date is None or end_date is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "Please provide start_date and end_date"},
        )

    # A malformed date is a client error; report it as 400 instead of letting
    # the ValueError surface as an unhandled 500.
    try:
        start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
        end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
    except ValueError:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "start_date and end_date must be in YYYY-MM-DD format"},
        )

    global prisma_client, llm_router
    try:
        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        sql_query = """
        SELECT
            date_trunc('day', "startTime")::date AS date,
            COUNT(*) AS num_rate_limit_exceptions
        FROM
            "LiteLLM_ErrorLogs"
        WHERE
            "startTime" >= $1::date
            AND "startTime" < ($2::date + INTERVAL '1 day')
            AND model_group = $3
            AND status_code = '429'
        GROUP BY
            date_trunc('day', "startTime")
        ORDER BY
            date;
        """
        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj, model_group
        )

        if db_response is None:
            # kept as an empty list for backward compatibility with existing callers
            return []

        # Sort on the real date BEFORE converting it to the "%b %d" display
        # string; sorting the display strings is alphabetical and puts
        # e.g. "Feb 01" ahead of "Jan 31".
        daily_data = sorted(
            db_response, key=lambda x: datetime.fromisoformat(x["date"])
        )

        sum_num_rate_limit_exceptions = 0
        for row in daily_data:
            row["date"] = datetime.fromisoformat(row["date"]).strftime("%b %d")
            sum_num_rate_limit_exceptions += row.get("num_rate_limit_exceptions", 0)

        data_to_return = {
            "daily_data": daily_data,
            "sum_num_rate_limit_exceptions": sum_num_rate_limit_exceptions,
        }

        return data_to_return

    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": str(e)},
        )
|
||||
|
||||
|
||||
@router.get(
|
||||
"/global/spend/provider",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
|
@ -10577,7 +10836,7 @@ async def model_metrics_exceptions(
|
|||
SELECT
|
||||
CASE WHEN api_base = '' THEN litellm_model_name ELSE CONCAT(litellm_model_name, '-', api_base) END AS combined_model_api_base,
|
||||
exception_type,
|
||||
COUNT(*) AS num_exceptions
|
||||
COUNT(*) AS num_rate_limit_exceptions
|
||||
FROM "LiteLLM_ErrorLogs"
|
||||
WHERE "startTime" >= $1::timestamp AND "endTime" <= $2::timestamp AND model_group = $3
|
||||
GROUP BY combined_model_api_base, exception_type
|
||||
|
@ -10585,7 +10844,7 @@ async def model_metrics_exceptions(
|
|||
SELECT
|
||||
combined_model_api_base,
|
||||
COUNT(*) AS total_exceptions,
|
||||
json_object_agg(exception_type, num_exceptions) AS exception_counts
|
||||
json_object_agg(exception_type, num_rate_limit_exceptions) AS exception_counts
|
||||
FROM cte
|
||||
GROUP BY combined_model_api_base
|
||||
ORDER BY total_exceptions DESC
|
||||
|
|
|
@ -49,6 +49,8 @@ import {
|
|||
getCallbacksCall,
|
||||
setCallbacksCall,
|
||||
modelSettingsCall,
|
||||
adminGlobalActivityExceptions,
|
||||
adminGlobalActivityExceptionsPerDeployment,
|
||||
} from "./networking";
|
||||
import { BarChart, AreaChart } from "@tremor/react";
|
||||
import {
|
||||
|
@ -109,6 +111,13 @@ interface RetryPolicyObject {
|
|||
[key: string]: { [retryPolicyKey: string]: number } | undefined;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Rate limit (429) error activity for one model group, as stored in the
 * dashboard's `globalExceptionData` state.
 */
interface GlobalExceptionActivityData {
  /** Total number of rate limit errors over the whole selected window. */
  sum_num_rate_limit_exceptions: number;
  /** One entry per day; `date` is a display label such as "Jan 22". */
  daily_data: Array<{
    date: string;
    num_rate_limit_exceptions: number;
  }>;
}
|
||||
|
||||
|
||||
//["OpenAI", "Azure OpenAI", "Anthropic", "Gemini (Google AI Studio)", "Amazon Bedrock", "OpenAI-Compatible Endpoints (Groq, Together AI, Mistral AI, etc.)"]
|
||||
|
||||
interface ProviderFields {
|
||||
|
@ -301,6 +310,9 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
|
|||
useState<RetryPolicyObject | null>(null);
|
||||
const [defaultRetry, setDefaultRetry] = useState<number>(0);
|
||||
|
||||
const [globalExceptionData, setGlobalExceptionData] = useState<GlobalExceptionActivityData>({} as GlobalExceptionActivityData);
|
||||
const [globalExceptionPerDeployment, setGlobalExceptionPerDeployment] = useState<any[]>([]);
|
||||
|
||||
function formatCreatedAt(createdAt: string | null) {
|
||||
if (createdAt) {
|
||||
const date = new Date(createdAt);
|
||||
|
@ -643,6 +655,29 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
|
|||
dateValue.to?.toISOString()
|
||||
);
|
||||
|
||||
const dailyExceptions = await adminGlobalActivityExceptions(
|
||||
accessToken,
|
||||
dateValue.from?.toISOString().split('T')[0],
|
||||
dateValue.to?.toISOString().split('T')[0],
|
||||
_initial_model_group,
|
||||
);
|
||||
|
||||
setGlobalExceptionData(dailyExceptions);
|
||||
|
||||
const dailyExceptionsPerDeplyment = await adminGlobalActivityExceptionsPerDeployment(
|
||||
accessToken,
|
||||
dateValue.from?.toISOString().split('T')[0],
|
||||
dateValue.to?.toISOString().split('T')[0],
|
||||
_initial_model_group,
|
||||
)
|
||||
|
||||
setGlobalExceptionPerDeployment(dailyExceptionsPerDeplyment);
|
||||
|
||||
console.log("dailyExceptions:", dailyExceptions);
|
||||
|
||||
console.log("dailyExceptionsPerDeplyment:", dailyExceptionsPerDeplyment);
|
||||
|
||||
|
||||
console.log("slowResponses:", slowResponses);
|
||||
|
||||
setSlowResponsesData(slowResponses);
|
||||
|
@ -905,6 +940,30 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
|
|||
console.log("slowResponses:", slowResponses);
|
||||
|
||||
setSlowResponsesData(slowResponses);
|
||||
|
||||
|
||||
if (modelGroup) {
|
||||
const dailyExceptions = await adminGlobalActivityExceptions(
|
||||
accessToken,
|
||||
startTime?.toISOString().split('T')[0],
|
||||
endTime?.toISOString().split('T')[0],
|
||||
modelGroup,
|
||||
);
|
||||
|
||||
setGlobalExceptionData(dailyExceptions);
|
||||
|
||||
const dailyExceptionsPerDeplyment = await adminGlobalActivityExceptionsPerDeployment(
|
||||
accessToken,
|
||||
startTime?.toISOString().split('T')[0],
|
||||
endTime?.toISOString().split('T')[0],
|
||||
modelGroup,
|
||||
)
|
||||
|
||||
setGlobalExceptionPerDeployment(dailyExceptionsPerDeplyment);
|
||||
|
||||
}
|
||||
|
||||
|
||||
} catch (error) {
|
||||
console.error("Failed to fetch model metrics", error);
|
||||
}
|
||||
|
@ -1782,17 +1841,110 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
|
|||
</Card>
|
||||
</Col>
|
||||
</Grid>
|
||||
<Card className="mt-4">
|
||||
<Title>Exceptions per Model</Title>
|
||||
<BarChart
|
||||
className="h-72"
|
||||
data={modelExceptions}
|
||||
index="model"
|
||||
categories={allExceptions}
|
||||
stack={true}
|
||||
yAxisWidth={30}
|
||||
/>
|
||||
</Card>
|
||||
|
||||
<Grid numItems={1} className="gap-2 w-full mt-2">
|
||||
<Card>
|
||||
<Title>All Up Rate Limit Errors (429) for {selectedModelGroup}</Title>
|
||||
<Grid numItems={1}>
|
||||
<Col>
|
||||
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Num Rate Limit Errors { (globalExceptionData.sum_num_rate_limit_exceptions)}</Subtitle>
|
||||
<BarChart
|
||||
className="h-40"
|
||||
data={globalExceptionData.daily_data}
|
||||
index="date"
|
||||
colors={['rose']}
|
||||
categories={['num_rate_limit_exceptions']}
|
||||
onValueChange={(v) => console.log(v)}
|
||||
/>
|
||||
</Col>
|
||||
<Col>
|
||||
|
||||
{/* <BarChart
|
||||
className="h-40"
|
||||
data={modelExceptions}
|
||||
index="model"
|
||||
categories={allExceptions}
|
||||
stack={true}
|
||||
yAxisWidth={30}
|
||||
/> */}
|
||||
|
||||
|
||||
</Col>
|
||||
|
||||
</Grid>
|
||||
|
||||
|
||||
</Card>
|
||||
|
||||
{
|
||||
premiumUser ? (
|
||||
<>
|
||||
{globalExceptionPerDeployment.map((globalActivity, index) => (
|
||||
<Card key={index}>
|
||||
<Title>{globalActivity.api_base ? globalActivity.api_base : "Unknown API Base"}</Title>
|
||||
<Grid numItems={1}>
|
||||
<Col>
|
||||
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Num Rate Limit Errors (429) {(globalActivity.sum_num_rate_limit_exceptions)}</Subtitle>
|
||||
<BarChart
|
||||
className="h-40"
|
||||
data={globalActivity.daily_data}
|
||||
index="date"
|
||||
colors={['rose']}
|
||||
categories={['num_rate_limit_exceptions']}
|
||||
|
||||
onValueChange={(v) => console.log(v)}
|
||||
/>
|
||||
|
||||
</Col>
|
||||
</Grid>
|
||||
</Card>
|
||||
))}
|
||||
</>
|
||||
) :
|
||||
<>
|
||||
{globalExceptionPerDeployment && globalExceptionPerDeployment.length > 0 &&
|
||||
globalExceptionPerDeployment.slice(0, 1).map((globalActivity, index) => (
|
||||
<Card key={index}>
|
||||
<Title>✨ Rate Limit Errors by Deployment</Title>
|
||||
<p className="mb-2 text-gray-500 italic text-[12px]">Upgrade to see exceptions for all deployments</p>
|
||||
<Button variant="primary" className="mb-2">
|
||||
<a href="https://forms.gle/W3U4PZpJGFHWtHyA9" target="_blank">
|
||||
Get Free Trial
|
||||
</a>
|
||||
</Button>
|
||||
<Card>
|
||||
<Title>{globalActivity.api_base}</Title>
|
||||
<Grid numItems={1}>
|
||||
<Col>
|
||||
<Subtitle
|
||||
style={{
|
||||
fontSize: "15px",
|
||||
fontWeight: "normal",
|
||||
color: "#535452",
|
||||
}}
|
||||
>
|
||||
Num Rate Limit Errors {(globalActivity.sum_num_rate_limit_exceptions)}
|
||||
</Subtitle>
|
||||
<BarChart
|
||||
className="h-40"
|
||||
data={globalActivity.daily_data}
|
||||
index="date"
|
||||
colors={['rose']}
|
||||
categories={['num_rate_limit_exceptions']}
|
||||
|
||||
onValueChange={(v) => console.log(v)}
|
||||
/>
|
||||
</Col>
|
||||
|
||||
|
||||
</Grid>
|
||||
</Card>
|
||||
</Card>
|
||||
))}
|
||||
</>
|
||||
}
|
||||
</Grid>
|
||||
|
||||
</TabPanel>
|
||||
<TabPanel>
|
||||
<div className="flex items-center">
|
||||
|
|
|
@ -1195,6 +1195,100 @@ export const adminGlobalActivityPerModel = async (
|
|||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
/**
 * Fetches the daily number of rate limit (429) errors for a model group from
 * `GET /global/activity/exceptions`.
 *
 * @param accessToken - Bearer token sent in the `Authorization` header.
 * @param startTime - First day to include (`YYYY-MM-DD`); only sent together with `endTime`.
 * @param endTime - Last day to include (`YYYY-MM-DD`); only sent together with `startTime`.
 * @param modelGroup - Model group to filter by; omitted from the query when empty.
 * @returns The parsed JSON body of the response.
 * @throws Error when the response status is not ok, or when the request itself fails.
 */
export const adminGlobalActivityExceptions = async (
  accessToken: String,
  startTime: String | undefined,
  endTime: String | undefined,
  modelGroup: String,
) => {
  try {
    const baseUrl = proxyBaseUrl
      ? `${proxyBaseUrl}/global/activity/exceptions`
      : `/global/activity/exceptions`;

    // Build the query with URLSearchParams so values are URL-encoded (model
    // group names may contain "/", "&" or spaces) and so the "?" separator is
    // always present, even when only model_group is sent.
    const queryParams = new URLSearchParams();
    if (startTime && endTime) {
      queryParams.set("start_date", startTime.toString());
      queryParams.set("end_date", endTime.toString());
    }
    if (modelGroup) {
      queryParams.set("model_group", modelGroup.toString());
    }
    const queryString = queryParams.toString();
    const url = queryString ? `${baseUrl}?${queryString}` : baseUrl;

    const requestOptions: {
      method: string;
      headers: {
        Authorization: string;
      };
    } = {
      method: "GET",
      headers: {
        Authorization: `Bearer ${accessToken}`,
      },
    };

    const response = await fetch(url, requestOptions);

    if (!response.ok) {
      // Surface the server's explanation instead of silently discarding it.
      const errorData = await response.text();
      console.error("Error response from the server:", errorData);
      throw new Error("Network response was not ok");
    }
    const data = await response.json();
    console.log(data);
    return data;
  } catch (error) {
    console.error("Failed to fetch rate limit exception data:", error);
    throw error;
  }
};
|
||||
|
||||
/**
 * Fetches the daily number of rate limit (429) errors for a model group,
 * broken down per deployment, from `GET /global/activity/exceptions/deployment`.
 *
 * @param accessToken - Bearer token sent in the `Authorization` header.
 * @param startTime - First day to include (`YYYY-MM-DD`); only sent together with `endTime`.
 * @param endTime - Last day to include (`YYYY-MM-DD`); only sent together with `startTime`.
 * @param modelGroup - Model group to filter by; omitted from the query when empty.
 * @returns The parsed JSON body of the response.
 * @throws Error when the response status is not ok, or when the request itself fails.
 */
export const adminGlobalActivityExceptionsPerDeployment = async (
  accessToken: String,
  startTime: String | undefined,
  endTime: String | undefined,
  modelGroup: String,
) => {
  try {
    const baseUrl = proxyBaseUrl
      ? `${proxyBaseUrl}/global/activity/exceptions/deployment`
      : `/global/activity/exceptions/deployment`;

    // Build the query with URLSearchParams so values are URL-encoded (model
    // group names may contain "/", "&" or spaces) and so the "?" separator is
    // always present, even when only model_group is sent.
    const queryParams = new URLSearchParams();
    if (startTime && endTime) {
      queryParams.set("start_date", startTime.toString());
      queryParams.set("end_date", endTime.toString());
    }
    if (modelGroup) {
      queryParams.set("model_group", modelGroup.toString());
    }
    const queryString = queryParams.toString();
    const url = queryString ? `${baseUrl}?${queryString}` : baseUrl;

    const requestOptions: {
      method: string;
      headers: {
        Authorization: string;
      };
    } = {
      method: "GET",
      headers: {
        Authorization: `Bearer ${accessToken}`,
      },
    };

    const response = await fetch(url, requestOptions);

    if (!response.ok) {
      // Surface the server's explanation instead of silently discarding it.
      const errorData = await response.text();
      console.error("Error response from the server:", errorData);
      throw new Error("Network response was not ok");
    }
    const data = await response.json();
    console.log(data);
    return data;
  } catch (error) {
    console.error(
      "Failed to fetch rate limit exception data per deployment:",
      error,
    );
    throw error;
  }
};
|
||||
|
||||
export const adminTopModelsCall = async (accessToken: String) => {
|
||||
try {
|
||||
let url = proxyBaseUrl
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue