Merge pull request #3932 from BerriAI/litellm_show_num_429_errors_ui

[Feat- admin UI] Show number of rate limit errors by deployment per day
This commit is contained in:
Ishaan Jaff 2024-05-30 20:56:50 -07:00 committed by GitHub
commit e241b87d37
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 518 additions and 13 deletions

View file

@ -6859,6 +6859,265 @@ async def get_global_activity_model(
)
@router.get(
    "/global/activity/exceptions/deployment",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
)
async def get_global_activity_exceptions_per_deployment(
    model_group: str = fastapi.Query(
        description="Filter by model group",
    ),
    start_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time from which to start viewing spend",
    ),
    end_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time till which to view spend",
    ),
):
    """
    Get the number of rate limit (429) errors per day, grouped by deployment (api_base).

    Parameters:
    - model_group: only error logs for this model group are counted.
    - start_date: first day to include, formatted as YYYY-MM-DD.
    - end_date: last day to include (inclusive), formatted as YYYY-MM-DD.

    Returns the (at most) 10 deployments with the most rate limit errors,
    highest total first. Each deployment's `daily_data` is in chronological order:

    [
        {
            "api_base": "https://azure-us-east-1.openai.azure.com/",
            "daily_data": [
                {"api_base": "...", "date": "Jan 22", "num_rate_limit_exceptions": 10},
                {"api_base": "...", "date": "Jan 23", "num_rate_limit_exceptions": 12}
            ],
            "sum_num_rate_limit_exceptions": 22
        }
    ]

    Raises:
    - HTTPException 400 if a date is missing or not in YYYY-MM-DD format.
    - HTTPException 500 if the database is not connected or the query fails.
    """
    global prisma_client, llm_router, premium_user

    if start_date is None or end_date is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "Please provide start_date and end_date"},
        )

    # A malformed date is a client error; without this guard strptime's
    # ValueError would escape the handler as an unhandled exception.
    try:
        start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
        end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
    except ValueError:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "start_date and end_date must be in YYYY-MM-DD format"},
        )

    try:
        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        sql_query = """
        SELECT
            api_base,
            date_trunc('day', "startTime")::date AS date,
            COUNT(*) AS num_rate_limit_exceptions
        FROM
            "LiteLLM_ErrorLogs"
        WHERE
            "startTime" >= $1::date
            AND "startTime" < ($2::date + INTERVAL '1 day')
            AND model_group = $3
            AND status_code = '429'
        GROUP BY
            api_base,
            date_trunc('day', "startTime")
        ORDER BY
            date;
        """
        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj, model_group
        )
        if db_response is None:
            return []

        # api_base -> {"daily_rows": [(datetime, row), ...], "sum_num_rate_limit_exceptions": int}
        per_deployment: dict = {}
        for row in db_response:
            bucket = per_deployment.setdefault(
                row["api_base"],
                {"daily_rows": [], "sum_num_rate_limit_exceptions": 0},
            )
            # Keep the parsed date next to the row so the chronological sort
            # below does not have to rely on the display label.
            day = datetime.fromisoformat(row["date"])
            bucket["daily_rows"].append((day, row))
            bucket["sum_num_rate_limit_exceptions"] += row.get(
                "num_rate_limit_exceptions", 0
            )

        # Keep only the 10 deployments with the most rate limit errors.
        top_deployments = sorted(
            per_deployment.items(),
            key=lambda item: item[1]["sum_num_rate_limit_exceptions"],
            reverse=True,
        )[:10]

        response = []
        for api_base, bucket in top_deployments:
            daily_data = []
            # Sort on the real date: sorting on the "%b %d" label is alphabetical
            # and would place e.g. "Feb 01" before "Jan 25".
            for day, row in sorted(bucket["daily_rows"], key=lambda pair: pair[0]):
                row["date"] = day.strftime("%b %d")
                daily_data.append(row)
            response.append(
                {
                    "api_base": api_base,
                    "daily_data": daily_data,
                    "sum_num_rate_limit_exceptions": bucket[
                        "sum_num_rate_limit_exceptions"
                    ],
                }
            )

        return response

    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail={"error": str(e)},
        )
@router.get(
    "/global/activity/exceptions",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
)
async def get_global_activity_exceptions(
    model_group: str = fastapi.Query(
        description="Filter by model group",
    ),
    start_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time from which to start viewing spend",
    ),
    end_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time till which to view spend",
    ),
):
    """
    Get the number of rate limit (429) errors per day for a model group.

    Parameters:
    - model_group: only error logs for this model group are counted.
    - start_date: first day to include, formatted as YYYY-MM-DD.
    - end_date: last day to include (inclusive), formatted as YYYY-MM-DD.

    Returns (`daily_data` is in chronological order):

    {
        "daily_data": [
            {"date": "Jan 22", "num_rate_limit_exceptions": 10},
            {"date": "Jan 23", "num_rate_limit_exceptions": 10}
        ],
        "sum_num_rate_limit_exceptions": 20
    }

    Raises:
    - HTTPException 400 if a date is missing or not in YYYY-MM-DD format.
    - HTTPException 500 if the database is not connected or the query fails.
    """
    global prisma_client, llm_router

    if start_date is None or end_date is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "Please provide start_date and end_date"},
        )

    # A malformed date is a client error; without this guard strptime's
    # ValueError would escape the handler as an unhandled exception.
    try:
        start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
        end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
    except ValueError:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "start_date and end_date must be in YYYY-MM-DD format"},
        )

    try:
        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        sql_query = """
        SELECT
            date_trunc('day', "startTime")::date AS date,
            COUNT(*) AS num_rate_limit_exceptions
        FROM
            "LiteLLM_ErrorLogs"
        WHERE
            "startTime" >= $1::date
            AND "startTime" < ($2::date + INTERVAL '1 day')
            AND model_group = $3
            AND status_code = '429'
        GROUP BY
            date_trunc('day', "startTime")
        ORDER BY
            date;
        """
        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj, model_group
        )

        if db_response is None:
            # Return the same object shape as the populated case so callers
            # can always read `daily_data` / `sum_num_rate_limit_exceptions`.
            return {"daily_data": [], "sum_num_rate_limit_exceptions": 0}

        # Order rows by their real date before swapping in the display label:
        # sorting on "%b %d" is alphabetical and would place e.g. "Feb 01"
        # before "Jan 25".
        dated_rows = [
            (datetime.fromisoformat(row["date"]), row) for row in db_response
        ]
        dated_rows.sort(key=lambda pair: pair[0])

        sum_num_rate_limit_exceptions = 0
        daily_data = []
        for day, row in dated_rows:
            row["date"] = day.strftime("%b %d")
            daily_data.append(row)
            sum_num_rate_limit_exceptions += row.get("num_rate_limit_exceptions", 0)

        return {
            "daily_data": daily_data,
            "sum_num_rate_limit_exceptions": sum_num_rate_limit_exceptions,
        }

    except Exception as e:
        # Anything reaching this point is a server-side failure (no database,
        # failed query, unexpected row shape), so report it as a 500, matching
        # the per-deployment endpoint.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail={"error": str(e)},
        )
@router.get(
"/global/spend/provider",
tags=["Budget & Spend Tracking"],
@ -10577,7 +10836,7 @@ async def model_metrics_exceptions(
SELECT
CASE WHEN api_base = '' THEN litellm_model_name ELSE CONCAT(litellm_model_name, '-', api_base) END AS combined_model_api_base,
exception_type,
COUNT(*) AS num_exceptions
COUNT(*) AS num_rate_limit_exceptions
FROM "LiteLLM_ErrorLogs"
WHERE "startTime" >= $1::timestamp AND "endTime" <= $2::timestamp AND model_group = $3
GROUP BY combined_model_api_base, exception_type
@ -10585,7 +10844,7 @@ async def model_metrics_exceptions(
SELECT
combined_model_api_base,
COUNT(*) AS total_exceptions,
json_object_agg(exception_type, num_exceptions) AS exception_counts
json_object_agg(exception_type, num_rate_limit_exceptions) AS exception_counts
FROM cte
GROUP BY combined_model_api_base
ORDER BY total_exceptions DESC

View file

@ -49,6 +49,8 @@ import {
getCallbacksCall,
setCallbacksCall,
modelSettingsCall,
adminGlobalActivityExceptions,
adminGlobalActivityExceptionsPerDeployment,
} from "./networking";
import { BarChart, AreaChart } from "@tremor/react";
import {
@ -109,6 +111,13 @@ interface RetryPolicyObject {
[key: string]: { [retryPolicyKey: string]: number } | undefined;
}
/**
 * Aggregate rate limit (429) error data for one model group, as held in the
 * `globalExceptionData` state and fed to the "All Up Rate Limit Errors" chart.
 */
interface GlobalExceptionActivityData {
// Total number of rate limit errors, shown in the chart's subtitle.
sum_num_rate_limit_exceptions: number;
// One entry per day; `date` is the chart index (a display label such as "Jan 22")
// and `num_rate_limit_exceptions` is the plotted category.
daily_data: { date: string; num_rate_limit_exceptions: number; }[];
}
//["OpenAI", "Azure OpenAI", "Anthropic", "Gemini (Google AI Studio)", "Amazon Bedrock", "OpenAI-Compatible Endpoints (Groq, Together AI, Mistral AI, etc.)"]
interface ProviderFields {
@ -301,6 +310,9 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
useState<RetryPolicyObject | null>(null);
const [defaultRetry, setDefaultRetry] = useState<number>(0);
const [globalExceptionData, setGlobalExceptionData] = useState<GlobalExceptionActivityData>({} as GlobalExceptionActivityData);
const [globalExceptionPerDeployment, setGlobalExceptionPerDeployment] = useState<any[]>([]);
function formatCreatedAt(createdAt: string | null) {
if (createdAt) {
const date = new Date(createdAt);
@ -643,6 +655,29 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
dateValue.to?.toISOString()
);
const dailyExceptions = await adminGlobalActivityExceptions(
accessToken,
dateValue.from?.toISOString().split('T')[0],
dateValue.to?.toISOString().split('T')[0],
_initial_model_group,
);
setGlobalExceptionData(dailyExceptions);
const dailyExceptionsPerDeplyment = await adminGlobalActivityExceptionsPerDeployment(
accessToken,
dateValue.from?.toISOString().split('T')[0],
dateValue.to?.toISOString().split('T')[0],
_initial_model_group,
)
setGlobalExceptionPerDeployment(dailyExceptionsPerDeplyment);
console.log("dailyExceptions:", dailyExceptions);
console.log("dailyExceptionsPerDeplyment:", dailyExceptionsPerDeplyment);
console.log("slowResponses:", slowResponses);
setSlowResponsesData(slowResponses);
@ -905,6 +940,30 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
console.log("slowResponses:", slowResponses);
setSlowResponsesData(slowResponses);
if (modelGroup) {
const dailyExceptions = await adminGlobalActivityExceptions(
accessToken,
startTime?.toISOString().split('T')[0],
endTime?.toISOString().split('T')[0],
modelGroup,
);
setGlobalExceptionData(dailyExceptions);
const dailyExceptionsPerDeplyment = await adminGlobalActivityExceptionsPerDeployment(
accessToken,
startTime?.toISOString().split('T')[0],
endTime?.toISOString().split('T')[0],
modelGroup,
)
setGlobalExceptionPerDeployment(dailyExceptionsPerDeplyment);
}
} catch (error) {
console.error("Failed to fetch model metrics", error);
}
@ -1782,17 +1841,110 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
</Card>
</Col>
</Grid>
<Card className="mt-4">
<Title>Exceptions per Model</Title>
<BarChart
className="h-72"
data={modelExceptions}
index="model"
categories={allExceptions}
stack={true}
yAxisWidth={30}
/>
</Card>
<Grid numItems={1} className="gap-2 w-full mt-2">
<Card>
<Title>All Up Rate Limit Errors (429) for {selectedModelGroup}</Title>
<Grid numItems={1}>
<Col>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Num Rate Limit Errors { (globalExceptionData.sum_num_rate_limit_exceptions)}</Subtitle>
<BarChart
className="h-40"
data={globalExceptionData.daily_data}
index="date"
colors={['rose']}
categories={['num_rate_limit_exceptions']}
onValueChange={(v) => console.log(v)}
/>
</Col>
<Col>
{/* <BarChart
className="h-40"
data={modelExceptions}
index="model"
categories={allExceptions}
stack={true}
yAxisWidth={30}
/> */}
</Col>
</Grid>
</Card>
{
premiumUser ? (
<>
{globalExceptionPerDeployment.map((globalActivity, index) => (
<Card key={index}>
<Title>{globalActivity.api_base ? globalActivity.api_base : "Unknown API Base"}</Title>
<Grid numItems={1}>
<Col>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Num Rate Limit Errors (429) {(globalActivity.sum_num_rate_limit_exceptions)}</Subtitle>
<BarChart
className="h-40"
data={globalActivity.daily_data}
index="date"
colors={['rose']}
categories={['num_rate_limit_exceptions']}
onValueChange={(v) => console.log(v)}
/>
</Col>
</Grid>
</Card>
))}
</>
) :
<>
{globalExceptionPerDeployment && globalExceptionPerDeployment.length > 0 &&
globalExceptionPerDeployment.slice(0, 1).map((globalActivity, index) => (
<Card key={index}>
<Title> Rate Limit Errors by Deployment</Title>
<p className="mb-2 text-gray-500 italic text-[12px]">Upgrade to see exceptions for all deployments</p>
<Button variant="primary" className="mb-2">
<a href="https://forms.gle/W3U4PZpJGFHWtHyA9" target="_blank">
Get Free Trial
</a>
</Button>
<Card>
<Title>{globalActivity.api_base}</Title>
<Grid numItems={1}>
<Col>
<Subtitle
style={{
fontSize: "15px",
fontWeight: "normal",
color: "#535452",
}}
>
Num Rate Limit Errors {(globalActivity.sum_num_rate_limit_exceptions)}
</Subtitle>
<BarChart
className="h-40"
data={globalActivity.daily_data}
index="date"
colors={['rose']}
categories={['num_rate_limit_exceptions']}
onValueChange={(v) => console.log(v)}
/>
</Col>
</Grid>
</Card>
</Card>
))}
</>
}
</Grid>
</TabPanel>
<TabPanel>
<div className="flex items-center">

View file

@ -1195,6 +1195,100 @@ export const adminGlobalActivityPerModel = async (
}
};
/**
 * Fetch the daily rate limit (429) error counts for a model group from
 * `GET /global/activity/exceptions`.
 *
 * @param accessToken Bearer token sent in the `Authorization` header.
 * @param startTime   First day to include (sent as `start_date`); only sent
 *                    when `endTime` is also provided.
 * @param endTime     Last day to include (sent as `end_date`); only sent when
 *                    `startTime` is also provided.
 * @param modelGroup  Model group to filter by (sent as `model_group`).
 * @returns The parsed JSON response body.
 * @throws Error when the response status is not ok, or when the request fails.
 */
export const adminGlobalActivityExceptions = async (
  accessToken: String,
  startTime: String | undefined,
  endTime: String | undefined,
  modelGroup: String,
) => {
  try {
    const baseUrl = proxyBaseUrl
      ? `${proxyBaseUrl}/global/activity/exceptions`
      : `/global/activity/exceptions`;

    // Build the query with URLSearchParams so that values are URL-encoded and
    // the "?" / "&" separators stay correct even when the dates are omitted.
    const params = new URLSearchParams();
    if (startTime && endTime) {
      params.set("start_date", startTime.toString());
      params.set("end_date", endTime.toString());
    }
    if (modelGroup) {
      params.set("model_group", modelGroup.toString());
    }
    const queryString = params.toString();
    const url = queryString ? `${baseUrl}?${queryString}` : baseUrl;

    const requestOptions: {
      method: string;
      headers: {
        Authorization: string;
      };
    } = {
      method: "GET",
      headers: {
        Authorization: `Bearer ${accessToken}`,
      },
    };

    const response = await fetch(url, requestOptions);

    if (!response.ok) {
      // Surface the server's error body in the console instead of dropping it.
      const errorData = await response.text();
      console.error("Error response from the server:", errorData);
      throw new Error("Network response was not ok");
    }

    const data = await response.json();
    console.log(data);
    return data;
  } catch (error) {
    console.error("Failed to fetch rate limit exception data:", error);
    throw error;
  }
};
/**
 * Fetch the daily rate limit (429) error counts per deployment for a model
 * group from `GET /global/activity/exceptions/deployment`.
 *
 * @param accessToken Bearer token sent in the `Authorization` header.
 * @param startTime   First day to include (sent as `start_date`); only sent
 *                    when `endTime` is also provided.
 * @param endTime     Last day to include (sent as `end_date`); only sent when
 *                    `startTime` is also provided.
 * @param modelGroup  Model group to filter by (sent as `model_group`).
 * @returns The parsed JSON response body.
 * @throws Error when the response status is not ok, or when the request fails.
 */
export const adminGlobalActivityExceptionsPerDeployment = async (
  accessToken: String,
  startTime: String | undefined,
  endTime: String | undefined,
  modelGroup: String,
) => {
  try {
    const baseUrl = proxyBaseUrl
      ? `${proxyBaseUrl}/global/activity/exceptions/deployment`
      : `/global/activity/exceptions/deployment`;

    // Build the query with URLSearchParams so that values are URL-encoded and
    // the "?" / "&" separators stay correct even when the dates are omitted.
    const params = new URLSearchParams();
    if (startTime && endTime) {
      params.set("start_date", startTime.toString());
      params.set("end_date", endTime.toString());
    }
    if (modelGroup) {
      params.set("model_group", modelGroup.toString());
    }
    const queryString = params.toString();
    const url = queryString ? `${baseUrl}?${queryString}` : baseUrl;

    const requestOptions: {
      method: string;
      headers: {
        Authorization: string;
      };
    } = {
      method: "GET",
      headers: {
        Authorization: `Bearer ${accessToken}`,
      },
    };

    const response = await fetch(url, requestOptions);

    if (!response.ok) {
      // Surface the server's error body in the console instead of dropping it.
      const errorData = await response.text();
      console.error("Error response from the server:", errorData);
      throw new Error("Network response was not ok");
    }

    const data = await response.json();
    console.log(data);
    return data;
  } catch (error) {
    console.error("Failed to fetch rate limit exception data per deployment:", error);
    throw error;
  }
};
export const adminTopModelsCall = async (accessToken: String) => {
try {
let url = proxyBaseUrl