def _parse_activity_date_range(start_date: Optional[str], end_date: Optional[str]):
    """
    Validate and parse the ``start_date`` / ``end_date`` query parameters.

    Returns:
        A ``(start, end)`` tuple of ``datetime`` objects.

    Raises:
        HTTPException: 400 if either value is missing or is not formatted
            as ``YYYY-MM-DD``.
    """
    if start_date is None or end_date is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "Please provide start_date and end_date"},
        )

    try:
        return (
            datetime.strptime(start_date, "%Y-%m-%d"),
            datetime.strptime(end_date, "%Y-%m-%d"),
        )
    except ValueError as e:
        # A malformed date is a client error. Parsing used to happen outside
        # any try block, so the ValueError escaped the endpoint unhandled.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "error": f"start_date and end_date must be formatted as YYYY-MM-DD: {e}"
            },
        )


def _to_chart_rows(rows: list) -> list:
    """
    Order rows chronologically and rewrite each ``date`` as a chart label.

    The sort uses the parsed date *before* it is replaced by its ``"%b %d"``
    label (e.g. ``"Jan 22"``). Sorting on the label itself is alphabetical and
    misorders any range that spans several months (``"Feb 01"`` would be
    placed before ``"Jan 31"``).

    Each row dict is updated in place and the same dict objects are returned.
    Assumes ``row["date"]`` is a string accepted by ``datetime.fromisoformat``
    - TODO confirm against the database client's raw-query output.
    """
    dated_rows = [(datetime.fromisoformat(row["date"]), row) for row in rows]
    dated_rows.sort(key=lambda pair: pair[0])
    for parsed_date, row in dated_rows:
        row["date"] = parsed_date.strftime("%b %d")
    return [row for _, row in dated_rows]


# NOTE(review): the declared 200 model (List[LiteLLM_SpendLogs]) does not look
# like the payload built below - confirm and fix the OpenAPI schema separately.
@router.get(
    "/global/activity/exceptions/deployment",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
)
async def get_global_activity_exceptions_per_deployment(
    model_group: str = fastapi.Query(
        description="Filter by model group",
    ),
    start_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time from which to start viewing spend",
    ),
    end_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time till which to view spend",
    ),
):
    """
    Get number of 429 errors for a model group - grouped by deployment (api_base)

    Only the 10 deployments with the most 429 errors are returned, highest
    count first. `daily_data` is in chronological order.

    [
        {
            "api_base": "https://azure-us-east-1.openai.azure.com/",
            "daily_data": [
                {"api_base": "...", "date": "Jan 22", "num_rate_limit_exceptions": 10},
                {"api_base": "...", "date": "Jan 23", "num_rate_limit_exceptions": 12}
            ],
            "sum_num_rate_limit_exceptions": 22
        },
        ...
    ]

    Raises:
        HTTPException: 400 if start_date / end_date are missing or malformed,
            500 if the database is not connected or the query fails.
    """
    global prisma_client

    start_date_obj, end_date_obj = _parse_activity_date_range(start_date, end_date)

    try:
        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        sql_query = """
        SELECT
            api_base,
            date_trunc('day', "startTime")::date AS date,
            COUNT(*) AS num_rate_limit_exceptions
        FROM
            "LiteLLM_ErrorLogs"
        WHERE
            "startTime" >= $1::date
            AND "startTime" < ($2::date + INTERVAL '1 day')
            AND model_group = $3
            AND status_code = '429'
        GROUP BY
            api_base,
            date_trunc('day', "startTime")
        ORDER BY
            date;
        """
        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj, model_group
        )
        if db_response is None:
            return []

        # Bucket the per-day rows by deployment.
        rows_by_api_base: dict = {}
        for row in db_response:
            rows_by_api_base.setdefault(row["api_base"], []).append(row)

        deployments = [
            {
                "api_base": api_base,
                "daily_data": _to_chart_rows(rows),
                "sum_num_rate_limit_exceptions": sum(
                    row.get("num_rate_limit_exceptions", 0) for row in rows
                ),
            }
            for api_base, rows in rows_by_api_base.items()
        ]

        # Keep only the 10 deployments with the most rate limit errors.
        deployments.sort(
            key=lambda deployment: deployment["sum_num_rate_limit_exceptions"],
            reverse=True,
        )
        return deployments[:10]

    except HTTPException:
        # Already a well-formed HTTP error - do not re-wrap it as a 500.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail={"error": str(e)},
        )


# NOTE(review): the declared 200 model (List[LiteLLM_SpendLogs]) does not look
# like the payload built below - confirm and fix the OpenAPI schema separately.
@router.get(
    "/global/activity/exceptions",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
)
async def get_global_activity_exceptions(
    model_group: str = fastapi.Query(
        description="Filter by model group",
    ),
    start_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time from which to start viewing spend",
    ),
    end_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time till which to view spend",
    ),
):
    """
    Get number of 429 errors per day for a model group

    `daily_data` is in chronological order.

    {
        "daily_data": [
            {"date": "Jan 22", "num_rate_limit_exceptions": 10},
            {"date": "Jan 23", "num_rate_limit_exceptions": 10}
        ],
        "sum_num_rate_limit_exceptions": 20
    }

    Raises:
        HTTPException: 400 if start_date / end_date are missing or malformed,
            500 if the database is not connected or the query fails.
    """
    global prisma_client

    start_date_obj, end_date_obj = _parse_activity_date_range(start_date, end_date)

    try:
        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        sql_query = """
        SELECT
            date_trunc('day', "startTime")::date AS date,
            COUNT(*) AS num_rate_limit_exceptions
        FROM
            "LiteLLM_ErrorLogs"
        WHERE
            "startTime" >= $1::date
            AND "startTime" < ($2::date + INTERVAL '1 day')
            AND model_group = $3
            AND status_code = '429'
        GROUP BY
            date_trunc('day', "startTime")
        ORDER BY
            date;
        """
        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj, model_group
        )

        if db_response is None:
            # Same shape as the normal payload, so callers can always read
            # `daily_data` and `sum_num_rate_limit_exceptions`.
            return {"daily_data": [], "sum_num_rate_limit_exceptions": 0}

        daily_data = _to_chart_rows(list(db_response))

        return {
            "daily_data": daily_data,
            "sum_num_rate_limit_exceptions": sum(
                row.get("num_rate_limit_exceptions", 0) for row in daily_data
            ),
        }

    except HTTPException:
        # Already a well-formed HTTP error - do not re-wrap it.
        raise
    except Exception as e:
        # A missing database or a failed query is a server-side failure, so
        # report 500 like the per-deployment endpoint does.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail={"error": str(e)},
        )
json_object_agg(exception_type, num_rate_limit_exceptions) AS exception_counts FROM cte GROUP BY combined_model_api_base ORDER BY total_exceptions DESC diff --git a/ui/litellm-dashboard/src/components/model_dashboard.tsx b/ui/litellm-dashboard/src/components/model_dashboard.tsx index 66e0bd634..2ec5c6089 100644 --- a/ui/litellm-dashboard/src/components/model_dashboard.tsx +++ b/ui/litellm-dashboard/src/components/model_dashboard.tsx @@ -49,6 +49,8 @@ import { getCallbacksCall, setCallbacksCall, modelSettingsCall, + adminGlobalActivityExceptions, + adminGlobalActivityExceptionsPerDeployment, } from "./networking"; import { BarChart, AreaChart } from "@tremor/react"; import { @@ -109,6 +111,13 @@ interface RetryPolicyObject { [key: string]: { [retryPolicyKey: string]: number } | undefined; } + +interface GlobalExceptionActivityData { + sum_num_rate_limit_exceptions: number; + daily_data: { date: string; num_rate_limit_exceptions: number; }[]; +} + + //["OpenAI", "Azure OpenAI", "Anthropic", "Gemini (Google AI Studio)", "Amazon Bedrock", "OpenAI-Compatible Endpoints (Groq, Together AI, Mistral AI, etc.)"] interface ProviderFields { @@ -301,6 +310,9 @@ const ModelDashboard: React.FC = ({ useState(null); const [defaultRetry, setDefaultRetry] = useState(0); + const [globalExceptionData, setGlobalExceptionData] = useState({} as GlobalExceptionActivityData); + const [globalExceptionPerDeployment, setGlobalExceptionPerDeployment] = useState([]); + function formatCreatedAt(createdAt: string | null) { if (createdAt) { const date = new Date(createdAt); @@ -643,6 +655,29 @@ const ModelDashboard: React.FC = ({ dateValue.to?.toISOString() ); + const dailyExceptions = await adminGlobalActivityExceptions( + accessToken, + dateValue.from?.toISOString().split('T')[0], + dateValue.to?.toISOString().split('T')[0], + _initial_model_group, + ); + + setGlobalExceptionData(dailyExceptions); + + const dailyExceptionsPerDeplyment = await adminGlobalActivityExceptionsPerDeployment( + 
accessToken, + dateValue.from?.toISOString().split('T')[0], + dateValue.to?.toISOString().split('T')[0], + _initial_model_group, + ) + + setGlobalExceptionPerDeployment(dailyExceptionsPerDeplyment); + + console.log("dailyExceptions:", dailyExceptions); + + console.log("dailyExceptionsPerDeplyment:", dailyExceptionsPerDeplyment); + + console.log("slowResponses:", slowResponses); setSlowResponsesData(slowResponses); @@ -905,6 +940,30 @@ const ModelDashboard: React.FC = ({ console.log("slowResponses:", slowResponses); setSlowResponsesData(slowResponses); + + + if (modelGroup) { + const dailyExceptions = await adminGlobalActivityExceptions( + accessToken, + startTime?.toISOString().split('T')[0], + endTime?.toISOString().split('T')[0], + modelGroup, + ); + + setGlobalExceptionData(dailyExceptions); + + const dailyExceptionsPerDeplyment = await adminGlobalActivityExceptionsPerDeployment( + accessToken, + startTime?.toISOString().split('T')[0], + endTime?.toISOString().split('T')[0], + modelGroup, + ) + + setGlobalExceptionPerDeployment(dailyExceptionsPerDeplyment); + + } + + } catch (error) { console.error("Failed to fetch model metrics", error); } @@ -1782,17 +1841,110 @@ const ModelDashboard: React.FC = ({ - - Exceptions per Model - - + + + + All Up Rate Limit Errors (429) for {selectedModelGroup} + + + Num Rate Limit Errors { (globalExceptionData.sum_num_rate_limit_exceptions)} + console.log(v)} + /> + + + + {/* */} + + + + + + + + + + { + premiumUser ? ( + <> + {globalExceptionPerDeployment.map((globalActivity, index) => ( + + {globalActivity.api_base ? globalActivity.api_base : "Unknown API Base"} + + + Num Rate Limit Errors (429) {(globalActivity.sum_num_rate_limit_exceptions)} + console.log(v)} + /> + + + + + ))} + + ) : + <> + {globalExceptionPerDeployment && globalExceptionPerDeployment.length > 0 && + globalExceptionPerDeployment.slice(0, 1).map((globalActivity, index) => ( + + ✨ Rate Limit Errors by Deployment +

Upgrade to see exceptions for all deployments

+ + + {globalActivity.api_base} + + + + Num Rate Limit Errors {(globalActivity.sum_num_rate_limit_exceptions)} + + console.log(v)} + /> + + + + + +
+ ))} + + } +
+
/**
 * Shared GET helper for the rate limit (429) exception activity endpoints.
 *
 * The query string is built with URLSearchParams so that every value is
 * URL-encoded and the `?` / `&` separators are always correct. The previous
 * hand-built string appended `&model_group=...` with no leading `?` whenever
 * the dates were missing.
 *
 * @param endpointPath path of the proxy endpoint, starting with `/`
 * @param accessToken  bearer token sent in the Authorization header
 * @param startTime    start date; sent as `start_date` only when `endTime` is also set
 * @param endTime      end date; sent as `end_date` only when `startTime` is also set
 * @param modelGroup   model group to filter by; sent as `model_group` when non-empty
 * @returns the parsed JSON body of the response
 * @throws Error when the response status is not ok; fetch / JSON parsing
 *         errors are logged and re-thrown
 */
const fetchRateLimitExceptionActivity = async (
  endpointPath: string,
  accessToken: String,
  startTime: String | undefined,
  endTime: String | undefined,
  modelGroup: String,
) => {
  try {
    const baseUrl = proxyBaseUrl
      ? `${proxyBaseUrl}${endpointPath}`
      : endpointPath;

    const params = new URLSearchParams();
    if (startTime && endTime) {
      params.set("start_date", startTime.toString());
      params.set("end_date", endTime.toString());
    }
    if (modelGroup) {
      params.set("model_group", modelGroup.toString());
    }

    const queryString = params.toString();
    const url = queryString ? `${baseUrl}?${queryString}` : baseUrl;

    const requestOptions: {
      method: string;
      headers: {
        Authorization: string;
      };
    } = {
      method: "GET",
      headers: {
        Authorization: `Bearer ${accessToken}`,
      },
    };

    const response = await fetch(url, requestOptions);

    if (!response.ok) {
      // Log the server's error body instead of reading and discarding it.
      const errorData = await response.text();
      console.error("Rate limit exception activity request failed:", errorData);
      throw new Error("Network response was not ok");
    }

    const data = await response.json();
    console.log(data);
    return data;
  } catch (error) {
    console.error("Failed to fetch rate limit exception data:", error);
    throw error;
  }
};

/**
 * Fetch the per-day rate limit (429) exception counts for a model group
 * from `/global/activity/exceptions`.
 */
export const adminGlobalActivityExceptions = async (
  accessToken: String,
  startTime: String | undefined,
  endTime: String | undefined,
  modelGroup: String,
) =>
  fetchRateLimitExceptionActivity(
    "/global/activity/exceptions",
    accessToken,
    startTime,
    endTime,
    modelGroup,
  );

/**
 * Fetch the rate limit (429) exception counts for a model group, grouped by
 * deployment, from `/global/activity/exceptions/deployment`.
 */
export const adminGlobalActivityExceptionsPerDeployment = async (
  accessToken: String,
  startTime: String | undefined,
  endTime: String | undefined,
  modelGroup: String,
) =>
  fetchRateLimitExceptionActivity(
    "/global/activity/exceptions/deployment",
    accessToken,
    startTime,
    endTime,
    modelGroup,
  );