forked from phoenix/litellm-mirror
Merge pull request #3390 from BerriAI/litellm_show_slow_responses
[UI] show slow responses + num requests per deployment
This commit is contained in:
commit
b3f5ff4d11
3 changed files with 168 additions and 45 deletions
|
@ -7607,6 +7607,67 @@ async def model_metrics(
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get(
    "/model/metrics/slow_responses",
    description="View number of hanging requests per model_group",
    tags=["model management"],
    include_in_schema=False,
    dependencies=[Depends(user_api_key_auth)],
)
async def model_metrics_slow_responses(
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
    _selected_model_group: Optional[str] = "gpt-4-32k",
    startTime: Optional[datetime] = None,
    endTime: Optional[datetime] = None,
):
    """
    Count total and slow requests per ``api_base`` for one model group.

    A request counts as "slow" when ``endTime - startTime`` of its spend-log
    row is at least the Slack alerting threshold, in seconds (300 when no
    threshold is configured). Rows whose ``cache_hit`` equals ``'True'`` are
    excluded.

    Parameters:
        user_api_key_dict: Auth context resolved by ``user_api_key_auth``.
        _selected_model_group: Value matched against the ``model`` column.
        startTime: Inclusive lower bound on the row's ``startTime``.
            Defaults to 30 days before now.
        endTime: Inclusive upper bound on the row's ``startTime``.
            Defaults to now.

    Returns:
        The rows returned by the raw query, each with ``api_base``,
        ``total_count`` and ``slow_count``, ordered by ``slow_count``
        descending.

    Raises:
        ProxyException: If the Prisma client has not been initialized.
    """
    global prisma_client, llm_router, proxy_logging_obj
    if prisma_client is None:
        raise ProxyException(
            message="Prisma Client is not initialized",
            type="internal_error",
            param="None",
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
    startTime = startTime or datetime.now() - timedelta(days=30)
    endTime = endTime or datetime.now()

    alerting_threshold = (
        proxy_logging_obj.slack_alerting_instance.alerting_threshold or 300
    )
    alerting_threshold = int(alerting_threshold)

    # The time window is bound as parameters ($3/$4) so the caller-supplied
    # startTime/endTime actually restrict the rows that are aggregated.
    sql_query = """
        SELECT
            api_base,
            COUNT(*) AS total_count,
            SUM(CASE
                WHEN ("endTime" - "startTime") >= (INTERVAL '1 SECOND' * CAST($1 AS INTEGER)) THEN 1
                ELSE 0
            END) AS slow_count
        FROM
            "LiteLLM_SpendLogs"
        WHERE
            "model" = $2
            AND "cache_hit" != 'True'
            AND "startTime" >= $3::timestamp
            AND "startTime" <= $4::timestamp
        GROUP BY
            api_base
        ORDER BY
            slow_count DESC;
    """

    db_response = await prisma_client.db.query_raw(
        sql_query, alerting_threshold, _selected_model_group, startTime, endTime
    )

    if db_response is not None:
        for row in db_response:
            _api_base = row.get("api_base") or ""
            # Keep only the part before "/openai/" so the value shown is the
            # bare endpoint (presumably trims Azure-style deployment paths;
            # confirm against stored api_base values).
            if "/openai/" in _api_base:
                _api_base = _api_base.split("/openai/")[0]
            row["api_base"] = _api_base
    return db_response
|
||||||
|
|
||||||
|
|
||||||
@router.get(
|
@router.get(
|
||||||
"/model/metrics/exceptions",
|
"/model/metrics/exceptions",
|
||||||
description="View number of failed requests per model on config.yaml",
|
description="View number of failed requests per model on config.yaml",
|
||||||
|
|
|
@ -18,7 +18,7 @@ import {
|
||||||
} from "@tremor/react";
|
} from "@tremor/react";
|
||||||
import { TabPanel, TabPanels, TabGroup, TabList, Tab, TextInput, Icon } from "@tremor/react";
|
import { TabPanel, TabPanels, TabGroup, TabList, Tab, TextInput, Icon } from "@tremor/react";
|
||||||
import { Select, SelectItem, MultiSelect, MultiSelectItem } from "@tremor/react";
|
import { Select, SelectItem, MultiSelect, MultiSelectItem } from "@tremor/react";
|
||||||
import { modelInfoCall, userGetRequesedtModelsCall, modelCreateCall, Model, modelCostMap, modelDeleteCall, healthCheckCall, modelUpdateCall, modelMetricsCall, modelExceptionsCall } from "./networking";
|
import { modelInfoCall, userGetRequesedtModelsCall, modelCreateCall, Model, modelCostMap, modelDeleteCall, healthCheckCall, modelUpdateCall, modelMetricsCall, modelExceptionsCall, modelMetricsSlowResponsesCall } from "./networking";
|
||||||
import { BarChart, AreaChart } from "@tremor/react";
|
import { BarChart, AreaChart } from "@tremor/react";
|
||||||
import {
|
import {
|
||||||
Button as Button2,
|
Button as Button2,
|
||||||
|
@ -205,6 +205,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
|
||||||
const [modelExceptions, setModelExceptions] = useState<any[]>([]);
|
const [modelExceptions, setModelExceptions] = useState<any[]>([]);
|
||||||
const [allExceptions, setAllExceptions] = useState<any[]>([]);
|
const [allExceptions, setAllExceptions] = useState<any[]>([]);
|
||||||
const [failureTableData, setFailureTableData] = useState<any[]>([]);
|
const [failureTableData, setFailureTableData] = useState<any[]>([]);
|
||||||
|
const [slowResponsesData, setSlowResponsesData] = useState<any[]>([]);
|
||||||
|
|
||||||
const EditModelModal: React.FC<EditModelModalProps> = ({ visible, onCancel, model, onSubmit }) => {
|
const EditModelModal: React.FC<EditModelModalProps> = ({ visible, onCancel, model, onSubmit }) => {
|
||||||
const [form] = Form.useForm();
|
const [form] = Form.useForm();
|
||||||
|
@ -479,39 +480,51 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
|
||||||
setAllExceptions(modelExceptionsResponse.exception_types);
|
setAllExceptions(modelExceptionsResponse.exception_types);
|
||||||
|
|
||||||
|
|
||||||
let modelMetricsData = modelMetricsResponse.data;
|
const slowResponses = await modelMetricsSlowResponsesCall(
|
||||||
let successdeploymentToSuccess: Record<string, number> = {};
|
accessToken,
|
||||||
for (let i = 0; i < modelMetricsData.length; i++) {
|
userID,
|
||||||
let element = modelMetricsData[i];
|
userRole,
|
||||||
let _model_name = element.model;
|
null
|
||||||
let _num_requests = element.num_requests;
|
)
|
||||||
successdeploymentToSuccess[_model_name] = _num_requests
|
|
||||||
}
|
|
||||||
console.log("successdeploymentToSuccess:", successdeploymentToSuccess)
|
|
||||||
|
|
||||||
let failureTableData = [];
|
console.log("slowResponses:", slowResponses)
|
||||||
let _failureData = modelExceptionsResponse.data;
|
|
||||||
for (let i = 0; i < _failureData.length; i++) {
|
|
||||||
const model = _failureData[i];
|
|
||||||
let _model_name = model.model;
|
|
||||||
let total_exceptions = model.total_exceptions;
|
|
||||||
let total_Requests = successdeploymentToSuccess[_model_name];
|
|
||||||
if (total_Requests == null) {
|
|
||||||
total_Requests = 0
|
|
||||||
}
|
|
||||||
let _data = {
|
|
||||||
model: _model_name,
|
|
||||||
total_exceptions: total_exceptions,
|
|
||||||
total_Requests: total_Requests,
|
|
||||||
failure_rate: total_Requests / total_exceptions
|
|
||||||
}
|
|
||||||
failureTableData.push(_data);
|
|
||||||
// sort failureTableData by failure_rate
|
|
||||||
failureTableData.sort((a, b) => b.failure_rate - a.failure_rate);
|
|
||||||
|
|
||||||
setFailureTableData(failureTableData);
|
setSlowResponsesData(slowResponses);
|
||||||
console.log("failureTableData:", failureTableData);
|
|
||||||
}
|
|
||||||
|
// let modelMetricsData = modelMetricsResponse.data;
|
||||||
|
// let successdeploymentToSuccess: Record<string, number> = {};
|
||||||
|
// for (let i = 0; i < modelMetricsData.length; i++) {
|
||||||
|
// let element = modelMetricsData[i];
|
||||||
|
// let _model_name = element.model;
|
||||||
|
// let _num_requests = element.num_requests;
|
||||||
|
// successdeploymentToSuccess[_model_name] = _num_requests
|
||||||
|
// }
|
||||||
|
// console.log("successdeploymentToSuccess:", successdeploymentToSuccess)
|
||||||
|
|
||||||
|
// let failureTableData = [];
|
||||||
|
// let _failureData = modelExceptionsResponse.data;
|
||||||
|
// for (let i = 0; i < _failureData.length; i++) {
|
||||||
|
// const model = _failureData[i];
|
||||||
|
// let _model_name = model.model;
|
||||||
|
// let total_exceptions = model.total_exceptions;
|
||||||
|
// let total_Requests = successdeploymentToSuccess[_model_name];
|
||||||
|
// if (total_Requests == null) {
|
||||||
|
// total_Requests = 0
|
||||||
|
// }
|
||||||
|
// let _data = {
|
||||||
|
// model: _model_name,
|
||||||
|
// total_exceptions: total_exceptions,
|
||||||
|
// total_Requests: total_Requests,
|
||||||
|
// failure_rate: total_Requests / total_exceptions
|
||||||
|
// }
|
||||||
|
// failureTableData.push(_data);
|
||||||
|
// // sort failureTableData by failure_rate
|
||||||
|
// failureTableData.sort((a, b) => b.failure_rate - a.failure_rate);
|
||||||
|
|
||||||
|
// setFailureTableData(failureTableData);
|
||||||
|
// console.log("failureTableData:", failureTableData);
|
||||||
|
// }
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("There was an error fetching the model data", error);
|
console.error("There was an error fetching the model data", error);
|
||||||
|
@ -691,6 +704,18 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
|
||||||
setModelExceptions(modelExceptionsResponse.data);
|
setModelExceptions(modelExceptionsResponse.data);
|
||||||
setAllExceptions(modelExceptionsResponse.exception_types);
|
setAllExceptions(modelExceptionsResponse.exception_types);
|
||||||
|
|
||||||
|
|
||||||
|
const slowResponses = await modelMetricsSlowResponsesCall(
|
||||||
|
accessToken,
|
||||||
|
userID,
|
||||||
|
userRole,
|
||||||
|
modelGroup
|
||||||
|
)
|
||||||
|
|
||||||
|
console.log("slowResponses:", slowResponses)
|
||||||
|
|
||||||
|
setSlowResponsesData(slowResponses);
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Failed to fetch model metrics", error);
|
console.error("Failed to fetch model metrics", error);
|
||||||
}
|
}
|
||||||
|
@ -1110,7 +1135,7 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
|
||||||
|
|
||||||
<Grid numItems={2}>
|
<Grid numItems={2}>
|
||||||
<Col>
|
<Col>
|
||||||
<Card className="mr-2">
|
<Card className="mr-2 max-h-[400px] min-h-[400px]">
|
||||||
<Title>Avg Latency per Token</Title><p className="text-gray-500 italic"> (seconds/token)</p>
|
<Title>Avg Latency per Token</Title><p className="text-gray-500 italic"> (seconds/token)</p>
|
||||||
<Text className="text-gray-500 italic mt-1 mb-1">average Latency for successfull requests divided by the total tokens</Text>
|
<Text className="text-gray-500 italic mt-1 mb-1">average Latency for successfull requests divided by the total tokens</Text>
|
||||||
{ modelMetrics && modelMetricsCategories && (
|
{ modelMetrics && modelMetricsCategories && (
|
||||||
|
@ -1126,29 +1151,26 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
|
||||||
/>
|
/>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
</Card>
|
</Card>
|
||||||
</Col>
|
</Col>
|
||||||
<Col>
|
<Col>
|
||||||
<Card className="ml-2">
|
<Card className="ml-2 max-h-[400px] min-h-[400px] overflow-y-auto">
|
||||||
<Table>
|
<Table>
|
||||||
<TableHead>
|
<TableHead>
|
||||||
<TableRow>
|
<TableRow>
|
||||||
<TableHeaderCell>Model</TableHeaderCell>
|
<TableHeaderCell>Deployment</TableHeaderCell>
|
||||||
<TableHeaderCell>Success Requests</TableHeaderCell>
|
<TableHeaderCell>Success Responses</TableHeaderCell>
|
||||||
<TableHeaderCell>Error Requests</TableHeaderCell>
|
<TableHeaderCell>Slow Responses <p>Success Responses taking 600+s</p></TableHeaderCell>
|
||||||
<TableHeaderCell>Failure %</TableHeaderCell>
|
|
||||||
|
|
||||||
</TableRow>
|
</TableRow>
|
||||||
</TableHead>
|
</TableHead>
|
||||||
<TableBody>
|
<TableBody>
|
||||||
{failureTableData.map((metric, idx) => (
|
{slowResponsesData.map((metric, idx) => (
|
||||||
<TableRow key={idx}>
|
<TableRow key={idx}>
|
||||||
<TableCell>{metric.model}</TableCell>
|
<TableCell>{metric.api_base}</TableCell>
|
||||||
<TableCell>{metric.total_Requests}</TableCell>
|
<TableCell>{metric.total_count}</TableCell>
|
||||||
<TableCell>{metric.total_exceptions}</TableCell>
|
<TableCell>{metric.slow_count}</TableCell>
|
||||||
<TableCell>{metric.failure_rate}%</TableCell>
|
|
||||||
</TableRow>
|
</TableRow>
|
||||||
|
|
||||||
))}
|
))}
|
||||||
|
|
|
@ -475,6 +475,46 @@ export const modelMetricsCall = async (
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Fetch slow-response metrics from `/model/metrics/slow_responses`.
 *
 * Sends a GET request authorized with `accessToken` as a Bearer token and
 * returns the parsed JSON body. The dashboard reads `api_base`,
 * `total_count` and `slow_count` from each returned row.
 *
 * @param accessToken - Token sent in the `Authorization` header.
 * @param userID - Not used by this call; kept for signature parity with the
 *   other networking helpers.
 * @param userRole - Not used by this call; kept for signature parity with the
 *   other networking helpers.
 * @param modelGroup - Model group to report on. When null or empty, no
 *   `_selected_model_group` query parameter is sent and the server applies
 *   its own default.
 * @returns The parsed JSON response body.
 * @throws Error when the response status is not OK (the response text is
 *   also shown through `message.error`), or when the request itself fails.
 */
export const modelMetricsSlowResponsesCall = async (
  accessToken: String,
  userID: String,
  userRole: String,
  modelGroup: String | null,
) => {
  try {
    let url = proxyBaseUrl
      ? `${proxyBaseUrl}/model/metrics/slow_responses`
      : `/model/metrics/slow_responses`;
    if (modelGroup) {
      // Model group names are free-form; encode so characters such as
      // '&', '#', '+' or spaces cannot corrupt the query string.
      url = `${url}?_selected_model_group=${encodeURIComponent(modelGroup.toString())}`;
    }

    const response = await fetch(url, {
      method: "GET",
      headers: {
        Authorization: `Bearer ${accessToken}`,
        "Content-Type": "application/json",
      },
    });

    if (!response.ok) {
      const errorData = await response.text();
      message.error(errorData, 20);
      throw new Error("Network response was not ok");
    }

    const data = await response.json();
    return data;
  } catch (error) {
    console.error("Failed to fetch slow responses:", error);
    throw error;
  }
};
|
||||||
|
|
||||||
|
|
||||||
export const modelExceptionsCall = async (
|
export const modelExceptionsCall = async (
|
||||||
accessToken: String,
|
accessToken: String,
|
||||||
userID: String,
|
userID: String,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue