Merge pull request #3390 from BerriAI/litellm_show_slow_responses

[UI] show slow responses + num requests per deployment
This commit is contained in:
Ishaan Jaff 2024-05-01 17:32:26 -07:00 committed by GitHub
commit b3f5ff4d11
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 168 additions and 45 deletions

View file

@ -7607,6 +7607,67 @@ async def model_metrics(
} }
@router.get(
    "/model/metrics/slow_responses",
    description="View number of hanging requests per model_group",
    tags=["model management"],
    include_in_schema=False,
    dependencies=[Depends(user_api_key_auth)],
)
async def model_metrics_slow_responses(
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
    _selected_model_group: Optional[str] = "gpt-4-32k",
    startTime: Optional[datetime] = None,
    endTime: Optional[datetime] = None,
):
    """
    Return, per `api_base`, the total number of logged requests and how many
    of them were "slow" for the selected model group.

    A request counts as slow when `"endTime" - "startTime"` is at least the
    Slack alerting threshold in seconds (falls back to 300 when unset).

    Parameters:
    - _selected_model_group: value matched against the `"model"` column of
      `"LiteLLM_SpendLogs"`.
    - startTime / endTime: window applied to the `"startTime"` column of each
      log row. Defaults to the last 30 days up to now.

    Returns the raw query rows (`api_base`, `total_count`, `slow_count`),
    ordered by `slow_count` descending. Any `api_base` containing `/openai/`
    is truncated to the part before it.

    Raises ProxyException (HTTP 500) when the Prisma client is not initialized.
    """
    global prisma_client, llm_router, proxy_logging_obj
    if prisma_client is None:
        raise ProxyException(
            message="Prisma Client is not initialized",
            type="internal_error",
            param="None",
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )

    startTime = startTime or datetime.now() - timedelta(days=30)
    endTime = endTime or datetime.now()

    alerting_threshold = (
        proxy_logging_obj.slack_alerting_instance.alerting_threshold or 300
    )
    alerting_threshold = int(alerting_threshold)

    # The requested window is bound as $3/$4; previously startTime/endTime were
    # computed but never applied, so every row in the table was counted.
    sql_query = """
SELECT
    api_base,
    COUNT(*) AS total_count,
    SUM(CASE
        WHEN ("endTime" - "startTime") >= (INTERVAL '1 SECOND' * CAST($1 AS INTEGER)) THEN 1
        ELSE 0
    END) AS slow_count
FROM
    "LiteLLM_SpendLogs"
WHERE
    "model" = $2
    AND "cache_hit" != 'True'
    AND "startTime" >= $3::timestamp
    AND "startTime" <= $4::timestamp
GROUP BY
    api_base
ORDER BY
    slow_count DESC;
    """

    db_response = await prisma_client.db.query_raw(
        sql_query, alerting_threshold, _selected_model_group, startTime, endTime
    )

    if db_response is not None:
        for row in db_response:
            _api_base = row.get("api_base") or ""
            # Keep only the part before "/openai/" so the deployment path
            # suffix is dropped from the displayed api_base.
            if "/openai/" in _api_base:
                _api_base = _api_base.split("/openai/")[0]
            row["api_base"] = _api_base
    return db_response
@router.get( @router.get(
"/model/metrics/exceptions", "/model/metrics/exceptions",
description="View number of failed requests per model on config.yaml", description="View number of failed requests per model on config.yaml",

View file

@ -18,7 +18,7 @@ import {
} from "@tremor/react"; } from "@tremor/react";
import { TabPanel, TabPanels, TabGroup, TabList, Tab, TextInput, Icon } from "@tremor/react"; import { TabPanel, TabPanels, TabGroup, TabList, Tab, TextInput, Icon } from "@tremor/react";
import { Select, SelectItem, MultiSelect, MultiSelectItem } from "@tremor/react"; import { Select, SelectItem, MultiSelect, MultiSelectItem } from "@tremor/react";
import { modelInfoCall, userGetRequesedtModelsCall, modelCreateCall, Model, modelCostMap, modelDeleteCall, healthCheckCall, modelUpdateCall, modelMetricsCall, modelExceptionsCall } from "./networking"; import { modelInfoCall, userGetRequesedtModelsCall, modelCreateCall, Model, modelCostMap, modelDeleteCall, healthCheckCall, modelUpdateCall, modelMetricsCall, modelExceptionsCall, modelMetricsSlowResponsesCall } from "./networking";
import { BarChart, AreaChart } from "@tremor/react"; import { BarChart, AreaChart } from "@tremor/react";
import { import {
Button as Button2, Button as Button2,
@ -205,6 +205,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
const [modelExceptions, setModelExceptions] = useState<any[]>([]); const [modelExceptions, setModelExceptions] = useState<any[]>([]);
const [allExceptions, setAllExceptions] = useState<any[]>([]); const [allExceptions, setAllExceptions] = useState<any[]>([]);
const [failureTableData, setFailureTableData] = useState<any[]>([]); const [failureTableData, setFailureTableData] = useState<any[]>([]);
const [slowResponsesData, setSlowResponsesData] = useState<any[]>([]);
const EditModelModal: React.FC<EditModelModalProps> = ({ visible, onCancel, model, onSubmit }) => { const EditModelModal: React.FC<EditModelModalProps> = ({ visible, onCancel, model, onSubmit }) => {
const [form] = Form.useForm(); const [form] = Form.useForm();
@ -479,39 +480,51 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
setAllExceptions(modelExceptionsResponse.exception_types); setAllExceptions(modelExceptionsResponse.exception_types);
let modelMetricsData = modelMetricsResponse.data; const slowResponses = await modelMetricsSlowResponsesCall(
let successdeploymentToSuccess: Record<string, number> = {}; accessToken,
for (let i = 0; i < modelMetricsData.length; i++) { userID,
let element = modelMetricsData[i]; userRole,
let _model_name = element.model; null
let _num_requests = element.num_requests; )
successdeploymentToSuccess[_model_name] = _num_requests
}
console.log("successdeploymentToSuccess:", successdeploymentToSuccess)
let failureTableData = []; console.log("slowResponses:", slowResponses)
let _failureData = modelExceptionsResponse.data;
for (let i = 0; i < _failureData.length; i++) {
const model = _failureData[i];
let _model_name = model.model;
let total_exceptions = model.total_exceptions;
let total_Requests = successdeploymentToSuccess[_model_name];
if (total_Requests == null) {
total_Requests = 0
}
let _data = {
model: _model_name,
total_exceptions: total_exceptions,
total_Requests: total_Requests,
failure_rate: total_Requests / total_exceptions
}
failureTableData.push(_data);
// sort failureTableData by failure_rate
failureTableData.sort((a, b) => b.failure_rate - a.failure_rate);
setFailureTableData(failureTableData); setSlowResponsesData(slowResponses);
console.log("failureTableData:", failureTableData);
}
// let modelMetricsData = modelMetricsResponse.data;
// let successdeploymentToSuccess: Record<string, number> = {};
// for (let i = 0; i < modelMetricsData.length; i++) {
// let element = modelMetricsData[i];
// let _model_name = element.model;
// let _num_requests = element.num_requests;
// successdeploymentToSuccess[_model_name] = _num_requests
// }
// console.log("successdeploymentToSuccess:", successdeploymentToSuccess)
// let failureTableData = [];
// let _failureData = modelExceptionsResponse.data;
// for (let i = 0; i < _failureData.length; i++) {
// const model = _failureData[i];
// let _model_name = model.model;
// let total_exceptions = model.total_exceptions;
// let total_Requests = successdeploymentToSuccess[_model_name];
// if (total_Requests == null) {
// total_Requests = 0
// }
// let _data = {
// model: _model_name,
// total_exceptions: total_exceptions,
// total_Requests: total_Requests,
// failure_rate: total_Requests / total_exceptions
// }
// failureTableData.push(_data);
// // sort failureTableData by failure_rate
// failureTableData.sort((a, b) => b.failure_rate - a.failure_rate);
// setFailureTableData(failureTableData);
// console.log("failureTableData:", failureTableData);
// }
} catch (error) { } catch (error) {
console.error("There was an error fetching the model data", error); console.error("There was an error fetching the model data", error);
@ -691,6 +704,18 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
setModelExceptions(modelExceptionsResponse.data); setModelExceptions(modelExceptionsResponse.data);
setAllExceptions(modelExceptionsResponse.exception_types); setAllExceptions(modelExceptionsResponse.exception_types);
const slowResponses = await modelMetricsSlowResponsesCall(
accessToken,
userID,
userRole,
modelGroup
)
console.log("slowResponses:", slowResponses)
setSlowResponsesData(slowResponses);
} catch (error) { } catch (error) {
console.error("Failed to fetch model metrics", error); console.error("Failed to fetch model metrics", error);
} }
@ -1110,7 +1135,7 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
<Grid numItems={2}> <Grid numItems={2}>
<Col> <Col>
<Card className="mr-2"> <Card className="mr-2 max-h-[400px] min-h-[400px]">
<Title>Avg Latency per Token</Title><p className="text-gray-500 italic"> (seconds/token)</p> <Title>Avg Latency per Token</Title><p className="text-gray-500 italic"> (seconds/token)</p>
<Text className="text-gray-500 italic mt-1 mb-1">average Latency for successfull requests divided by the total tokens</Text> <Text className="text-gray-500 italic mt-1 mb-1">average Latency for successfull requests divided by the total tokens</Text>
{ modelMetrics && modelMetricsCategories && ( { modelMetrics && modelMetricsCategories && (
@ -1126,29 +1151,26 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
/> />
)} )}
</Card> </Card>
</Col> </Col>
<Col> <Col>
<Card className="ml-2"> <Card className="ml-2 max-h-[400px] min-h-[400px] overflow-y-auto">
<Table> <Table>
<TableHead> <TableHead>
<TableRow> <TableRow>
<TableHeaderCell>Model</TableHeaderCell> <TableHeaderCell>Deployment</TableHeaderCell>
<TableHeaderCell>Success Requests</TableHeaderCell> <TableHeaderCell>Success Responses</TableHeaderCell>
<TableHeaderCell>Error Requests</TableHeaderCell> <TableHeaderCell>Slow Responses <p>Success Responses taking 600+s</p></TableHeaderCell>
<TableHeaderCell>Failure %</TableHeaderCell>
</TableRow> </TableRow>
</TableHead> </TableHead>
<TableBody> <TableBody>
{failureTableData.map((metric, idx) => ( {slowResponsesData.map((metric, idx) => (
<TableRow key={idx}> <TableRow key={idx}>
<TableCell>{metric.model}</TableCell> <TableCell>{metric.api_base}</TableCell>
<TableCell>{metric.total_Requests}</TableCell> <TableCell>{metric.total_count}</TableCell>
<TableCell>{metric.total_exceptions}</TableCell> <TableCell>{metric.slow_count}</TableCell>
<TableCell>{metric.failure_rate}%</TableCell>
</TableRow> </TableRow>
))} ))}

View file

@ -475,6 +475,46 @@ export const modelMetricsCall = async (
}; };
/**
 * Fetch per-deployment total and slow response counts from the proxy's
 * `/model/metrics/slow_responses` endpoint.
 *
 * @param accessToken - Sent as the Bearer token in the `Authorization` header.
 * @param userID - Not used by this call; kept so the signature matches the sibling metrics calls.
 * @param userRole - Not used by this call; kept so the signature matches the sibling metrics calls.
 * @param modelGroup - Model group to query. When null or empty, the
 *   `_selected_model_group` query parameter is omitted and the proxy applies its own default.
 * @returns The parsed JSON body of the response.
 * @throws Error when the response status is not OK (the response text is also
 *   shown via `message.error`); fetch and JSON parse failures are logged and rethrown.
 */
export const modelMetricsSlowResponsesCall = async (
  accessToken: String,
  userID: String,
  userRole: String,
  modelGroup: String | null,
) => {
  try {
    let url = proxyBaseUrl ? `${proxyBaseUrl}/model/metrics/slow_responses` : `/model/metrics/slow_responses`;
    if (modelGroup) {
      // Encode the value so names containing '&', '#', '+', spaces, etc.
      // cannot break or alter the query string.
      url = `${url}?_selected_model_group=${encodeURIComponent(modelGroup.toString())}`;
    }

    const response = await fetch(url, {
      method: "GET",
      headers: {
        Authorization: `Bearer ${accessToken}`,
        "Content-Type": "application/json",
      },
    });

    if (!response.ok) {
      const errorData = await response.text();
      message.error(errorData, 20);
      throw new Error("Network response was not ok");
    }

    const data = await response.json();
    return data;
  } catch (error) {
    console.error("Failed to fetch slow responses:", error);
    throw error;
  }
};
export const modelExceptionsCall = async ( export const modelExceptionsCall = async (
accessToken: String, accessToken: String,
userID: String, userID: String,