ui - show model latency per token

This commit is contained in:
Ishaan Jaff 2024-04-30 17:23:27 -07:00
parent ce1817380e
commit 8177ef5ec0

View file

@@ -457,8 +457,8 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
); );
console.log("Model metrics response:", modelMetricsResponse); console.log("Model metrics response:", modelMetricsResponse);
// Sort by latency (avg_latency_seconds) // Sort by latency (avg_latency_per_token)
const sortedByLatency = [...modelMetricsResponse].sort((a, b) => b.avg_latency_seconds - a.avg_latency_seconds); const sortedByLatency = [...modelMetricsResponse].sort((a, b) => b.avg_latency_per_token - a.avg_latency_per_token);
console.log("Sorted by latency:", sortedByLatency); console.log("Sorted by latency:", sortedByLatency);
setModelMetrics(modelMetricsResponse); setModelMetrics(modelMetricsResponse);
@@ -686,8 +686,8 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
<div className="flex"> <div className="flex">
<Tab>All Models</Tab> <Tab>All Models</Tab>
<Tab>Add Model</Tab> <Tab>Add Model</Tab>
<Tab>Model Analytics</Tab>
<Tab><pre>/health Models</pre></Tab> <Tab><pre>/health Models</pre></Tab>
<Tab>Model Analytics</Tab>
</div> </div>
<div className="flex items-center space-x-2"> <div className="flex items-center space-x-2">
@@ -992,6 +992,17 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
</Form> </Form>
</Card> </Card>
</TabPanel> </TabPanel>
<TabPanel>
<Card>
<Text>`/health` will run a very small request through your models configured on litellm</Text>
<Button onClick={runHealthCheck}>Run `/health`</Button>
{healthCheckResponse && (
<pre>{JSON.stringify(healthCheckResponse, null, 2)}</pre>
)}
</Card>
</TabPanel>
<TabPanel> <TabPanel>
<p style={{fontSize: '0.85rem', color: '#808080'}}>View how requests were load balanced within a model group</p> <p style={{fontSize: '0.85rem', color: '#808080'}}>View how requests were load balanced within a model group</p>
<p style={{fontSize: '0.85rem', color: '#808080', fontStyle: 'italic'}}>(Beta feature) only supported for Azure Model Groups</p> <p style={{fontSize: '0.85rem', color: '#808080', fontStyle: 'italic'}}>(Beta feature) only supported for Azure Model Groups</p>
@@ -1017,20 +1028,33 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
</SelectItem> </SelectItem>
))} ))}
</Select> </Select>
<Card> <Grid numItems={2}>
<Title>Number Requests per Model</Title> <Col>
<BarChart <Card className="mr-2">
data={modelMetrics} <Table>
className="h-[50vh]" <TableHead>
index="model" <TableRow>
categories={["num_requests"]} <TableHeaderCell>Model</TableHeaderCell>
colors={["blue"]} <TableHeaderCell>Median Latency/Token</TableHeaderCell>
yAxisWidth={400} </TableRow>
layout="vertical" </TableHead>
tickGap={5} <TableBody>
/> {modelLatencyMetrics.map((metric, idx) => (
<TableRow key={idx}>
<TableCell>{metric.model}</TableCell>
<TableCell>{metric.avg_latency_per_token.toFixed(4)}</TableCell>
</TableRow>
))}
</TableBody>
</Table>
</Card> </Card>
</Col>
<Col>
<Card className="ml-2">
<Title>Requests, Failures per Model</Title>
</Card>
</Col>
</Grid>
<Card className="mt-4"> <Card className="mt-4">
<Title>Exceptions per Model</Title> <Title>Exceptions per Model</Title>
<BarChart <BarChart
@@ -1044,32 +1068,10 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
/> />
</Card> </Card>
<Card className="mt-4">
<Title>Latency Per Model</Title>
<BarChart
data={modelLatencyMetrics}
className="h-[50vh]"
index="model"
categories={["avg_latency_seconds"]}
colors={["red"]}
yAxisWidth={400}
layout="vertical"
tickGap={5}
/>
</Card>
</TabPanel> </TabPanel>
<TabPanel>
<Card>
<Text>`/health` will run a very small request through your models configured on litellm</Text>
<Button onClick={runHealthCheck}>Run `/health`</Button>
{healthCheckResponse && (
<pre>{JSON.stringify(healthCheckResponse, null, 2)}</pre>
)}
</Card>
</TabPanel>
</TabPanels> </TabPanels>
</TabGroup> </TabGroup>