Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-24 10:14:26 +00:00.
ui render usage metrics on test key page
This commit (d7d383887c, parent 4d193816db) changed 2 files, with 36 additions and 25 deletions.
|
@ -187,19 +187,38 @@ const ChatUI: React.FC<ChatUIProps> = ({
|
|||
};
|
||||
|
||||
const updateTimingData = (timeToFirstToken: number) => {
|
||||
console.log("updateTimingData called with:", timeToFirstToken);
|
||||
setChatHistory((prevHistory) => {
|
||||
const lastMessage = prevHistory[prevHistory.length - 1];
|
||||
console.log("Current last message:", lastMessage);
|
||||
|
||||
if (lastMessage && lastMessage.role === "assistant") {
|
||||
return [
|
||||
console.log("Updating assistant message with timeToFirstToken:", timeToFirstToken);
|
||||
const updatedHistory = [
|
||||
...prevHistory.slice(0, prevHistory.length - 1),
|
||||
{
|
||||
...lastMessage,
|
||||
timeToFirstToken
|
||||
},
|
||||
];
|
||||
console.log("Updated chat history:", updatedHistory);
|
||||
return updatedHistory;
|
||||
}
|
||||
// If the last message is a user message and no assistant message exists yet,
|
||||
// create a new assistant message with empty content
|
||||
else if (lastMessage && lastMessage.role === "user") {
|
||||
console.log("Creating new assistant message with timeToFirstToken:", timeToFirstToken);
|
||||
return [
|
||||
...prevHistory,
|
||||
{
|
||||
role: "assistant",
|
||||
content: "",
|
||||
timeToFirstToken
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
console.log("No appropriate message found to update timing");
|
||||
return prevHistory;
|
||||
});
|
||||
};
|
||||
|
|
|
@ -42,18 +42,32 @@ export async function makeOpenAIChatCompletionRequest(
|
|||
const response = await client.chat.completions.create({
|
||||
model: selectedModel,
|
||||
stream: true,
|
||||
stream_options: {
|
||||
include_usage: true,
|
||||
},
|
||||
messages: chatHistory as ChatCompletionMessageParam[],
|
||||
}, { signal });
|
||||
|
||||
for await (const chunk of response) {
|
||||
console.log("Stream chunk:", chunk);
|
||||
|
||||
// Measure time to first token
|
||||
if (!firstTokenReceived && chunk.choices[0]?.delta?.content) {
|
||||
// Process content and measure time to first token
|
||||
const delta = chunk.choices[0]?.delta as any;
|
||||
|
||||
// Debug what's in the delta
|
||||
console.log("Delta content:", chunk.choices[0]?.delta?.content);
|
||||
console.log("Delta reasoning content:", delta?.reasoning_content);
|
||||
|
||||
// Measure time to first token for either content or reasoning_content
|
||||
if (!firstTokenReceived && (chunk.choices[0]?.delta?.content || (delta && delta.reasoning_content))) {
|
||||
firstTokenReceived = true;
|
||||
timeToFirstToken = Date.now() - startTime;
|
||||
console.log("First token received! Time:", timeToFirstToken, "ms");
|
||||
if (onTimingData) {
|
||||
console.log("Calling onTimingData with:", timeToFirstToken);
|
||||
onTimingData(timeToFirstToken);
|
||||
} else {
|
||||
console.log("onTimingData callback is not defined!");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -65,7 +79,6 @@ export async function makeOpenAIChatCompletionRequest(
|
|||
}
|
||||
|
||||
// Process reasoning content if present - using type assertion
|
||||
const delta = chunk.choices[0]?.delta as any;
|
||||
if (delta && delta.reasoning_content) {
|
||||
const reasoningContent = delta.reasoning_content;
|
||||
if (onReasoningContent) {
|
||||
|
@ -92,27 +105,6 @@ export async function makeOpenAIChatCompletionRequest(
|
|||
onUsageData(usageData);
|
||||
}
|
||||
}
|
||||
|
||||
// Always create an estimated usage
|
||||
if (onUsageData) {
|
||||
try {
|
||||
console.log("Creating estimated usage data");
|
||||
// Create a simple usage estimate - approximately 4 characters per token
|
||||
const estimatedUsage: TokenUsage = {
|
||||
promptTokens: Math.ceil(JSON.stringify(chatHistory).length / 4),
|
||||
completionTokens: Math.ceil((fullResponseContent.length) / 4),
|
||||
totalTokens: Math.ceil((JSON.stringify(chatHistory).length + fullResponseContent.length) / 4)
|
||||
};
|
||||
|
||||
if (fullReasoningContent) {
|
||||
estimatedUsage.reasoningTokens = Math.ceil(fullReasoningContent.length / 4);
|
||||
}
|
||||
|
||||
onUsageData(estimatedUsage);
|
||||
} catch (error) {
|
||||
console.error("Error estimating usage data:", error);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
if (signal?.aborted) {
|
||||
console.log("Chat completion request was cancelled");
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue