UI: render usage metrics on test key page

This commit is contained in:
Ishaan Jaff 2025-04-11 19:05:35 -07:00
parent 4d193816db
commit d7d383887c
2 changed files with 36 additions and 25 deletions

View file

@@ -187,19 +187,38 @@ const ChatUI: React.FC<ChatUIProps> = ({
};
const updateTimingData = (timeToFirstToken: number) => {
console.log("updateTimingData called with:", timeToFirstToken);
setChatHistory((prevHistory) => {
const lastMessage = prevHistory[prevHistory.length - 1];
console.log("Current last message:", lastMessage);
if (lastMessage && lastMessage.role === "assistant") {
return [
console.log("Updating assistant message with timeToFirstToken:", timeToFirstToken);
const updatedHistory = [
...prevHistory.slice(0, prevHistory.length - 1),
{
...lastMessage,
timeToFirstToken
},
];
console.log("Updated chat history:", updatedHistory);
return updatedHistory;
}
// If the last message is a user message and no assistant message exists yet,
// create a new assistant message with empty content
else if (lastMessage && lastMessage.role === "user") {
console.log("Creating new assistant message with timeToFirstToken:", timeToFirstToken);
return [
...prevHistory,
{
role: "assistant",
content: "",
timeToFirstToken
}
];
}
console.log("No appropriate message found to update timing");
return prevHistory;
});
};

View file

@@ -42,18 +42,32 @@ export async function makeOpenAIChatCompletionRequest(
const response = await client.chat.completions.create({
model: selectedModel,
stream: true,
stream_options: {
include_usage: true,
},
messages: chatHistory as ChatCompletionMessageParam[],
}, { signal });
for await (const chunk of response) {
console.log("Stream chunk:", chunk);
// Measure time to first token
if (!firstTokenReceived && chunk.choices[0]?.delta?.content) {
// Process content and measure time to first token
const delta = chunk.choices[0]?.delta as any;
// Debug what's in the delta
console.log("Delta content:", chunk.choices[0]?.delta?.content);
console.log("Delta reasoning content:", delta?.reasoning_content);
// Measure time to first token for either content or reasoning_content
if (!firstTokenReceived && (chunk.choices[0]?.delta?.content || (delta && delta.reasoning_content))) {
firstTokenReceived = true;
timeToFirstToken = Date.now() - startTime;
console.log("First token received! Time:", timeToFirstToken, "ms");
if (onTimingData) {
console.log("Calling onTimingData with:", timeToFirstToken);
onTimingData(timeToFirstToken);
} else {
console.log("onTimingData callback is not defined!");
}
}
@@ -65,7 +79,6 @@ export async function makeOpenAIChatCompletionRequest(
}
// Process reasoning content if present - using type assertion
const delta = chunk.choices[0]?.delta as any;
if (delta && delta.reasoning_content) {
const reasoningContent = delta.reasoning_content;
if (onReasoningContent) {
@@ -92,27 +105,6 @@ export async function makeOpenAIChatCompletionRequest(
onUsageData(usageData);
}
}
// Always create an estimated usage
if (onUsageData) {
try {
console.log("Creating estimated usage data");
// Create a simple usage estimate - approximately 4 characters per token
const estimatedUsage: TokenUsage = {
promptTokens: Math.ceil(JSON.stringify(chatHistory).length / 4),
completionTokens: Math.ceil((fullResponseContent.length) / 4),
totalTokens: Math.ceil((JSON.stringify(chatHistory).length + fullResponseContent.length) / 4)
};
if (fullReasoningContent) {
estimatedUsage.reasoningTokens = Math.ceil(fullReasoningContent.length / 4);
}
onUsageData(estimatedUsage);
} catch (error) {
console.error("Error estimating usage data:", error);
}
}
} catch (error) {
if (signal?.aborted) {
console.log("Chat completion request was cancelled");