From d7d383887c48621bbbb25606451d218a393abc3d Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Fri, 11 Apr 2025 19:05:35 -0700
Subject: [PATCH] ui render usage metrics on test key page

---
 .../src/components/chat_ui.tsx                | 21 +++++++++-
 .../chat_ui/llm_calls/chat_completion.tsx     | 40 ++++++++-----------
 2 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/ui/litellm-dashboard/src/components/chat_ui.tsx b/ui/litellm-dashboard/src/components/chat_ui.tsx
index 3a6ea6b294..ae8d15cfe1 100644
--- a/ui/litellm-dashboard/src/components/chat_ui.tsx
+++ b/ui/litellm-dashboard/src/components/chat_ui.tsx
@@ -187,19 +187,38 @@ const ChatUI: React.FC = ({
   };
 
   const updateTimingData = (timeToFirstToken: number) => {
+    console.log("updateTimingData called with:", timeToFirstToken);
     setChatHistory((prevHistory) => {
       const lastMessage = prevHistory[prevHistory.length - 1];
+      console.log("Current last message:", lastMessage);
 
       if (lastMessage && lastMessage.role === "assistant") {
-        return [
+        console.log("Updating assistant message with timeToFirstToken:", timeToFirstToken);
+        const updatedHistory = [
           ...prevHistory.slice(0, prevHistory.length - 1),
           { ...lastMessage, timeToFirstToken },
         ];
+        console.log("Updated chat history:", updatedHistory);
+        return updatedHistory;
+      }
+      // If the last message is a user message and no assistant message exists yet,
+      // create a new assistant message with empty content
+      else if (lastMessage && lastMessage.role === "user") {
+        console.log("Creating new assistant message with timeToFirstToken:", timeToFirstToken);
+        return [
+          ...prevHistory,
+          {
+            role: "assistant",
+            content: "",
+            timeToFirstToken
+          }
+        ];
       }
 
+      console.log("No appropriate message found to update timing");
       return prevHistory;
     });
   };
diff --git a/ui/litellm-dashboard/src/components/chat_ui/llm_calls/chat_completion.tsx b/ui/litellm-dashboard/src/components/chat_ui/llm_calls/chat_completion.tsx
index 8aaac4b2b6..010025f0ba 100644
--- a/ui/litellm-dashboard/src/components/chat_ui/llm_calls/chat_completion.tsx
+++ b/ui/litellm-dashboard/src/components/chat_ui/llm_calls/chat_completion.tsx
@@ -42,18 +42,32 @@ export async function makeOpenAIChatCompletionRequest(
     const response = await client.chat.completions.create({
       model: selectedModel,
       stream: true,
+      stream_options: {
+        include_usage: true,
+      },
       messages: chatHistory as ChatCompletionMessageParam[],
     }, { signal });
 
     for await (const chunk of response) {
       console.log("Stream chunk:", chunk);
 
-      // Measure time to first token
-      if (!firstTokenReceived && chunk.choices[0]?.delta?.content) {
+      // Process content and measure time to first token
+      const delta = chunk.choices[0]?.delta as any;
+
+      // Debug what's in the delta
+      console.log("Delta content:", chunk.choices[0]?.delta?.content);
+      console.log("Delta reasoning content:", delta?.reasoning_content);
+
+      // Measure time to first token for either content or reasoning_content
+      if (!firstTokenReceived && (chunk.choices[0]?.delta?.content || (delta && delta.reasoning_content))) {
         firstTokenReceived = true;
         timeToFirstToken = Date.now() - startTime;
+        console.log("First token received! Time:", timeToFirstToken, "ms");
         if (onTimingData) {
+          console.log("Calling onTimingData with:", timeToFirstToken);
           onTimingData(timeToFirstToken);
+        } else {
+          console.log("onTimingData callback is not defined!");
         }
       }
 
@@ -65,7 +79,6 @@ export async function makeOpenAIChatCompletionRequest(
       }
 
       // Process reasoning content if present - using type assertion
-      const delta = chunk.choices[0]?.delta as any;
       if (delta && delta.reasoning_content) {
         const reasoningContent = delta.reasoning_content;
         if (onReasoningContent) {
@@ -92,27 +105,6 @@ export async function makeOpenAIChatCompletionRequest(
           onUsageData(usageData);
         }
       }
-
-      // Always create an estimated usage
-      if (onUsageData) {
-        try {
-          console.log("Creating estimated usage data");
-          // Create a simple usage estimate - approximately 4 characters per token
-          const estimatedUsage: TokenUsage = {
-            promptTokens: Math.ceil(JSON.stringify(chatHistory).length / 4),
-            completionTokens: Math.ceil((fullResponseContent.length) / 4),
-            totalTokens: Math.ceil((JSON.stringify(chatHistory).length + fullResponseContent.length) / 4)
-          };
-
-          if (fullReasoningContent) {
-            estimatedUsage.reasoningTokens = Math.ceil(fullReasoningContent.length / 4);
-          }
-
-          onUsageData(estimatedUsage);
-        } catch (error) {
-          console.error("Error estimating usage data:", error);
-        }
-      }
     } catch (error) {
       if (signal?.aborted) {
         console.log("Chat completion request was cancelled");