From d7d383887c48621bbbb25606451d218a393abc3d Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Fri, 11 Apr 2025 19:05:35 -0700
Subject: [PATCH] ui render usage metrics on test key page

---
 .../src/components/chat_ui.tsx                | 21 +++++++++-
 .../chat_ui/llm_calls/chat_completion.tsx     | 40 ++++++++-----------
 2 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/ui/litellm-dashboard/src/components/chat_ui.tsx b/ui/litellm-dashboard/src/components/chat_ui.tsx
index 3a6ea6b294..ae8d15cfe1 100644
--- a/ui/litellm-dashboard/src/components/chat_ui.tsx
+++ b/ui/litellm-dashboard/src/components/chat_ui.tsx
@@ -187,19 +187,38 @@ const ChatUI: React.FC = ({
   };
 
   const updateTimingData = (timeToFirstToken: number) => {
+    console.log("updateTimingData called with:", timeToFirstToken);
     setChatHistory((prevHistory) => {
       const lastMessage = prevHistory[prevHistory.length - 1];
+      console.log("Current last message:", lastMessage);
 
       if (lastMessage && lastMessage.role === "assistant") {
-        return [
+        console.log("Updating assistant message with timeToFirstToken:", timeToFirstToken);
+        const updatedHistory = [
           ...prevHistory.slice(0, prevHistory.length - 1),
           { ...lastMessage, timeToFirstToken },
         ];
+        console.log("Updated chat history:", updatedHistory);
+        return updatedHistory;
+      }
+      // If the last message is a user message and no assistant message exists yet,
+      // create a new assistant message with empty content
+      else if (lastMessage && lastMessage.role === "user") {
+        console.log("Creating new assistant message with timeToFirstToken:", timeToFirstToken);
+        return [
+          ...prevHistory,
+          {
+            role: "assistant",
+            content: "",
+            timeToFirstToken
+          }
+        ];
       }
 
+      console.log("No appropriate message found to update timing");
       return prevHistory;
     });
   };
diff --git a/ui/litellm-dashboard/src/components/chat_ui/llm_calls/chat_completion.tsx b/ui/litellm-dashboard/src/components/chat_ui/llm_calls/chat_completion.tsx
index 8aaac4b2b6..010025f0ba 100644
--- a/ui/litellm-dashboard/src/components/chat_ui/llm_calls/chat_completion.tsx
+++ b/ui/litellm-dashboard/src/components/chat_ui/llm_calls/chat_completion.tsx
@@ -42,18 +42,32 @@ export async function makeOpenAIChatCompletionRequest(
     const response = await client.chat.completions.create({
       model: selectedModel,
       stream: true,
+      stream_options: {
+        include_usage: true,
+      },
       messages: chatHistory as ChatCompletionMessageParam[],
     }, { signal });
 
     for await (const chunk of response) {
       console.log("Stream chunk:", chunk);
 
-      // Measure time to first token
-      if (!firstTokenReceived && chunk.choices[0]?.delta?.content) {
+      // Process content and measure time to first token
+      const delta = chunk.choices[0]?.delta as any;
+
+      // Debug what's in the delta
+      console.log("Delta content:", chunk.choices[0]?.delta?.content);
+      console.log("Delta reasoning content:", delta?.reasoning_content);
+
+      // Measure time to first token for either content or reasoning_content
+      if (!firstTokenReceived && (chunk.choices[0]?.delta?.content || (delta && delta.reasoning_content))) {
         firstTokenReceived = true;
         timeToFirstToken = Date.now() - startTime;
+        console.log("First token received! Time:", timeToFirstToken, "ms");
         if (onTimingData) {
+          console.log("Calling onTimingData with:", timeToFirstToken);
           onTimingData(timeToFirstToken);
+        } else {
+          console.log("onTimingData callback is not defined!");
         }
       }
 
@@ -65,7 +79,6 @@ export async function makeOpenAIChatCompletionRequest(
       }
 
       // Process reasoning content if present - using type assertion
-      const delta = chunk.choices[0]?.delta as any;
       if (delta && delta.reasoning_content) {
         const reasoningContent = delta.reasoning_content;
         if (onReasoningContent) {
@@ -92,27 +105,6 @@ export async function makeOpenAIChatCompletionRequest(
           onUsageData(usageData);
         }
       }
-
-      // Always create an estimated usage
-      if (onUsageData) {
-        try {
-          console.log("Creating estimated usage data");
-          // Create a simple usage estimate - approximately 4 characters per token
-          const estimatedUsage: TokenUsage = {
-            promptTokens: Math.ceil(JSON.stringify(chatHistory).length / 4),
-            completionTokens: Math.ceil((fullResponseContent.length) / 4),
-            totalTokens: Math.ceil((JSON.stringify(chatHistory).length + fullResponseContent.length) / 4)
-          };
-
-          if (fullReasoningContent) {
-            estimatedUsage.reasoningTokens = Math.ceil(fullReasoningContent.length / 4);
-          }
-
-          onUsageData(estimatedUsage);
-        } catch (error) {
-          console.error("Error estimating usage data:", error);
-        }
-      }
     } catch (error) {
       if (signal?.aborted) {
         console.log("Chat completion request was cancelled");