Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-24 10:14:26 +00:00.
ui render usage metrics on test key page
This commit (d7d383887c, parent 4d193816db) changed 2 files, with 36 additions and 25 deletions.
|
@ -187,19 +187,38 @@ const ChatUI: React.FC<ChatUIProps> = ({
|
|||
};
|
||||
|
||||
const updateTimingData = (timeToFirstToken: number) => {
|
||||
console.log("updateTimingData called with:", timeToFirstToken);
|
||||
setChatHistory((prevHistory) => {
|
||||
const lastMessage = prevHistory[prevHistory.length - 1];
|
||||
console.log("Current last message:", lastMessage);
|
||||
|
||||
if (lastMessage && lastMessage.role === "assistant") {
|
||||
return [
|
||||
console.log("Updating assistant message with timeToFirstToken:", timeToFirstToken);
|
||||
const updatedHistory = [
|
||||
...prevHistory.slice(0, prevHistory.length - 1),
|
||||
{
|
||||
...lastMessage,
|
||||
timeToFirstToken
|
||||
},
|
||||
];
|
||||
console.log("Updated chat history:", updatedHistory);
|
||||
return updatedHistory;
|
||||
}
|
||||
// If the last message is a user message and no assistant message exists yet,
|
||||
// create a new assistant message with empty content
|
||||
else if (lastMessage && lastMessage.role === "user") {
|
||||
console.log("Creating new assistant message with timeToFirstToken:", timeToFirstToken);
|
||||
return [
|
||||
...prevHistory,
|
||||
{
|
||||
role: "assistant",
|
||||
content: "",
|
||||
timeToFirstToken
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
console.log("No appropriate message found to update timing");
|
||||
return prevHistory;
|
||||
});
|
||||
};
|
||||
|
|
|
@ -42,18 +42,32 @@ export async function makeOpenAIChatCompletionRequest(
|
|||
const response = await client.chat.completions.create({
|
||||
model: selectedModel,
|
||||
stream: true,
|
||||
stream_options: {
|
||||
include_usage: true,
|
||||
},
|
||||
messages: chatHistory as ChatCompletionMessageParam[],
|
||||
}, { signal });
|
||||
|
||||
for await (const chunk of response) {
|
||||
console.log("Stream chunk:", chunk);
|
||||
|
||||
// Measure time to first token
|
||||
if (!firstTokenReceived && chunk.choices[0]?.delta?.content) {
|
||||
// Process content and measure time to first token
|
||||
const delta = chunk.choices[0]?.delta as any;
|
||||
|
||||
// Debug what's in the delta
|
||||
console.log("Delta content:", chunk.choices[0]?.delta?.content);
|
||||
console.log("Delta reasoning content:", delta?.reasoning_content);
|
||||
|
||||
// Measure time to first token for either content or reasoning_content
|
||||
if (!firstTokenReceived && (chunk.choices[0]?.delta?.content || (delta && delta.reasoning_content))) {
|
||||
firstTokenReceived = true;
|
||||
timeToFirstToken = Date.now() - startTime;
|
||||
console.log("First token received! Time:", timeToFirstToken, "ms");
|
||||
if (onTimingData) {
|
||||
console.log("Calling onTimingData with:", timeToFirstToken);
|
||||
onTimingData(timeToFirstToken);
|
||||
} else {
|
||||
console.log("onTimingData callback is not defined!");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -65,7 +79,6 @@ export async function makeOpenAIChatCompletionRequest(
|
|||
}
|
||||
|
||||
// Process reasoning content if present - using type assertion
|
||||
const delta = chunk.choices[0]?.delta as any;
|
||||
if (delta && delta.reasoning_content) {
|
||||
const reasoningContent = delta.reasoning_content;
|
||||
if (onReasoningContent) {
|
||||
|
@ -92,27 +105,6 @@ export async function makeOpenAIChatCompletionRequest(
|
|||
onUsageData(usageData);
|
||||
}
|
||||
}
|
||||
|
||||
// Always create an estimated usage
|
||||
if (onUsageData) {
|
||||
try {
|
||||
console.log("Creating estimated usage data");
|
||||
// Create a simple usage estimate - approximately 4 characters per token
|
||||
const estimatedUsage: TokenUsage = {
|
||||
promptTokens: Math.ceil(JSON.stringify(chatHistory).length / 4),
|
||||
completionTokens: Math.ceil((fullResponseContent.length) / 4),
|
||||
totalTokens: Math.ceil((JSON.stringify(chatHistory).length + fullResponseContent.length) / 4)
|
||||
};
|
||||
|
||||
if (fullReasoningContent) {
|
||||
estimatedUsage.reasoningTokens = Math.ceil(fullReasoningContent.length / 4);
|
||||
}
|
||||
|
||||
onUsageData(estimatedUsage);
|
||||
} catch (error) {
|
||||
console.error("Error estimating usage data:", error);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
if (signal?.aborted) {
|
||||
console.log("Chat completion request was cancelled");
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue