[UI] Render Reasoning content, ttft, usage metrics on test key page (#9931)

* add BaseReasoningEffortTests
* BaseReasoningLLMTests
* fix test rename
* docs update thinking / reasoning content docs
* show reasoning content on chat ui
* chat ui allow pasting in content
* chat ui fix size
* chat ui, show num reasoning tokens used
* ui render usage metrics on test key page
2025-04-24 18:24:20 +00:00 · 2025-04-11 21:08:10 -07:00 · 2025-04-11 21:08:10 -07:00 · 7fde06d8d3
commit 7fde06d8d3
parent 57bc03b30b
6 changed files with 521 additions and 20 deletions
--- a/ui/litellm-dashboard/src/components/chat_ui.tsx
+++ b/ui/litellm-dashboard/src/components/chat_ui.tsx
@ -23,7 +23,7 @@ import {
  Divider,
 } from "@tremor/react";

-import { message, Select, Spin, Typography, Tooltip } from "antd";
+import { message, Select, Spin, Typography, Tooltip, Input } from "antd";
 import { makeOpenAIChatCompletionRequest } from "./chat_ui/llm_calls/chat_completion";
 import { makeOpenAIImageGenerationRequest } from "./chat_ui/llm_calls/image_generation";
 import { fetchAvailableModels, ModelGroup  } from "./chat_ui/llm_calls/fetch_models";
@ -33,6 +33,9 @@ import { coy } from 'react-syntax-highlighter/dist/esm/styles/prism';
 import EndpointSelector from "./chat_ui/EndpointSelector";
 import TagSelector from "./tag_management/TagSelector";
 import { determineEndpointType } from "./chat_ui/EndpointUtils";
+import { MessageType } from "./chat_ui/types";
+import ReasoningContent from "./chat_ui/ReasoningContent";
+import ResponseMetrics, { TokenUsage } from "./chat_ui/ResponseMetrics";
 import { 
  SendOutlined, 
  ApiOutlined, 
@ -45,6 +48,8 @@ import {
  TagsOutlined
 } from "@ant-design/icons";

+const { TextArea } = Input;
+
 interface ChatUIProps {
  accessToken: string | null;
  token: string | null;
@ -65,7 +70,7 @@ const ChatUI: React.FC<ChatUIProps> = ({
  );
  const [apiKey, setApiKey] = useState("");
  const [inputMessage, setInputMessage] = useState("");
-  const [chatHistory, setChatHistory] = useState<{ role: string; content: string; model?: string; isImage?: boolean }[]>([]);
+  const [chatHistory, setChatHistory] = useState<MessageType[]>([]);
  const [selectedModel, setSelectedModel] = useState<string | undefined>(
    undefined
  );
@ -138,7 +143,11 @@ const ChatUI: React.FC<ChatUIProps> = ({
      if (lastMessage && lastMessage.role === role && !lastMessage.isImage) {
        return [
          ...prevHistory.slice(0, prevHistory.length - 1),
-          { role, content: lastMessage.content + chunk, model },
+          { 
+            ...lastMessage,
+            content: lastMessage.content + chunk, 
+            model 
+          },
        ];
      } else {
        return [...prevHistory, { role, content: chunk, model }];
@ -146,6 +155,97 @@ const ChatUI: React.FC<ChatUIProps> = ({
    });
  };

+  /**
+   * Folds a streamed reasoning chunk into the chat history.
+   *
+   * - Last message is a non-image assistant message: the chunk is appended to its
+   *   `reasoningContent` (treated as "" when not set yet).
+   * - Last message is a user message: a new assistant message with empty `content`
+   *   and the chunk as `reasoningContent` is appended.
+   * - Anything else (empty history, image message, other roles): no change.
+   *
+   * @param chunk Reasoning text to append.
+   */
+  const updateReasoningContent = (chunk: string) => {
+    setChatHistory((prevHistory) => {
+      const lastIndex = prevHistory.length - 1;
+      const tail = prevHistory[lastIndex];
+
+      // Empty history: there is no turn to attach reasoning to.
+      if (!tail) {
+        return prevHistory;
+      }
+
+      const isTextAssistantTurn = tail.role === "assistant" && !tail.isImage;
+      if (isTextAssistantTurn) {
+        const mergedReasoning = (tail.reasoningContent || "") + chunk;
+        return [
+          ...prevHistory.slice(0, lastIndex),
+          { ...tail, reasoningContent: mergedReasoning },
+        ];
+      }
+
+      // Reasoning arrived before any answer text: open the assistant turn now.
+      if (tail.role === "user") {
+        const openedTurn = { role: "assistant", content: "", reasoningContent: chunk };
+        return [...prevHistory, openedTurn];
+      }
+
+      return prevHistory;
+    });
+  };
+
+  /**
+   * Stores the time-to-first-token on the assistant message at the end of the history.
+   *
+   * - Last message is an assistant message: the value is set on it.
+   * - Last message is a user message: a new assistant message with empty `content`
+   *   carrying the value is appended.
+   * - Anything else (empty history or another role): history is returned unchanged.
+   *
+   * @param timeToFirstToken Value stored on the message as-is.
+   */
+  const updateTimingData = (timeToFirstToken: number) => {
+    // The updater is kept free of logging and other side effects: React may call
+    // updater functions more than once for a single state change.
+    setChatHistory((prevHistory) => {
+      const lastMessage = prevHistory[prevHistory.length - 1];
+
+      if (lastMessage && lastMessage.role === "assistant") {
+        return [
+          ...prevHistory.slice(0, prevHistory.length - 1),
+          { ...lastMessage, timeToFirstToken },
+        ];
+      }
+
+      // No assistant message exists yet for this turn: create an empty one to hold the timing.
+      if (lastMessage && lastMessage.role === "user") {
+        return [
+          ...prevHistory,
+          { role: "assistant", content: "", timeToFirstToken },
+        ];
+      }
+
+      return prevHistory;
+    });
+  };
+
+  /**
+   * Attaches token usage to the assistant message at the end of the history.
+   *
+   * History is returned unchanged when it is empty or when the last message is not
+   * an assistant message; no placeholder message is created in that case.
+   *
+   * @param usage Token counts to store on the message.
+   */
+  const updateUsageData = (usage: TokenUsage) => {
+    // The updater is kept free of logging and other side effects: React may call
+    // updater functions more than once for a single state change.
+    setChatHistory((prevHistory) => {
+      const lastMessage = prevHistory[prevHistory.length - 1];
+
+      if (lastMessage && lastMessage.role === "assistant") {
+        return [
+          ...prevHistory.slice(0, prevHistory.length - 1),
+          { ...lastMessage, usage },
+        ];
+      }
+
+      return prevHistory;
+    });
+  };
+
  const updateImageUI = (imageUrl: string, model: string) => {
    setChatHistory((prevHistory) => [
      ...prevHistory,
@ -153,10 +253,12 @@ const ChatUI: React.FC<ChatUIProps> = ({
    ]);
  };

-  const handleKeyDown = (event: React.KeyboardEvent<HTMLInputElement>) => {
-    if (event.key === 'Enter') {
+  const handleKeyDown = (event: React.KeyboardEvent<HTMLTextAreaElement>) => { // Key handler for the message textarea: plain Enter sends, every other key (including Shift+Enter) is left to the default handling
+    if (event.key === 'Enter' && !event.shiftKey) {
+      event.preventDefault(); // Prevent default to avoid newline
      handleSendMessage();
    }
+    // If Shift+Enter is pressed, the default behavior (inserting a newline) will occur
  };

  const handleCancelRequest = () => {
@ -206,7 +308,10 @@ const ChatUI: React.FC<ChatUIProps> = ({
            selectedModel,
            effectiveApiKey,
            selectedTags,
-            signal
+            signal,
+            updateReasoningContent,
+            updateTimingData,
+            updateUsageData
          );
        } else if (endpointType === EndpointType.IMAGE) {
          // For image generation
@ -410,7 +515,16 @@ const ChatUI: React.FC<ChatUIProps> = ({
                      </span>
                    )}
                  </div>
-                  <div className="whitespace-pre-wrap break-words max-w-full message-content">
+                  {message.reasoningContent && (
+                    <ReasoningContent reasoningContent={message.reasoningContent} />
+                  )}
+                  <div className="whitespace-pre-wrap break-words max-w-full message-content" 
+                       style={{ 
+                         wordWrap: 'break-word', 
+                         overflowWrap: 'break-word',
+                         wordBreak: 'break-word',
+                         hyphens: 'auto'
+                       }}>
                    {message.isImage ? (
                      <img 
                        src={message.content} 
@ -432,21 +546,33 @@ const ChatUI: React.FC<ChatUIProps> = ({
                                language={match[1]}
                                PreTag="div"
                                className="rounded-md my-2"
+                                wrapLines={true}
+                                wrapLongLines={true}
                                {...props}
                              >
                                {String(children).replace(/\n$/, '')}
                              </SyntaxHighlighter>
                            ) : (
-                              <code className={`${className} px-1.5 py-0.5 rounded bg-gray-100 text-sm font-mono`} {...props}>
+                              <code className={`${className} px-1.5 py-0.5 rounded bg-gray-100 text-sm font-mono`} style={{ wordBreak: 'break-word' }} {...props}>
                                {children}
                              </code>
                            );
-                          }
+                          },
+                          pre: ({ node, ...props }) => (
+                            <pre style={{ overflowX: 'auto', maxWidth: '100%' }} {...props} />
+                          )
                        }}
                      >
                        {message.content}
                      </ReactMarkdown>
                    )}
+                                        
+                    {message.role === "assistant" && (message.timeToFirstToken || message.usage) && (
+                      <ResponseMetrics 
+                        timeToFirstToken={message.timeToFirstToken}
+                        usage={message.usage}
+                      />
+                    )}
                  </div>
                </div>
              </div>
@ -461,18 +587,19 @@ const ChatUI: React.FC<ChatUIProps> = ({
          
          <div className="p-4 border-t border-gray-200 bg-white">
            <div className="flex items-center">
-              <TextInput
-                type="text"
+              <TextArea
                value={inputMessage}
                onChange={(e) => setInputMessage(e.target.value)}
                onKeyDown={handleKeyDown}
                placeholder={
                  endpointType === EndpointType.CHAT 
-                    ? "Type your message..." 
+                    ? "Type your message... (Shift+Enter for new line)" 
                    : "Describe the image you want to generate..."
                }
                disabled={isLoading}
                className="flex-1"
+                autoSize={{ minRows: 1, maxRows: 6 }}
+                style={{ resize: 'none', paddingRight: '10px', paddingLeft: '10px' }}
              />
              {isLoading ? (
                <Button
--- a/ui/litellm-dashboard/src/components/chat_ui/ReasoningContent.tsx
+++ b/ui/litellm-dashboard/src/components/chat_ui/ReasoningContent.tsx
@ -0,0 +1,64 @@
+import React, { useState } from "react";
+import { Button, Collapse } from "antd";
+import ReactMarkdown from "react-markdown";
+import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
+import { coy } from 'react-syntax-highlighter/dist/esm/styles/prism';
+import { DownOutlined, RightOutlined, BulbOutlined } from "@ant-design/icons";
+
+interface ReasoningContentProps {
+  /** Reasoning text rendered as markdown; the component renders nothing when it is empty. */
+  reasoningContent: string;
+}
+
+type ReasoningCodeProps = React.ComponentPropsWithoutRef<'code'> & {
+  inline?: boolean;
+  node?: any;
+};
+
+/**
+ * Markdown `code` renderer: block code tagged with a `language-*` class goes through
+ * the syntax highlighter, everything else is rendered as a styled `<code>` element.
+ */
+const renderReasoningCode = ({ node, inline, className, children, ...props }: ReasoningCodeProps) => {
+  // `node` is destructured only to keep it out of `props`, which is spread onto the element below.
+  const match = /language-(\w+)/.exec(className || '');
+
+  if (!inline && match) {
+    return (
+      <SyntaxHighlighter
+        style={coy as any}
+        language={match[1]}
+        PreTag="div"
+        className="rounded-md my-2"
+        {...props}
+      >
+        {String(children).replace(/\n$/, '')}
+      </SyntaxHighlighter>
+    );
+  }
+
+  // Skip a missing className instead of interpolating it, which would emit the literal class "undefined".
+  const codeClassName = [className, "px-1.5 py-0.5 rounded bg-gray-100 text-sm font-mono"]
+    .filter(Boolean)
+    .join(" ");
+
+  return (
+    <code className={codeClassName} {...props}>
+      {children}
+    </code>
+  );
+};
+
+// Defined once at module level so the renderer keeps a stable identity across renders;
+// an inline function would hand ReactMarkdown a new component type on every render.
+const reasoningMarkdownComponents = { code: renderReasoningCode };
+
+/**
+ * Collapsible panel that shows a model's reasoning text as markdown.
+ *
+ * The panel starts expanded and the button toggles it. Returns null when
+ * `reasoningContent` is empty.
+ */
+const ReasoningContent: React.FC<ReasoningContentProps> = ({ reasoningContent }) => {
+  const [isExpanded, setIsExpanded] = useState(true);
+
+  if (!reasoningContent) return null;
+
+  return (
+    <div className="reasoning-content mt-1 mb-2">
+      <Button 
+        type="text" 
+        className="flex items-center text-xs text-gray-500 hover:text-gray-700"
+        onClick={() => setIsExpanded(!isExpanded)}
+        icon={<BulbOutlined />}
+      >
+        {isExpanded ? "Hide reasoning" : "Show reasoning"}
+        {isExpanded ? <DownOutlined className="ml-1" /> : <RightOutlined className="ml-1" />}
+      </Button>
+
+      {isExpanded && (
+        <div className="mt-2 p-3 bg-gray-50 border border-gray-200 rounded-md text-sm text-gray-700">
+          <ReactMarkdown components={reasoningMarkdownComponents}>
+            {reasoningContent}
+          </ReactMarkdown>
+        </div>
+      )}
+    </div>
+  );
+};
+
+export default ReasoningContent; 
--- a/ui/litellm-dashboard/src/components/chat_ui/ResponseMetrics.tsx
+++ b/ui/litellm-dashboard/src/components/chat_ui/ResponseMetrics.tsx
@ -0,0 +1,80 @@
+import React from "react";
+import { Tooltip } from "antd";
+import { 
+  ClockCircleOutlined, 
+  NumberOutlined, 
+  ImportOutlined, 
+  ExportOutlined,
+  ThunderboltOutlined,
+  BulbOutlined
+} from "@ant-design/icons";
+
+export interface TokenUsage { // Camel-cased token counts for one response; every field is optional
+  completionTokens?: number; // shown as "Out: <n>"
+  promptTokens?: number; // shown as "In: <n>"
+  totalTokens?: number; // shown as "Total: <n>"
+  reasoningTokens?: number; // shown as "Reasoning: <n>"
+}
+
+interface ResponseMetricsProps { // Props of ResponseMetrics; both are optional
+  timeToFirstToken?: number; // in milliseconds
+  usage?: TokenUsage;
+}
+
+/**
+ * Compact row of response metrics: time to first token plus token counts.
+ *
+ * Each metric is rendered only when its value is present, and the component renders
+ * nothing at all when no metric is available.
+ *
+ * @param timeToFirstToken Milliseconds until the first token; displayed in seconds with two decimals.
+ * @param usage Token counts; each present field becomes one labelled entry.
+ */
+const ResponseMetrics: React.FC<ResponseMetricsProps> = ({ 
+  timeToFirstToken, 
+  usage 
+}) => {
+  // Presence is tested with `!= null` rather than truthiness so a value of 0 is still shown.
+  const ttftSeconds =
+    timeToFirstToken != null ? (timeToFirstToken / 1000).toFixed(2) : undefined;
+
+  // Display order: prompt, completion, reasoning, total.
+  const tokenMetrics = [
+    {
+      key: "prompt",
+      title: "Prompt tokens",
+      label: "In",
+      icon: <ImportOutlined className="mr-1" />,
+      value: usage?.promptTokens,
+    },
+    {
+      key: "completion",
+      title: "Completion tokens",
+      label: "Out",
+      icon: <ExportOutlined className="mr-1" />,
+      value: usage?.completionTokens,
+    },
+    {
+      key: "reasoning",
+      title: "Reasoning tokens",
+      label: "Reasoning",
+      icon: <BulbOutlined className="mr-1" />,
+      value: usage?.reasoningTokens,
+    },
+    {
+      key: "total",
+      title: "Total tokens",
+      label: "Total",
+      icon: <NumberOutlined className="mr-1" />,
+      value: usage?.totalTokens,
+    },
+  ].filter((metric) => metric.value != null);
+
+  // Nothing to show (for example `usage` is an empty object): skip the bordered row entirely.
+  if (ttftSeconds === undefined && tokenMetrics.length === 0) return null;
+
+  return (
+    <div className="response-metrics mt-2 pt-2 border-t border-gray-100 text-xs text-gray-500 flex flex-wrap gap-3">
+      {ttftSeconds !== undefined && (
+        <Tooltip title="Time to first token">
+          <div className="flex items-center">
+            <ClockCircleOutlined className="mr-1" />
+            <span>{ttftSeconds}s</span>
+          </div>
+        </Tooltip>
+      )}
+
+      {tokenMetrics.map((metric) => (
+        <Tooltip key={metric.key} title={metric.title}>
+          <div className="flex items-center">
+            {metric.icon}
+            <span>{metric.label}: {metric.value}</span>
+          </div>
+        </Tooltip>
+      ))}
+    </div>
+  );
+};
+
+export default ResponseMetrics; 
--- a/ui/litellm-dashboard/src/components/chat_ui/llm_calls/chat_completion.tsx
+++ b/ui/litellm-dashboard/src/components/chat_ui/llm_calls/chat_completion.tsx
@ -1,14 +1,18 @@
 import openai from "openai";
 import { ChatCompletionMessageParam } from "openai/resources/chat/completions";
 import { message } from "antd";
+import { TokenUsage } from "../ResponseMetrics";

 export async function makeOpenAIChatCompletionRequest(
    chatHistory: { role: string; content: string }[],
-    updateUI: (chunk: string, model: string) => void,
+    updateUI: (chunk: string, model?: string) => void,
    selectedModel: string,
    accessToken: string,
    tags?: string[],
-    signal?: AbortSignal
+    signal?: AbortSignal,
+    onReasoningContent?: (content: string) => void,
+    onTimingData?: (timeToFirstToken: number) => void,
+    onUsageData?: (usage: TokenUsage) => void
  ) {
    // base url should be the current base_url
    const isLocal = process.env.NODE_ENV === "development";
@ -20,23 +24,85 @@ export async function makeOpenAIChatCompletionRequest(
      ? "http://localhost:4000"
      : window.location.origin;
    const client = new openai.OpenAI({
-      apiKey: accessToken, // Replace with your OpenAI API key
-      baseURL: proxyBaseUrl, // Replace with your OpenAI API base URL
-      dangerouslyAllowBrowser: true, // using a temporary litellm proxy key
+      apiKey: accessToken,
+      baseURL: proxyBaseUrl,
+      dangerouslyAllowBrowser: true,
      defaultHeaders: tags && tags.length > 0 ? { 'x-litellm-tags': tags.join(',') } : undefined,
    });
  
    try {
+      const startTime = Date.now();
+      let firstTokenReceived = false;
+      let timeToFirstToken: number | undefined = undefined;
+      
+      // For collecting complete response text
+      let fullResponseContent = "";
+      let fullReasoningContent = "";
+
      const response = await client.chat.completions.create({
        model: selectedModel,
        stream: true,
+        stream_options: {
+          include_usage: true,
+        },
        messages: chatHistory as ChatCompletionMessageParam[],
      }, { signal });
  
      for await (const chunk of response) {
-        console.log(chunk);
-        if (chunk.choices[0].delta.content) {
-          updateUI(chunk.choices[0].delta.content, chunk.model);
+        console.log("Stream chunk:", chunk);
+        
+        // Process content and measure time to first token
+        const delta = chunk.choices[0]?.delta as any;
+        
+        // Debug what's in the delta
+        console.log("Delta content:", chunk.choices[0]?.delta?.content);
+        console.log("Delta reasoning content:", delta?.reasoning_content);
+        
+        // Measure time to first token for either content or reasoning_content
+        if (!firstTokenReceived && (chunk.choices[0]?.delta?.content || (delta && delta.reasoning_content))) {
+          firstTokenReceived = true;
+          timeToFirstToken = Date.now() - startTime;
+          console.log("First token received! Time:", timeToFirstToken, "ms");
+          if (onTimingData) {
+            console.log("Calling onTimingData with:", timeToFirstToken);
+            onTimingData(timeToFirstToken);
+          } else {
+            console.log("onTimingData callback is not defined!");
+          }
+        }
+        
+        // Process content
+        if (chunk.choices[0]?.delta?.content) {
+          const content = chunk.choices[0].delta.content;
+          updateUI(content, chunk.model);
+          fullResponseContent += content;
+        }
+        
+        // Process reasoning content if present - using type assertion
+        if (delta && delta.reasoning_content) {
+          const reasoningContent = delta.reasoning_content;
+          if (onReasoningContent) {
+            onReasoningContent(reasoningContent);
+          }
+          fullReasoningContent += reasoningContent;
+        }
+        
+        // Check for usage data using type assertion
+        const chunkWithUsage = chunk as any;
+        if (chunkWithUsage.usage && onUsageData) {
+          console.log("Usage data found:", chunkWithUsage.usage);
+          const usageData: TokenUsage = {
+            completionTokens: chunkWithUsage.usage.completion_tokens,
+            promptTokens: chunkWithUsage.usage.prompt_tokens,
+            totalTokens: chunkWithUsage.usage.total_tokens,
+          };
+          
+          // Check for reasoning tokens
+          if (chunkWithUsage.usage.completion_tokens_details?.reasoning_tokens) {
+            usageData.reasoningTokens = chunkWithUsage.usage.completion_tokens_details.reasoning_tokens;
+          }
+          
+          onUsageData(usageData);
        }
      }
    } catch (error) {
--- a/ui/litellm-dashboard/src/components/chat_ui/llm_calls/process_stream.tsx
+++ b/ui/litellm-dashboard/src/components/chat_ui/llm_calls/process_stream.tsx
@ -0,0 +1,96 @@
+import { TokenUsage } from "../ResponseMetrics";
+
+export interface StreamingResponse { // One streamed chunk as consumed by processStreamingResponse
+  id: string;
+  created: number;
+  model: string; // forwarded to onContent together with the text
+  object: string;
+  system_fingerprint?: string;
+  choices: StreamingChoices[]; // only the first entry is read by processStreamingResponse
+  provider_specific_fields?: any;
+  stream_options?: any;
+  citations?: any;
+  usage?: Usage; // when present, converted to TokenUsage and passed to onUsage
+}
+
+export interface StreamingChoices { // One entry of StreamingResponse.choices
+  finish_reason?: string | null;
+  index: number;
+  delta: Delta;
+  logprobs?: any;
+}
+
+export interface Delta { // Incremental payload of a choice
+  content?: string; // passed to onContent when non-empty
+  reasoning_content?: string; // passed to onReasoningContent when non-empty
+  role?: string;
+  function_call?: any;
+  tool_calls?: any;
+  audio?: any;
+  refusal?: any;
+  provider_specific_fields?: any;
+}
+
+export interface Usage { // Snake-cased token counts carried on a chunk
+  completion_tokens: number;
+  prompt_tokens: number;
+  total_tokens: number;
+  completion_tokens_details?: {
+    accepted_prediction_tokens?: number;
+    audio_tokens?: number;
+    reasoning_tokens?: number; // copied to TokenUsage.reasoningTokens when truthy
+    rejected_prediction_tokens?: number;
+    text_tokens?: number | null;
+  };
+  prompt_tokens_details?: {
+    audio_tokens?: number;
+    cached_tokens?: number;
+    text_tokens?: number;
+    image_tokens?: number;
+  };
+}
+
+export interface StreamProcessCallbacks { // Handlers invoked by processStreamingResponse
+  onContent: (content: string, model?: string) => void;
+  onReasoningContent: (content: string) => void;
+  onUsage?: (usage: TokenUsage) => void; // optional: usage is ignored when no handler is given
+}
+
+/**
+ * Dispatches the parts of one streamed chunk to the matching callbacks.
+ *
+ * - Non-empty `delta.content` of the first choice goes to `onContent`, with the chunk's model.
+ * - Non-empty `delta.reasoning_content` of the first choice goes to `onReasoningContent`.
+ * - `usage`, when present and an `onUsage` handler was given, is converted to the
+ *   camel-cased `TokenUsage` shape and passed to `onUsage`.
+ *
+ * Choices after the first are ignored.
+ *
+ * @param response The streamed chunk to process.
+ * @param callbacks Handlers for content, reasoning content and (optionally) usage.
+ */
+export const processStreamingResponse = (
+  response: StreamingResponse, 
+  callbacks: StreamProcessCallbacks
+) => {
+  const model = response.model;
+
+  if (response.choices && response.choices.length > 0) {
+    const choice = response.choices[0];
+
+    if (choice.delta?.content) {
+      callbacks.onContent(choice.delta.content, model);
+    }
+
+    if (choice.delta?.reasoning_content) {
+      callbacks.onReasoningContent(choice.delta.reasoning_content);
+    }
+  }
+
+  if (response.usage && callbacks.onUsage) {
+    const usageData: TokenUsage = {
+      completionTokens: response.usage.completion_tokens,
+      promptTokens: response.usage.prompt_tokens,
+      totalTokens: response.usage.total_tokens,
+    };
+
+    // Truthiness check on purpose: a missing or zero reasoning-token count leaves
+    // `reasoningTokens` unset.
+    const reasoningTokens = response.usage.completion_tokens_details?.reasoning_tokens;
+    if (reasoningTokens) {
+      usageData.reasoningTokens = reasoningTokens;
+    }
+
+    callbacks.onUsage(usageData);
+  }
+};
--- a/ui/litellm-dashboard/src/components/chat_ui/types.ts
+++ b/ui/litellm-dashboard/src/components/chat_ui/types.ts
@ -0,0 +1,68 @@
+export interface Delta { // NOTE(review): same fields as Delta in chat_ui/llm_calls/process_stream.tsx — consider keeping a single definition
+  content?: string;
+  reasoning_content?: string;
+  role?: string;
+  function_call?: any;
+  tool_calls?: any;
+  audio?: any;
+  refusal?: any;
+  provider_specific_fields?: any;
+}
+
+export interface CompletionTokensDetails { // Optional breakdown referenced by Usage.completion_tokens_details
+  accepted_prediction_tokens?: number;
+  audio_tokens?: number;
+  reasoning_tokens?: number;
+  rejected_prediction_tokens?: number;
+  text_tokens?: number | null;
+}
+
+export interface PromptTokensDetails { // Optional breakdown referenced by Usage.prompt_tokens_details
+  audio_tokens?: number;
+  cached_tokens?: number;
+  text_tokens?: number;
+  image_tokens?: number;
+}
+
+export interface Usage { // Snake-cased token counts; NOTE(review): same fields as Usage in process_stream.tsx
+  completion_tokens: number;
+  prompt_tokens: number;
+  total_tokens: number;
+  completion_tokens_details?: CompletionTokensDetails;
+  prompt_tokens_details?: PromptTokensDetails;
+}
+
+export interface StreamingChoices { // NOTE(review): same fields as StreamingChoices in process_stream.tsx
+  finish_reason?: string | null;
+  index: number;
+  delta: Delta;
+  logprobs?: any;
+}
+
+export interface StreamingResponse { // NOTE(review): same fields as StreamingResponse in process_stream.tsx
+  id: string;
+  created: number;
+  model: string;
+  object: string;
+  system_fingerprint?: string;
+  choices: StreamingChoices[];
+  provider_specific_fields?: any;
+  stream_options?: any;
+  citations?: any;
+  usage?: Usage;
+}
+
+export interface MessageType { // One entry of the chat history kept by the chat UI
+  role: string; // the chat UI checks for "user" and "assistant"
+  content: string; // message text; used as the <img> src when isImage is set
+  model?: string;
+  isImage?: boolean;
+  reasoningContent?: string; // reasoning text, shown through the ReasoningContent component
+  timeToFirstToken?: number; // passed to ResponseMetrics, which treats it as milliseconds
+  usage?: { // NOTE(review): same fields as TokenUsage in ResponseMetrics.tsx — consider reusing that type
+    completionTokens?: number;
+    promptTokens?: number;
+    totalTokens?: number;
+    reasoningTokens?: number;
+  };
+}