chat ui, show num reasoning tokens used

Ishaan Jaff 2025-04-11 18:53:22 -07:00
parent f314e08ab4
commit 4d193816db
5 changed files with 313 additions and 13 deletions

View file

@@ -35,6 +35,7 @@ import TagSelector from "./tag_management/TagSelector";
import { determineEndpointType } from "./chat_ui/EndpointUtils";
import { MessageType } from "./chat_ui/types";
import ReasoningContent from "./chat_ui/ReasoningContent";
import ResponseMetrics, { TokenUsage } from "./chat_ui/ResponseMetrics";
import {
  SendOutlined,
  ApiOutlined,
@@ -185,6 +186,47 @@ const ChatUI: React.FC<ChatUIProps> = ({
    });
  };

  const updateTimingData = (timeToFirstToken: number) => {
    setChatHistory((prevHistory) => {
      const lastMessage = prevHistory[prevHistory.length - 1];
      if (lastMessage && lastMessage.role === "assistant") {
        return [
          ...prevHistory.slice(0, prevHistory.length - 1),
          {
            ...lastMessage,
            timeToFirstToken
          },
        ];
      }
      return prevHistory;
    });
  };

  const updateUsageData = (usage: TokenUsage) => {
    console.log("Received usage data:", usage);
    setChatHistory((prevHistory) => {
      const lastMessage = prevHistory[prevHistory.length - 1];
      if (lastMessage && lastMessage.role === "assistant") {
        console.log("Updating message with usage data:", usage);
        const updatedMessage = {
          ...lastMessage,
          usage
        };
        console.log("Updated message:", updatedMessage);
        return [
          ...prevHistory.slice(0, prevHistory.length - 1),
          updatedMessage
        ];
      }
      return prevHistory;
    });
  };
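updateTimingData and updateUsageData share the same shape: immutably replace the trailing assistant message with a patched copy so React picks up the change. A possible refactor (hypothetical, not in this commit) that both could delegate to:

```typescript
// Hypothetical helper - not in this commit. Applies a partial update to
// the last message when (and only when) it is an assistant message.
const updateLastAssistantMessage = (patch: Partial<MessageType>) => {
  setChatHistory((prevHistory) => {
    const lastMessage = prevHistory[prevHistory.length - 1];
    if (!lastMessage || lastMessage.role !== "assistant") return prevHistory;
    return [...prevHistory.slice(0, -1), { ...lastMessage, ...patch }];
  });
};

// updateTimingData(t) -> updateLastAssistantMessage({ timeToFirstToken: t })
// updateUsageData(u)  -> updateLastAssistantMessage({ usage: u })
```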
  const updateImageUI = (imageUrl: string, model: string) => {
    setChatHistory((prevHistory) => [
      ...prevHistory,
@@ -248,7 +290,9 @@ const ChatUI: React.FC<ChatUIProps> = ({
          effectiveApiKey,
          selectedTags,
          signal,
          updateReasoningContent,
          updateTimingData,
          updateUsageData
        );
      } else if (endpointType === EndpointType.IMAGE) {
        // For image generation
@@ -503,6 +547,13 @@ const ChatUI: React.FC<ChatUIProps> = ({
                    {message.content}
                  </ReactMarkdown>
                )}
                {message.role === "assistant" && (message.timeToFirstToken || message.usage) && (
                  <ResponseMetrics
                    timeToFirstToken={message.timeToFirstToken}
                    usage={message.usage}
                  />
                )}
              </div>
            </div>
          </div>

View file

@@ -0,0 +1,80 @@
import React from "react";
import { Tooltip } from "antd";
import {
  ClockCircleOutlined,
  NumberOutlined,
  ImportOutlined,
  ExportOutlined,
  ThunderboltOutlined,
  BulbOutlined
} from "@ant-design/icons";

export interface TokenUsage {
  completionTokens?: number;
  promptTokens?: number;
  totalTokens?: number;
  reasoningTokens?: number;
}

interface ResponseMetricsProps {
  timeToFirstToken?: number; // in milliseconds
  usage?: TokenUsage;
}

const ResponseMetrics: React.FC<ResponseMetricsProps> = ({
  timeToFirstToken,
  usage
}) => {
  if (!timeToFirstToken && !usage) return null;

  return (
    <div className="response-metrics mt-2 pt-2 border-t border-gray-100 text-xs text-gray-500 flex flex-wrap gap-3">
      {timeToFirstToken !== undefined && (
        <Tooltip title="Time to first token">
          <div className="flex items-center">
            <ClockCircleOutlined className="mr-1" />
            <span>{(timeToFirstToken / 1000).toFixed(2)}s</span>
          </div>
        </Tooltip>
      )}
      {usage?.promptTokens !== undefined && (
        <Tooltip title="Prompt tokens">
          <div className="flex items-center">
            <ImportOutlined className="mr-1" />
            <span>In: {usage.promptTokens}</span>
          </div>
        </Tooltip>
      )}
      {usage?.completionTokens !== undefined && (
        <Tooltip title="Completion tokens">
          <div className="flex items-center">
            <ExportOutlined className="mr-1" />
            <span>Out: {usage.completionTokens}</span>
          </div>
        </Tooltip>
      )}
      {usage?.reasoningTokens !== undefined && (
        <Tooltip title="Reasoning tokens">
          <div className="flex items-center">
            <BulbOutlined className="mr-1" />
            <span>Reasoning: {usage.reasoningTokens}</span>
          </div>
        </Tooltip>
      )}
      {usage?.totalTokens !== undefined && (
        <Tooltip title="Total tokens">
          <div className="flex items-center">
            <NumberOutlined className="mr-1" />
            <span>Total: {usage.totalTokens}</span>
          </div>
        </Tooltip>
      )}
    </div>
  );
};

export default ResponseMetrics;
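As a quick reference, a sketch of the component rendered with invented values:

```tsx
// Illustrative preview - numbers are made up. This would render
// "0.74s", "In: 120", "Out: 85", "Reasoning: 40", "Total: 245" in a row.
const MetricsPreview: React.FC = () => (
  <ResponseMetrics
    timeToFirstToken={742} // milliseconds; shown in seconds
    usage={{ promptTokens: 120, completionTokens: 85, reasoningTokens: 40, totalTokens: 245 }}
  />
);
```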

View file

@@ -1,7 +1,7 @@
import openai from "openai";
import { ChatCompletionMessageParam } from "openai/resources/chat/completions";
import { message } from "antd";
import { TokenUsage } from "../ResponseMetrics";

export async function makeOpenAIChatCompletionRequest(
  chatHistory: { role: string; content: string }[],
@@ -10,7 +10,9 @@ export async function makeOpenAIChatCompletionRequest(
  accessToken: string,
  tags?: string[],
  signal?: AbortSignal,
  onReasoningContent?: (content: string) => void,
  onTimingData?: (timeToFirstToken: number) => void,
  onUsageData?: (usage: TokenUsage) => void
) {
  // base url should be the current base_url
  const isLocal = process.env.NODE_ENV === "development";
@@ -22,13 +24,21 @@ export async function makeOpenAIChatCompletionRequest(
    ? "http://localhost:4000"
    : window.location.origin;

  const client = new openai.OpenAI({
    apiKey: accessToken,
    baseURL: proxyBaseUrl,
    dangerouslyAllowBrowser: true,
    defaultHeaders: tags && tags.length > 0 ? { 'x-litellm-tags': tags.join(',') } : undefined,
  });

  try {
    const startTime = Date.now();
    let firstTokenReceived = false;
    let timeToFirstToken: number | undefined = undefined;

    // For collecting complete response text
    let fullResponseContent = "";
    let fullReasoningContent = "";

    const response = await client.chat.completions.create({
      model: selectedModel,
      stream: true,
@@ -36,12 +46,72 @@ export async function makeOpenAIChatCompletionRequest(
    }, { signal });

    for await (const chunk of response) {
      console.log("Stream chunk:", chunk);

      // Measure time to first token
      if (!firstTokenReceived && chunk.choices[0]?.delta?.content) {
        firstTokenReceived = true;
        timeToFirstToken = Date.now() - startTime;
        if (onTimingData) {
          onTimingData(timeToFirstToken);
        }
      }

      // Process content
      if (chunk.choices[0]?.delta?.content) {
        const content = chunk.choices[0].delta.content;
        updateUI(content, chunk.model);
        fullResponseContent += content;
      }

      // Process reasoning content if present - using type assertion
      const delta = chunk.choices[0]?.delta as any;
      if (delta && delta.reasoning_content) {
        const reasoningContent = delta.reasoning_content;
        if (onReasoningContent) {
          onReasoningContent(reasoningContent);
        }
        fullReasoningContent += reasoningContent;
      }

      // Check for usage data using type assertion
      const chunkWithUsage = chunk as any;
      if (chunkWithUsage.usage && onUsageData) {
        console.log("Usage data found:", chunkWithUsage.usage);
        const usageData: TokenUsage = {
          completionTokens: chunkWithUsage.usage.completion_tokens,
          promptTokens: chunkWithUsage.usage.prompt_tokens,
          totalTokens: chunkWithUsage.usage.total_tokens,
        };

        // Check for reasoning tokens
        if (chunkWithUsage.usage.completion_tokens_details?.reasoning_tokens) {
          usageData.reasoningTokens = chunkWithUsage.usage.completion_tokens_details.reasoning_tokens;
        }

        onUsageData(usageData);
      }
    }

    // Always create an estimated usage
    if (onUsageData) {
      try {
        console.log("Creating estimated usage data");

        // Create a simple usage estimate - approximately 4 characters per token
        const estimatedUsage: TokenUsage = {
          promptTokens: Math.ceil(JSON.stringify(chatHistory).length / 4),
          completionTokens: Math.ceil(fullResponseContent.length / 4),
          totalTokens: Math.ceil((JSON.stringify(chatHistory).length + fullResponseContent.length) / 4)
        };

        if (fullReasoningContent) {
          estimatedUsage.reasoningTokens = Math.ceil(fullReasoningContent.length / 4);
        }

        onUsageData(estimatedUsage);
      } catch (error) {
        console.error("Error estimating usage data:", error);
      }
    }
  } catch (error) {
    if (signal?.aborted) {

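A note on the fallback above: the estimated usage (roughly 4 characters per token) runs after every stream, even when a provider already attached exact counts, because most APIs omit usage from streamed chunks by default. With the OpenAI SDK the exact numbers can be requested explicitly; a minimal sketch, assuming the LiteLLM proxy honors stream_options:

```typescript
// Sketch only - not part of this commit. `stream_options.include_usage`
// asks the server to append a final chunk whose `usage` field carries
// exact token counts, which the loop above would forward to onUsageData.
const response = await client.chat.completions.create(
  {
    model: selectedModel,
    stream: true,
    stream_options: { include_usage: true },
    messages: chatHistory as ChatCompletionMessageParam[],
  },
  { signal }
);
```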
View file

@@ -1,8 +1,59 @@
import { TokenUsage } from "../ResponseMetrics";

export interface StreamingResponse {
  id: string;
  created: number;
  model: string;
  object: string;
  system_fingerprint?: string;
  choices: StreamingChoices[];
  provider_specific_fields?: any;
  stream_options?: any;
  citations?: any;
  usage?: Usage;
}

export interface StreamingChoices {
  finish_reason?: string | null;
  index: number;
  delta: Delta;
  logprobs?: any;
}

export interface Delta {
  content?: string;
  reasoning_content?: string;
  role?: string;
  function_call?: any;
  tool_calls?: any;
  audio?: any;
  refusal?: any;
  provider_specific_fields?: any;
}

export interface Usage {
  completion_tokens: number;
  prompt_tokens: number;
  total_tokens: number;
  completion_tokens_details?: {
    accepted_prediction_tokens?: number;
    audio_tokens?: number;
    reasoning_tokens?: number;
    rejected_prediction_tokens?: number;
    text_tokens?: number | null;
  };
  prompt_tokens_details?: {
    audio_tokens?: number;
    cached_tokens?: number;
    text_tokens?: number;
    image_tokens?: number;
  };
}

export interface StreamProcessCallbacks {
  onContent: (content: string, model?: string) => void;
  onReasoningContent: (content: string) => void;
  onUsage?: (usage: TokenUsage) => void;
}

export const processStreamingResponse = (
@@ -25,4 +76,21 @@ export const processStreamingResponse = (
      callbacks.onReasoningContent(choice.delta.reasoning_content);
    }
  }

  // Process usage information if it exists and we have a handler
  if (response.usage && callbacks.onUsage) {
    console.log("Processing usage data:", response.usage);
    const usageData: TokenUsage = {
      completionTokens: response.usage.completion_tokens,
      promptTokens: response.usage.prompt_tokens,
      totalTokens: response.usage.total_tokens,
    };

    // Extract reasoning tokens if available
    if (response.usage.completion_tokens_details?.reasoning_tokens) {
      usageData.reasoningTokens = response.usage.completion_tokens_details.reasoning_tokens;
    }

    callbacks.onUsage(usageData);
  }
};
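For completeness, a sketch of a call site wiring the new optional onUsage callback, where chunk is assumed to be a parsed StreamingResponse; the logging handlers are placeholders:

```typescript
// Illustrative only - handlers are placeholders.
const callbacks: StreamProcessCallbacks = {
  onContent: (content, model) => console.log("content:", content, "model:", model),
  onReasoningContent: (content) => console.log("reasoning:", content),
  onUsage: (usage) => console.log("reasoning tokens used:", usage.reasoningTokens),
};
processStreamingResponse(chunk, callbacks);
```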

View file

@@ -9,6 +9,29 @@ export interface Delta {
  provider_specific_fields?: any;
}

export interface CompletionTokensDetails {
  accepted_prediction_tokens?: number;
  audio_tokens?: number;
  reasoning_tokens?: number;
  rejected_prediction_tokens?: number;
  text_tokens?: number | null;
}

export interface PromptTokensDetails {
  audio_tokens?: number;
  cached_tokens?: number;
  text_tokens?: number;
  image_tokens?: number;
}

export interface Usage {
  completion_tokens: number;
  prompt_tokens: number;
  total_tokens: number;
  completion_tokens_details?: CompletionTokensDetails;
  prompt_tokens_details?: PromptTokensDetails;
}

export interface StreamingChoices {
  finish_reason?: string | null;
  index: number;
@@ -26,6 +49,7 @@ export interface StreamingResponse {
  provider_specific_fields?: any;
  stream_options?: any;
  citations?: any;
  usage?: Usage;
}

export interface MessageType {
@@ -34,4 +58,11 @@ export interface MessageType {
  model?: string;
  isImage?: boolean;
  reasoningContent?: string;
  timeToFirstToken?: number;
  usage?: {
    completionTokens?: number;
    promptTokens?: number;
    totalTokens?: number;
    reasoningTokens?: number;
  };
}
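Taken together, a completed assistant entry in chat history might look like the following (all values invented for illustration):

```typescript
// Invented example values.
const assistantMessage: MessageType = {
  role: "assistant",
  content: "The answer is 42.",
  model: "gpt-4o",
  reasoningContent: "First, restate the question...",
  timeToFirstToken: 618, // ms; ResponseMetrics renders this as "0.62s"
  usage: { promptTokens: 52, completionTokens: 210, reasoningTokens: 96, totalTokens: 262 },
};
```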