mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 18:54:30 +00:00

commit 4d193816db (parent f314e08ab4)
chat ui, show num reasoning tokens used

5 changed files with 313 additions and 13 deletions
@@ -35,6 +35,7 @@ import TagSelector from "./tag_management/TagSelector";
 import { determineEndpointType } from "./chat_ui/EndpointUtils";
 import { MessageType } from "./chat_ui/types";
 import ReasoningContent from "./chat_ui/ReasoningContent";
+import ResponseMetrics, { TokenUsage } from "./chat_ui/ResponseMetrics";
 import {
   SendOutlined,
   ApiOutlined,
@@ -185,6 +186,47 @@ const ChatUI: React.FC<ChatUIProps> = ({
     });
   };
+
+  const updateTimingData = (timeToFirstToken: number) => {
+    setChatHistory((prevHistory) => {
+      const lastMessage = prevHistory[prevHistory.length - 1];
+
+      if (lastMessage && lastMessage.role === "assistant") {
+        return [
+          ...prevHistory.slice(0, prevHistory.length - 1),
+          {
+            ...lastMessage,
+            timeToFirstToken
+          },
+        ];
+      }
+
+      return prevHistory;
+    });
+  };
+
+  const updateUsageData = (usage: TokenUsage) => {
+    console.log("Received usage data:", usage);
+    setChatHistory((prevHistory) => {
+      const lastMessage = prevHistory[prevHistory.length - 1];
+
+      if (lastMessage && lastMessage.role === "assistant") {
+        console.log("Updating message with usage data:", usage);
+        const updatedMessage = {
+          ...lastMessage,
+          usage
+        };
+        console.log("Updated message:", updatedMessage);
+
+        return [
+          ...prevHistory.slice(0, prevHistory.length - 1),
+          updatedMessage
+        ];
+      }
+
+      return prevHistory;
+    });
+  };
+
   const updateImageUI = (imageUrl: string, model: string) => {
     setChatHistory((prevHistory) => [
       ...prevHistory,
@@ -248,7 +290,9 @@ const ChatUI: React.FC<ChatUIProps> = ({
         effectiveApiKey,
         selectedTags,
         signal,
-        updateReasoningContent
+        updateReasoningContent,
+        updateTimingData,
+        updateUsageData
       );
     } else if (endpointType === EndpointType.IMAGE) {
       // For image generation
@@ -503,6 +547,13 @@ const ChatUI: React.FC<ChatUIProps> = ({
                   {message.content}
                 </ReactMarkdown>
               )}
+
+              {message.role === "assistant" && (message.timeToFirstToken || message.usage) && (
+                <ResponseMetrics
+                  timeToFirstToken={message.timeToFirstToken}
+                  usage={message.usage}
+                />
+              )}
             </div>
           </div>
         </div>
@@ -0,0 +1,80 @@
+import React from "react";
+import { Tooltip } from "antd";
+import {
+  ClockCircleOutlined,
+  NumberOutlined,
+  ImportOutlined,
+  ExportOutlined,
+  ThunderboltOutlined,
+  BulbOutlined
+} from "@ant-design/icons";
+
+export interface TokenUsage {
+  completionTokens?: number;
+  promptTokens?: number;
+  totalTokens?: number;
+  reasoningTokens?: number;
+}
+
+interface ResponseMetricsProps {
+  timeToFirstToken?: number; // in milliseconds
+  usage?: TokenUsage;
+}
+
+const ResponseMetrics: React.FC<ResponseMetricsProps> = ({
+  timeToFirstToken,
+  usage
+}) => {
+  if (!timeToFirstToken && !usage) return null;
+
+  return (
+    <div className="response-metrics mt-2 pt-2 border-t border-gray-100 text-xs text-gray-500 flex flex-wrap gap-3">
+      {timeToFirstToken !== undefined && (
+        <Tooltip title="Time to first token">
+          <div className="flex items-center">
+            <ClockCircleOutlined className="mr-1" />
+            <span>{(timeToFirstToken / 1000).toFixed(2)}s</span>
+          </div>
+        </Tooltip>
+      )}
+
+      {usage?.promptTokens !== undefined && (
+        <Tooltip title="Prompt tokens">
+          <div className="flex items-center">
+            <ImportOutlined className="mr-1" />
+            <span>In: {usage.promptTokens}</span>
+          </div>
+        </Tooltip>
+      )}
+
+      {usage?.completionTokens !== undefined && (
+        <Tooltip title="Completion tokens">
+          <div className="flex items-center">
+            <ExportOutlined className="mr-1" />
+            <span>Out: {usage.completionTokens}</span>
+          </div>
+        </Tooltip>
+      )}
+
+      {usage?.reasoningTokens !== undefined && (
+        <Tooltip title="Reasoning tokens">
+          <div className="flex items-center">
+            <BulbOutlined className="mr-1" />
+            <span>Reasoning: {usage.reasoningTokens}</span>
+          </div>
+        </Tooltip>
+      )}
+
+      {usage?.totalTokens !== undefined && (
+        <Tooltip title="Total tokens">
+          <div className="flex items-center">
+            <NumberOutlined className="mr-1" />
+            <span>Total: {usage.totalTokens}</span>
+          </div>
+        </Tooltip>
+      )}
+    </div>
+  );
+};
+
+export default ResponseMetrics;
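
Note: for orientation, a minimal sketch of rendering the new ResponseMetrics component on its own. The prop values are invented for illustration; they are not taken from the commit.

// Hypothetical standalone usage; all numbers are illustrative.
import ResponseMetrics, { TokenUsage } from "./chat_ui/ResponseMetrics";

const usage: TokenUsage = {
  promptTokens: 42,
  completionTokens: 128,
  reasoningTokens: 96, // shown with the BulbOutlined "Reasoning:" badge
  totalTokens: 170,
};

// Renders "0.35s", "In: 42", "Out: 128", "Reasoning: 96", "Total: 170"
const metrics = <ResponseMetrics timeToFirstToken={350} usage={usage} />;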
@@ -1,7 +1,7 @@
 import openai from "openai";
 import { ChatCompletionMessageParam } from "openai/resources/chat/completions";
 import { message } from "antd";
-import { processStreamingResponse } from "./process_stream";
+import { TokenUsage } from "../ResponseMetrics";
 
 export async function makeOpenAIChatCompletionRequest(
   chatHistory: { role: string; content: string }[],
@@ -10,7 +10,9 @@ export async function makeOpenAIChatCompletionRequest(
   accessToken: string,
   tags?: string[],
   signal?: AbortSignal,
-  onReasoningContent?: (content: string) => void
+  onReasoningContent?: (content: string) => void,
+  onTimingData?: (timeToFirstToken: number) => void,
+  onUsageData?: (usage: TokenUsage) => void
 ) {
   // base url should be the current base_url
   const isLocal = process.env.NODE_ENV === "development";
@@ -22,13 +24,21 @@ export async function makeOpenAIChatCompletionRequest(
     ? "http://localhost:4000"
     : window.location.origin;
   const client = new openai.OpenAI({
-    apiKey: accessToken, // Replace with your OpenAI API key
-    baseURL: proxyBaseUrl, // Replace with your OpenAI API base URL
-    dangerouslyAllowBrowser: true, // using a temporary litellm proxy key
+    apiKey: accessToken,
+    baseURL: proxyBaseUrl,
+    dangerouslyAllowBrowser: true,
     defaultHeaders: tags && tags.length > 0 ? { 'x-litellm-tags': tags.join(',') } : undefined,
   });
 
   try {
+    const startTime = Date.now();
+    let firstTokenReceived = false;
+    let timeToFirstToken: number | undefined = undefined;
+
+    // For collecting complete response text
+    let fullResponseContent = "";
+    let fullReasoningContent = "";
+
     const response = await client.chat.completions.create({
       model: selectedModel,
       stream: true,
@@ -36,12 +46,72 @@ export async function makeOpenAIChatCompletionRequest(
     }, { signal });
 
     for await (const chunk of response) {
-      console.log(chunk);
-      // Process the chunk using our utility
-      processStreamingResponse(chunk, {
-        onContent: updateUI,
-        onReasoningContent: onReasoningContent || (() => {})
-      });
+      console.log("Stream chunk:", chunk);
+
+      // Measure time to first token
+      if (!firstTokenReceived && chunk.choices[0]?.delta?.content) {
+        firstTokenReceived = true;
+        timeToFirstToken = Date.now() - startTime;
+        if (onTimingData) {
+          onTimingData(timeToFirstToken);
+        }
+      }
+
+      // Process content
+      if (chunk.choices[0]?.delta?.content) {
+        const content = chunk.choices[0].delta.content;
+        updateUI(content, chunk.model);
+        fullResponseContent += content;
+      }
+
+      // Process reasoning content if present - using type assertion
+      const delta = chunk.choices[0]?.delta as any;
+      if (delta && delta.reasoning_content) {
+        const reasoningContent = delta.reasoning_content;
+        if (onReasoningContent) {
+          onReasoningContent(reasoningContent);
+        }
+        fullReasoningContent += reasoningContent;
+      }
+
+      // Check for usage data using type assertion
+      const chunkWithUsage = chunk as any;
+      if (chunkWithUsage.usage && onUsageData) {
+        console.log("Usage data found:", chunkWithUsage.usage);
+        const usageData: TokenUsage = {
+          completionTokens: chunkWithUsage.usage.completion_tokens,
+          promptTokens: chunkWithUsage.usage.prompt_tokens,
+          totalTokens: chunkWithUsage.usage.total_tokens,
+        };
+
+        // Check for reasoning tokens
+        if (chunkWithUsage.usage.completion_tokens_details?.reasoning_tokens) {
+          usageData.reasoningTokens = chunkWithUsage.usage.completion_tokens_details.reasoning_tokens;
+        }
+
+        onUsageData(usageData);
+      }
+    }
+
+    // Always create an estimated usage
+    if (onUsageData) {
+      try {
+        console.log("Creating estimated usage data");
+        // Create a simple usage estimate - approximately 4 characters per token
+        const estimatedUsage: TokenUsage = {
+          promptTokens: Math.ceil(JSON.stringify(chatHistory).length / 4),
+          completionTokens: Math.ceil((fullResponseContent.length) / 4),
+          totalTokens: Math.ceil((JSON.stringify(chatHistory).length + fullResponseContent.length) / 4)
+        };
+
+        if (fullReasoningContent) {
+          estimatedUsage.reasoningTokens = Math.ceil(fullReasoningContent.length / 4);
+        }
+
+        onUsageData(estimatedUsage);
+      } catch (error) {
+        console.error("Error estimating usage data:", error);
+      }
     }
   } catch (error) {
     if (signal?.aborted) {
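
Note on the per-chunk usage check above: with the OpenAI SDK, a streamed completion normally reports token usage only in a final chunk, and only when the caller opts in via stream_options. A minimal sketch of that opt-in; whether the litellm proxy forwards this flag is an assumption here, and the characters-per-token estimate after the loop covers the case where no usage chunk ever arrives.

// Sketch only: opting in to streamed usage reporting.
// `stream_options: { include_usage: true }` is the stock OpenAI
// parameter; proxy support is assumed, not shown in this commit.
const response = await client.chat.completions.create({
  model: selectedModel,
  messages: chatHistory as ChatCompletionMessageParam[],
  stream: true,
  stream_options: { include_usage: true }, // usage arrives on the last chunk
}, { signal });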
@@ -1,8 +1,59 @@
-import { StreamingResponse } from "../types";
+import { TokenUsage } from "../ResponseMetrics";
+
+export interface StreamingResponse {
+  id: string;
+  created: number;
+  model: string;
+  object: string;
+  system_fingerprint?: string;
+  choices: StreamingChoices[];
+  provider_specific_fields?: any;
+  stream_options?: any;
+  citations?: any;
+  usage?: Usage;
+}
+
+export interface StreamingChoices {
+  finish_reason?: string | null;
+  index: number;
+  delta: Delta;
+  logprobs?: any;
+}
+
+export interface Delta {
+  content?: string;
+  reasoning_content?: string;
+  role?: string;
+  function_call?: any;
+  tool_calls?: any;
+  audio?: any;
+  refusal?: any;
+  provider_specific_fields?: any;
+}
+
+export interface Usage {
+  completion_tokens: number;
+  prompt_tokens: number;
+  total_tokens: number;
+  completion_tokens_details?: {
+    accepted_prediction_tokens?: number;
+    audio_tokens?: number;
+    reasoning_tokens?: number;
+    rejected_prediction_tokens?: number;
+    text_tokens?: number | null;
+  };
+  prompt_tokens_details?: {
+    audio_tokens?: number;
+    cached_tokens?: number;
+    text_tokens?: number;
+    image_tokens?: number;
+  };
+}
 
 export interface StreamProcessCallbacks {
   onContent: (content: string, model?: string) => void;
   onReasoningContent: (content: string) => void;
+  onUsage?: (usage: TokenUsage) => void;
 }
 
 export const processStreamingResponse = (
@@ -25,4 +76,21 @@ export const processStreamingResponse = (
       callbacks.onReasoningContent(choice.delta.reasoning_content);
     }
   }
+
+  // Process usage information if it exists and we have a handler
+  if (response.usage && callbacks.onUsage) {
+    console.log("Processing usage data:", response.usage);
+    const usageData: TokenUsage = {
+      completionTokens: response.usage.completion_tokens,
+      promptTokens: response.usage.prompt_tokens,
+      totalTokens: response.usage.total_tokens,
+    };
+
+    // Extract reasoning tokens if available
+    if (response.usage.completion_tokens_details?.reasoning_tokens) {
+      usageData.reasoningTokens = response.usage.completion_tokens_details.reasoning_tokens;
+    }
+
+    callbacks.onUsage(usageData);
+  }
 };
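
Taken together, the helper can now be driven with an optional onUsage callback. A minimal sketch of a caller; the two handler functions are hypothetical names for illustration.

import { processStreamingResponse } from "./process_stream";

processStreamingResponse(chunk, {
  onContent: (content, model) => appendToChat(content, model), // appendToChat: hypothetical
  onReasoningContent: (content) => appendReasoning(content),   // appendReasoning: hypothetical
  onUsage: (usage) => console.log("tokens:", usage.totalTokens),
});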
@@ -9,6 +9,29 @@ export interface Delta {
   provider_specific_fields?: any;
 }
 
+export interface CompletionTokensDetails {
+  accepted_prediction_tokens?: number;
+  audio_tokens?: number;
+  reasoning_tokens?: number;
+  rejected_prediction_tokens?: number;
+  text_tokens?: number | null;
+}
+
+export interface PromptTokensDetails {
+  audio_tokens?: number;
+  cached_tokens?: number;
+  text_tokens?: number;
+  image_tokens?: number;
+}
+
+export interface Usage {
+  completion_tokens: number;
+  prompt_tokens: number;
+  total_tokens: number;
+  completion_tokens_details?: CompletionTokensDetails;
+  prompt_tokens_details?: PromptTokensDetails;
+}
+
 export interface StreamingChoices {
   finish_reason?: string | null;
   index: number;
@@ -26,6 +49,7 @@ export interface StreamingResponse {
   provider_specific_fields?: any;
   stream_options?: any;
   citations?: any;
+  usage?: Usage;
 }
 
 export interface MessageType {
@@ -34,4 +58,11 @@ export interface MessageType {
   model?: string;
   isImage?: boolean;
   reasoningContent?: string;
+  timeToFirstToken?: number;
+  usage?: {
+    completionTokens?: number;
+    promptTokens?: number;
+    totalTokens?: number;
+    reasoningTokens?: number;
+  };
 }