mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-24 18:24:20 +00:00
[UI] Render Reasoning content, ttft, usage metrics on test key page (#9931)
* add BaseReasoningEffortTests * BaseReasoningLLMTests * fix test rename * docs update thinking / reasoning content docs * show reasoning content on chat ui * chat ui allow pasting in content * chat ui fix size * chat ui, show num reasoning tokens used * ui render usage metrics on test key page
This commit is contained in:
parent
57bc03b30b
commit
7fde06d8d3
6 changed files with 521 additions and 20 deletions
|
@ -23,7 +23,7 @@ import {
|
|||
Divider,
|
||||
} from "@tremor/react";
|
||||
|
||||
import { message, Select, Spin, Typography, Tooltip } from "antd";
|
||||
import { message, Select, Spin, Typography, Tooltip, Input } from "antd";
|
||||
import { makeOpenAIChatCompletionRequest } from "./chat_ui/llm_calls/chat_completion";
|
||||
import { makeOpenAIImageGenerationRequest } from "./chat_ui/llm_calls/image_generation";
|
||||
import { fetchAvailableModels, ModelGroup } from "./chat_ui/llm_calls/fetch_models";
|
||||
|
@ -33,6 +33,9 @@ import { coy } from 'react-syntax-highlighter/dist/esm/styles/prism';
|
|||
import EndpointSelector from "./chat_ui/EndpointSelector";
|
||||
import TagSelector from "./tag_management/TagSelector";
|
||||
import { determineEndpointType } from "./chat_ui/EndpointUtils";
|
||||
import { MessageType } from "./chat_ui/types";
|
||||
import ReasoningContent from "./chat_ui/ReasoningContent";
|
||||
import ResponseMetrics, { TokenUsage } from "./chat_ui/ResponseMetrics";
|
||||
import {
|
||||
SendOutlined,
|
||||
ApiOutlined,
|
||||
|
@ -45,6 +48,8 @@ import {
|
|||
TagsOutlined
|
||||
} from "@ant-design/icons";
|
||||
|
||||
const { TextArea } = Input;
|
||||
|
||||
interface ChatUIProps {
|
||||
accessToken: string | null;
|
||||
token: string | null;
|
||||
|
@ -65,7 +70,7 @@ const ChatUI: React.FC<ChatUIProps> = ({
|
|||
);
|
||||
const [apiKey, setApiKey] = useState("");
|
||||
const [inputMessage, setInputMessage] = useState("");
|
||||
const [chatHistory, setChatHistory] = useState<{ role: string; content: string; model?: string; isImage?: boolean }[]>([]);
|
||||
const [chatHistory, setChatHistory] = useState<MessageType[]>([]);
|
||||
const [selectedModel, setSelectedModel] = useState<string | undefined>(
|
||||
undefined
|
||||
);
|
||||
|
@ -138,7 +143,11 @@ const ChatUI: React.FC<ChatUIProps> = ({
|
|||
if (lastMessage && lastMessage.role === role && !lastMessage.isImage) {
|
||||
return [
|
||||
...prevHistory.slice(0, prevHistory.length - 1),
|
||||
{ role, content: lastMessage.content + chunk, model },
|
||||
{
|
||||
...lastMessage,
|
||||
content: lastMessage.content + chunk,
|
||||
model
|
||||
},
|
||||
];
|
||||
} else {
|
||||
return [...prevHistory, { role, content: chunk, model }];
|
||||
|
@ -146,6 +155,97 @@ const ChatUI: React.FC<ChatUIProps> = ({
|
|||
});
|
||||
};
|
||||
|
||||
const updateReasoningContent = (chunk: string) => {
|
||||
setChatHistory((prevHistory) => {
|
||||
const lastMessage = prevHistory[prevHistory.length - 1];
|
||||
|
||||
if (lastMessage && lastMessage.role === "assistant" && !lastMessage.isImage) {
|
||||
return [
|
||||
...prevHistory.slice(0, prevHistory.length - 1),
|
||||
{
|
||||
...lastMessage,
|
||||
reasoningContent: (lastMessage.reasoningContent || "") + chunk
|
||||
},
|
||||
];
|
||||
} else {
|
||||
// If there's no assistant message yet, we'll create one with empty content
|
||||
// but with reasoning content
|
||||
if (prevHistory.length > 0 && prevHistory[prevHistory.length - 1].role === "user") {
|
||||
return [
|
||||
...prevHistory,
|
||||
{
|
||||
role: "assistant",
|
||||
content: "",
|
||||
reasoningContent: chunk
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
return prevHistory;
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
const updateTimingData = (timeToFirstToken: number) => {
|
||||
console.log("updateTimingData called with:", timeToFirstToken);
|
||||
setChatHistory((prevHistory) => {
|
||||
const lastMessage = prevHistory[prevHistory.length - 1];
|
||||
console.log("Current last message:", lastMessage);
|
||||
|
||||
if (lastMessage && lastMessage.role === "assistant") {
|
||||
console.log("Updating assistant message with timeToFirstToken:", timeToFirstToken);
|
||||
const updatedHistory = [
|
||||
...prevHistory.slice(0, prevHistory.length - 1),
|
||||
{
|
||||
...lastMessage,
|
||||
timeToFirstToken
|
||||
},
|
||||
];
|
||||
console.log("Updated chat history:", updatedHistory);
|
||||
return updatedHistory;
|
||||
}
|
||||
// If the last message is a user message and no assistant message exists yet,
|
||||
// create a new assistant message with empty content
|
||||
else if (lastMessage && lastMessage.role === "user") {
|
||||
console.log("Creating new assistant message with timeToFirstToken:", timeToFirstToken);
|
||||
return [
|
||||
...prevHistory,
|
||||
{
|
||||
role: "assistant",
|
||||
content: "",
|
||||
timeToFirstToken
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
console.log("No appropriate message found to update timing");
|
||||
return prevHistory;
|
||||
});
|
||||
};
|
||||
|
||||
const updateUsageData = (usage: TokenUsage) => {
|
||||
console.log("Received usage data:", usage);
|
||||
setChatHistory((prevHistory) => {
|
||||
const lastMessage = prevHistory[prevHistory.length - 1];
|
||||
|
||||
if (lastMessage && lastMessage.role === "assistant") {
|
||||
console.log("Updating message with usage data:", usage);
|
||||
const updatedMessage = {
|
||||
...lastMessage,
|
||||
usage
|
||||
};
|
||||
console.log("Updated message:", updatedMessage);
|
||||
|
||||
return [
|
||||
...prevHistory.slice(0, prevHistory.length - 1),
|
||||
updatedMessage
|
||||
];
|
||||
}
|
||||
|
||||
return prevHistory;
|
||||
});
|
||||
};
|
||||
|
||||
const updateImageUI = (imageUrl: string, model: string) => {
|
||||
setChatHistory((prevHistory) => [
|
||||
...prevHistory,
|
||||
|
@ -153,10 +253,12 @@ const ChatUI: React.FC<ChatUIProps> = ({
|
|||
]);
|
||||
};
|
||||
|
||||
const handleKeyDown = (event: React.KeyboardEvent<HTMLInputElement>) => {
|
||||
if (event.key === 'Enter') {
|
||||
const handleKeyDown = (event: React.KeyboardEvent<HTMLTextAreaElement>) => {
|
||||
if (event.key === 'Enter' && !event.shiftKey) {
|
||||
event.preventDefault(); // Prevent default to avoid newline
|
||||
handleSendMessage();
|
||||
}
|
||||
// If Shift+Enter is pressed, the default behavior (inserting a newline) will occur
|
||||
};
|
||||
|
||||
const handleCancelRequest = () => {
|
||||
|
@ -206,7 +308,10 @@ const ChatUI: React.FC<ChatUIProps> = ({
|
|||
selectedModel,
|
||||
effectiveApiKey,
|
||||
selectedTags,
|
||||
signal
|
||||
signal,
|
||||
updateReasoningContent,
|
||||
updateTimingData,
|
||||
updateUsageData
|
||||
);
|
||||
} else if (endpointType === EndpointType.IMAGE) {
|
||||
// For image generation
|
||||
|
@ -410,7 +515,16 @@ const ChatUI: React.FC<ChatUIProps> = ({
|
|||
</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="whitespace-pre-wrap break-words max-w-full message-content">
|
||||
{message.reasoningContent && (
|
||||
<ReasoningContent reasoningContent={message.reasoningContent} />
|
||||
)}
|
||||
<div className="whitespace-pre-wrap break-words max-w-full message-content"
|
||||
style={{
|
||||
wordWrap: 'break-word',
|
||||
overflowWrap: 'break-word',
|
||||
wordBreak: 'break-word',
|
||||
hyphens: 'auto'
|
||||
}}>
|
||||
{message.isImage ? (
|
||||
<img
|
||||
src={message.content}
|
||||
|
@ -432,21 +546,33 @@ const ChatUI: React.FC<ChatUIProps> = ({
|
|||
language={match[1]}
|
||||
PreTag="div"
|
||||
className="rounded-md my-2"
|
||||
wrapLines={true}
|
||||
wrapLongLines={true}
|
||||
{...props}
|
||||
>
|
||||
{String(children).replace(/\n$/, '')}
|
||||
</SyntaxHighlighter>
|
||||
) : (
|
||||
<code className={`${className} px-1.5 py-0.5 rounded bg-gray-100 text-sm font-mono`} {...props}>
|
||||
<code className={`${className} px-1.5 py-0.5 rounded bg-gray-100 text-sm font-mono`} style={{ wordBreak: 'break-word' }} {...props}>
|
||||
{children}
|
||||
</code>
|
||||
);
|
||||
}
|
||||
},
|
||||
pre: ({ node, ...props }) => (
|
||||
<pre style={{ overflowX: 'auto', maxWidth: '100%' }} {...props} />
|
||||
)
|
||||
}}
|
||||
>
|
||||
{message.content}
|
||||
</ReactMarkdown>
|
||||
)}
|
||||
|
||||
{message.role === "assistant" && (message.timeToFirstToken || message.usage) && (
|
||||
<ResponseMetrics
|
||||
timeToFirstToken={message.timeToFirstToken}
|
||||
usage={message.usage}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
@ -461,18 +587,19 @@ const ChatUI: React.FC<ChatUIProps> = ({
|
|||
|
||||
<div className="p-4 border-t border-gray-200 bg-white">
|
||||
<div className="flex items-center">
|
||||
<TextInput
|
||||
type="text"
|
||||
<TextArea
|
||||
value={inputMessage}
|
||||
onChange={(e) => setInputMessage(e.target.value)}
|
||||
onKeyDown={handleKeyDown}
|
||||
placeholder={
|
||||
endpointType === EndpointType.CHAT
|
||||
? "Type your message..."
|
||||
? "Type your message... (Shift+Enter for new line)"
|
||||
: "Describe the image you want to generate..."
|
||||
}
|
||||
disabled={isLoading}
|
||||
className="flex-1"
|
||||
autoSize={{ minRows: 1, maxRows: 6 }}
|
||||
style={{ resize: 'none', paddingRight: '10px', paddingLeft: '10px' }}
|
||||
/>
|
||||
{isLoading ? (
|
||||
<Button
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
import React, { useState } from "react";
|
||||
import { Button, Collapse } from "antd";
|
||||
import ReactMarkdown from "react-markdown";
|
||||
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
|
||||
import { coy } from 'react-syntax-highlighter/dist/esm/styles/prism';
|
||||
import { DownOutlined, RightOutlined, BulbOutlined } from "@ant-design/icons";
|
||||
|
||||
interface ReasoningContentProps {
|
||||
reasoningContent: string;
|
||||
}
|
||||
|
||||
const ReasoningContent: React.FC<ReasoningContentProps> = ({ reasoningContent }) => {
|
||||
const [isExpanded, setIsExpanded] = useState(true);
|
||||
|
||||
if (!reasoningContent) return null;
|
||||
|
||||
return (
|
||||
<div className="reasoning-content mt-1 mb-2">
|
||||
<Button
|
||||
type="text"
|
||||
className="flex items-center text-xs text-gray-500 hover:text-gray-700"
|
||||
onClick={() => setIsExpanded(!isExpanded)}
|
||||
icon={<BulbOutlined />}
|
||||
>
|
||||
{isExpanded ? "Hide reasoning" : "Show reasoning"}
|
||||
{isExpanded ? <DownOutlined className="ml-1" /> : <RightOutlined className="ml-1" />}
|
||||
</Button>
|
||||
|
||||
{isExpanded && (
|
||||
<div className="mt-2 p-3 bg-gray-50 border border-gray-200 rounded-md text-sm text-gray-700">
|
||||
<ReactMarkdown
|
||||
components={{
|
||||
code({node, inline, className, children, ...props}: React.ComponentPropsWithoutRef<'code'> & {
|
||||
inline?: boolean;
|
||||
node?: any;
|
||||
}) {
|
||||
const match = /language-(\w+)/.exec(className || '');
|
||||
return !inline && match ? (
|
||||
<SyntaxHighlighter
|
||||
style={coy as any}
|
||||
language={match[1]}
|
||||
PreTag="div"
|
||||
className="rounded-md my-2"
|
||||
{...props}
|
||||
>
|
||||
{String(children).replace(/\n$/, '')}
|
||||
</SyntaxHighlighter>
|
||||
) : (
|
||||
<code className={`${className} px-1.5 py-0.5 rounded bg-gray-100 text-sm font-mono`} {...props}>
|
||||
{children}
|
||||
</code>
|
||||
);
|
||||
}
|
||||
}}
|
||||
>
|
||||
{reasoningContent}
|
||||
</ReactMarkdown>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default ReasoningContent;
|
|
@ -0,0 +1,80 @@
|
|||
import React from "react";
import { Tooltip } from "antd";
import {
  ClockCircleOutlined,
  NumberOutlined,
  ImportOutlined,
  ExportOutlined,
  BulbOutlined
} from "@ant-design/icons";

/** Per-response token usage in camelCase view-model form. */
export interface TokenUsage {
  completionTokens?: number;
  promptTokens?: number;
  totalTokens?: number;
  reasoningTokens?: number;
}

interface ResponseMetricsProps {
  timeToFirstToken?: number; // in milliseconds
  usage?: TokenUsage;
}

/**
 * Compact footer row of per-response metrics: time to first token plus
 * prompt / completion / reasoning / total token counts. Each metric is
 * shown only when its value is defined; renders nothing when no metric
 * is available at all.
 */
const ResponseMetrics: React.FC<ResponseMetricsProps> = ({
  timeToFirstToken,
  usage
}) => {
  // Explicit undefined check so a 0 ms TTFT still renders; a plain
  // truthiness test would treat 0 as "no data" (matches the
  // `timeToFirstToken !== undefined` guard used below).
  if (timeToFirstToken === undefined && !usage) return null;

  return (
    <div className="response-metrics mt-2 pt-2 border-t border-gray-100 text-xs text-gray-500 flex flex-wrap gap-3">
      {timeToFirstToken !== undefined && (
        <Tooltip title="Time to first token">
          <div className="flex items-center">
            <ClockCircleOutlined className="mr-1" />
            <span>{(timeToFirstToken / 1000).toFixed(2)}s</span>
          </div>
        </Tooltip>
      )}

      {usage?.promptTokens !== undefined && (
        <Tooltip title="Prompt tokens">
          <div className="flex items-center">
            <ImportOutlined className="mr-1" />
            <span>In: {usage.promptTokens}</span>
          </div>
        </Tooltip>
      )}

      {usage?.completionTokens !== undefined && (
        <Tooltip title="Completion tokens">
          <div className="flex items-center">
            <ExportOutlined className="mr-1" />
            <span>Out: {usage.completionTokens}</span>
          </div>
        </Tooltip>
      )}

      {usage?.reasoningTokens !== undefined && (
        <Tooltip title="Reasoning tokens">
          <div className="flex items-center">
            <BulbOutlined className="mr-1" />
            <span>Reasoning: {usage.reasoningTokens}</span>
          </div>
        </Tooltip>
      )}

      {usage?.totalTokens !== undefined && (
        <Tooltip title="Total tokens">
          <div className="flex items-center">
            <NumberOutlined className="mr-1" />
            <span>Total: {usage.totalTokens}</span>
          </div>
        </Tooltip>
      )}
    </div>
  );
};

export default ResponseMetrics;
|
|
@ -1,14 +1,18 @@
|
|||
import openai from "openai";
|
||||
import { ChatCompletionMessageParam } from "openai/resources/chat/completions";
|
||||
import { message } from "antd";
|
||||
import { TokenUsage } from "../ResponseMetrics";
|
||||
|
||||
export async function makeOpenAIChatCompletionRequest(
|
||||
chatHistory: { role: string; content: string }[],
|
||||
updateUI: (chunk: string, model: string) => void,
|
||||
updateUI: (chunk: string, model?: string) => void,
|
||||
selectedModel: string,
|
||||
accessToken: string,
|
||||
tags?: string[],
|
||||
signal?: AbortSignal
|
||||
signal?: AbortSignal,
|
||||
onReasoningContent?: (content: string) => void,
|
||||
onTimingData?: (timeToFirstToken: number) => void,
|
||||
onUsageData?: (usage: TokenUsage) => void
|
||||
) {
|
||||
// base url should be the current base_url
|
||||
const isLocal = process.env.NODE_ENV === "development";
|
||||
|
@ -20,23 +24,85 @@ export async function makeOpenAIChatCompletionRequest(
|
|||
? "http://localhost:4000"
|
||||
: window.location.origin;
|
||||
const client = new openai.OpenAI({
|
||||
apiKey: accessToken, // Replace with your OpenAI API key
|
||||
baseURL: proxyBaseUrl, // Replace with your OpenAI API base URL
|
||||
dangerouslyAllowBrowser: true, // using a temporary litellm proxy key
|
||||
apiKey: accessToken,
|
||||
baseURL: proxyBaseUrl,
|
||||
dangerouslyAllowBrowser: true,
|
||||
defaultHeaders: tags && tags.length > 0 ? { 'x-litellm-tags': tags.join(',') } : undefined,
|
||||
});
|
||||
|
||||
try {
|
||||
const startTime = Date.now();
|
||||
let firstTokenReceived = false;
|
||||
let timeToFirstToken: number | undefined = undefined;
|
||||
|
||||
// For collecting complete response text
|
||||
let fullResponseContent = "";
|
||||
let fullReasoningContent = "";
|
||||
|
||||
const response = await client.chat.completions.create({
|
||||
model: selectedModel,
|
||||
stream: true,
|
||||
stream_options: {
|
||||
include_usage: true,
|
||||
},
|
||||
messages: chatHistory as ChatCompletionMessageParam[],
|
||||
}, { signal });
|
||||
|
||||
for await (const chunk of response) {
|
||||
console.log(chunk);
|
||||
if (chunk.choices[0].delta.content) {
|
||||
updateUI(chunk.choices[0].delta.content, chunk.model);
|
||||
console.log("Stream chunk:", chunk);
|
||||
|
||||
// Process content and measure time to first token
|
||||
const delta = chunk.choices[0]?.delta as any;
|
||||
|
||||
// Debug what's in the delta
|
||||
console.log("Delta content:", chunk.choices[0]?.delta?.content);
|
||||
console.log("Delta reasoning content:", delta?.reasoning_content);
|
||||
|
||||
// Measure time to first token for either content or reasoning_content
|
||||
if (!firstTokenReceived && (chunk.choices[0]?.delta?.content || (delta && delta.reasoning_content))) {
|
||||
firstTokenReceived = true;
|
||||
timeToFirstToken = Date.now() - startTime;
|
||||
console.log("First token received! Time:", timeToFirstToken, "ms");
|
||||
if (onTimingData) {
|
||||
console.log("Calling onTimingData with:", timeToFirstToken);
|
||||
onTimingData(timeToFirstToken);
|
||||
} else {
|
||||
console.log("onTimingData callback is not defined!");
|
||||
}
|
||||
}
|
||||
|
||||
// Process content
|
||||
if (chunk.choices[0]?.delta?.content) {
|
||||
const content = chunk.choices[0].delta.content;
|
||||
updateUI(content, chunk.model);
|
||||
fullResponseContent += content;
|
||||
}
|
||||
|
||||
// Process reasoning content if present - using type assertion
|
||||
if (delta && delta.reasoning_content) {
|
||||
const reasoningContent = delta.reasoning_content;
|
||||
if (onReasoningContent) {
|
||||
onReasoningContent(reasoningContent);
|
||||
}
|
||||
fullReasoningContent += reasoningContent;
|
||||
}
|
||||
|
||||
// Check for usage data using type assertion
|
||||
const chunkWithUsage = chunk as any;
|
||||
if (chunkWithUsage.usage && onUsageData) {
|
||||
console.log("Usage data found:", chunkWithUsage.usage);
|
||||
const usageData: TokenUsage = {
|
||||
completionTokens: chunkWithUsage.usage.completion_tokens,
|
||||
promptTokens: chunkWithUsage.usage.prompt_tokens,
|
||||
totalTokens: chunkWithUsage.usage.total_tokens,
|
||||
};
|
||||
|
||||
// Check for reasoning tokens
|
||||
if (chunkWithUsage.usage.completion_tokens_details?.reasoning_tokens) {
|
||||
usageData.reasoningTokens = chunkWithUsage.usage.completion_tokens_details.reasoning_tokens;
|
||||
}
|
||||
|
||||
onUsageData(usageData);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
import { TokenUsage } from "../ResponseMetrics";
|
||||
|
||||
export interface StreamingResponse {
|
||||
id: string;
|
||||
created: number;
|
||||
model: string;
|
||||
object: string;
|
||||
system_fingerprint?: string;
|
||||
choices: StreamingChoices[];
|
||||
provider_specific_fields?: any;
|
||||
stream_options?: any;
|
||||
citations?: any;
|
||||
usage?: Usage;
|
||||
}
|
||||
|
||||
export interface StreamingChoices {
|
||||
finish_reason?: string | null;
|
||||
index: number;
|
||||
delta: Delta;
|
||||
logprobs?: any;
|
||||
}
|
||||
|
||||
export interface Delta {
|
||||
content?: string;
|
||||
reasoning_content?: string;
|
||||
role?: string;
|
||||
function_call?: any;
|
||||
tool_calls?: any;
|
||||
audio?: any;
|
||||
refusal?: any;
|
||||
provider_specific_fields?: any;
|
||||
}
|
||||
|
||||
export interface Usage {
|
||||
completion_tokens: number;
|
||||
prompt_tokens: number;
|
||||
total_tokens: number;
|
||||
completion_tokens_details?: {
|
||||
accepted_prediction_tokens?: number;
|
||||
audio_tokens?: number;
|
||||
reasoning_tokens?: number;
|
||||
rejected_prediction_tokens?: number;
|
||||
text_tokens?: number | null;
|
||||
};
|
||||
prompt_tokens_details?: {
|
||||
audio_tokens?: number;
|
||||
cached_tokens?: number;
|
||||
text_tokens?: number;
|
||||
image_tokens?: number;
|
||||
};
|
||||
}
|
||||
|
||||
export interface StreamProcessCallbacks {
|
||||
onContent: (content: string, model?: string) => void;
|
||||
onReasoningContent: (content: string) => void;
|
||||
onUsage?: (usage: TokenUsage) => void;
|
||||
}
|
||||
|
||||
export const processStreamingResponse = (
|
||||
response: StreamingResponse,
|
||||
callbacks: StreamProcessCallbacks
|
||||
) => {
|
||||
// Extract model information if available
|
||||
const model = response.model;
|
||||
|
||||
// Process regular content
|
||||
if (response.choices && response.choices.length > 0) {
|
||||
const choice = response.choices[0];
|
||||
|
||||
if (choice.delta?.content) {
|
||||
callbacks.onContent(choice.delta.content, model);
|
||||
}
|
||||
|
||||
// Process reasoning content if it exists
|
||||
if (choice.delta?.reasoning_content) {
|
||||
callbacks.onReasoningContent(choice.delta.reasoning_content);
|
||||
}
|
||||
}
|
||||
|
||||
// Process usage information if it exists and we have a handler
|
||||
if (response.usage && callbacks.onUsage) {
|
||||
console.log("Processing usage data:", response.usage);
|
||||
const usageData: TokenUsage = {
|
||||
completionTokens: response.usage.completion_tokens,
|
||||
promptTokens: response.usage.prompt_tokens,
|
||||
totalTokens: response.usage.total_tokens,
|
||||
};
|
||||
|
||||
// Extract reasoning tokens if available
|
||||
if (response.usage.completion_tokens_details?.reasoning_tokens) {
|
||||
usageData.reasoningTokens = response.usage.completion_tokens_details.reasoning_tokens;
|
||||
}
|
||||
|
||||
callbacks.onUsage(usageData);
|
||||
}
|
||||
};
|
68
ui/litellm-dashboard/src/components/chat_ui/types.ts
Normal file
68
ui/litellm-dashboard/src/components/chat_ui/types.ts
Normal file
|
@ -0,0 +1,68 @@
|
|||
export interface Delta {
|
||||
content?: string;
|
||||
reasoning_content?: string;
|
||||
role?: string;
|
||||
function_call?: any;
|
||||
tool_calls?: any;
|
||||
audio?: any;
|
||||
refusal?: any;
|
||||
provider_specific_fields?: any;
|
||||
}
|
||||
|
||||
export interface CompletionTokensDetails {
|
||||
accepted_prediction_tokens?: number;
|
||||
audio_tokens?: number;
|
||||
reasoning_tokens?: number;
|
||||
rejected_prediction_tokens?: number;
|
||||
text_tokens?: number | null;
|
||||
}
|
||||
|
||||
export interface PromptTokensDetails {
|
||||
audio_tokens?: number;
|
||||
cached_tokens?: number;
|
||||
text_tokens?: number;
|
||||
image_tokens?: number;
|
||||
}
|
||||
|
||||
export interface Usage {
|
||||
completion_tokens: number;
|
||||
prompt_tokens: number;
|
||||
total_tokens: number;
|
||||
completion_tokens_details?: CompletionTokensDetails;
|
||||
prompt_tokens_details?: PromptTokensDetails;
|
||||
}
|
||||
|
||||
export interface StreamingChoices {
|
||||
finish_reason?: string | null;
|
||||
index: number;
|
||||
delta: Delta;
|
||||
logprobs?: any;
|
||||
}
|
||||
|
||||
export interface StreamingResponse {
|
||||
id: string;
|
||||
created: number;
|
||||
model: string;
|
||||
object: string;
|
||||
system_fingerprint?: string;
|
||||
choices: StreamingChoices[];
|
||||
provider_specific_fields?: any;
|
||||
stream_options?: any;
|
||||
citations?: any;
|
||||
usage?: Usage;
|
||||
}
|
||||
|
||||
export interface MessageType {
|
||||
role: string;
|
||||
content: string;
|
||||
model?: string;
|
||||
isImage?: boolean;
|
||||
reasoningContent?: string;
|
||||
timeToFirstToken?: number;
|
||||
usage?: {
|
||||
completionTokens?: number;
|
||||
promptTokens?: number;
|
||||
totalTokens?: number;
|
||||
reasoningTokens?: number;
|
||||
};
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue