mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-11 19:56:03 +00:00
UI enhancements
rh-pre-commit.version: 2.3.2 rh-pre-commit.check-secrets: ENABLED
This commit is contained in:
parent
5ef6ccf90e
commit
9964287698
7 changed files with 2688 additions and 1584 deletions
|
|
@ -27,6 +27,12 @@ import {
|
|||
cleanMessageContent,
|
||||
extractCleanText,
|
||||
} from "@/lib/message-content-utils";
|
||||
import {
|
||||
extractThinkTags,
|
||||
extractStreamingThinking,
|
||||
sanitizeThinkingContent,
|
||||
} from "@/lib/xml-parser";
|
||||
import type { ThinkingPart } from "@/components/chat-playground/thinking-block";
|
||||
export default function ChatPlaygroundPage() {
|
||||
const [currentSession, setCurrentSession] = useState<ChatSession | null>(
|
||||
null
|
||||
|
|
@ -1057,6 +1063,9 @@ export default function ChatPlaygroundPage() {
|
|||
});
|
||||
|
||||
let fullContent = "";
|
||||
let thinkingBuffer = "";
|
||||
const thinkingParts: ThinkingPart[] = [];
|
||||
let currentThinkingStartTime: number | null = null;
|
||||
|
||||
for await (const chunk of response) {
|
||||
const { text: deltaText } = processChunk(chunk);
|
||||
|
|
@ -1087,7 +1096,36 @@ export default function ChatPlaygroundPage() {
|
|||
}
|
||||
|
||||
if (deltaText) {
|
||||
// Add to buffer for thinking extraction
|
||||
thinkingBuffer += deltaText;
|
||||
|
||||
// Try to extract thinking content from buffer
|
||||
const streamingResult = extractStreamingThinking(thinkingBuffer);
|
||||
|
||||
if (streamingResult.isComplete && streamingResult.thinking) {
|
||||
// We have a complete thinking block
|
||||
const endTime = Date.now();
|
||||
const thinkingPart: ThinkingPart = {
|
||||
type: "thinking",
|
||||
content: sanitizeThinkingContent(streamingResult.thinking),
|
||||
startTime: currentThinkingStartTime || endTime,
|
||||
endTime: endTime,
|
||||
};
|
||||
thinkingParts.push(thinkingPart);
|
||||
thinkingBuffer = streamingResult.remainingBuffer;
|
||||
currentThinkingStartTime = null;
|
||||
} else if (
|
||||
!streamingResult.isComplete &&
|
||||
streamingResult.thinking &&
|
||||
!currentThinkingStartTime
|
||||
) {
|
||||
// Start of a thinking block
|
||||
currentThinkingStartTime = Date.now();
|
||||
}
|
||||
|
||||
// Update full content with text that doesn't include thinking tags
|
||||
fullContent += deltaText;
|
||||
const cleanedFullContent = extractThinkTags(fullContent).cleanText;
|
||||
|
||||
flushSync(() => {
|
||||
setCurrentSession(prev => {
|
||||
|
|
@ -1095,7 +1133,16 @@ export default function ChatPlaygroundPage() {
|
|||
const newMessages = [...prev.messages];
|
||||
const last = newMessages[newMessages.length - 1];
|
||||
if (last.role === "assistant") {
|
||||
last.content = fullContent;
|
||||
// Update content with cleaned text (without thinking tags)
|
||||
last.content = cleanedFullContent;
|
||||
|
||||
// Add thinking parts to the message
|
||||
if (thinkingParts.length > 0) {
|
||||
last.parts = [
|
||||
...thinkingParts,
|
||||
{ type: "text", text: cleanedFullContent },
|
||||
];
|
||||
}
|
||||
}
|
||||
const updatedSession = {
|
||||
...prev,
|
||||
|
|
|
|||
|
|
@ -34,7 +34,14 @@ export default function ContentDetailPage() {
|
|||
const getTextFromContent = (content: unknown): string => {
|
||||
if (typeof content === "string") {
|
||||
return content;
|
||||
} else if (content && content.type === "text") {
|
||||
} else if (
|
||||
content &&
|
||||
typeof content === "object" &&
|
||||
"type" in content &&
|
||||
content.type === "text" &&
|
||||
"text" in content &&
|
||||
typeof content.text === "string"
|
||||
) {
|
||||
return content.text;
|
||||
}
|
||||
return "";
|
||||
|
|
|
|||
|
|
@ -13,6 +13,10 @@ import {
|
|||
} from "@/components/ui/collapsible";
|
||||
import { FilePreview } from "@/components/ui/file-preview";
|
||||
import { MarkdownRenderer } from "@/components/chat-playground/markdown-renderer";
|
||||
import {
|
||||
ThinkingBlock,
|
||||
type ThinkingPart,
|
||||
} from "@/components/chat-playground/thinking-block";
|
||||
|
||||
const chatBubbleVariants = cva(
|
||||
"group/message relative break-words rounded-lg p-3 text-sm sm:max-w-[70%]",
|
||||
|
|
@ -117,6 +121,7 @@ interface StepStartPart {
|
|||
type MessagePart =
|
||||
| TextPart
|
||||
| ReasoningPart
|
||||
| ThinkingPart
|
||||
| ToolInvocationPart
|
||||
| SourcePart
|
||||
| FilePart
|
||||
|
|
@ -235,6 +240,8 @@ export const ChatMessage: React.FC<ChatMessageProps> = ({
|
|||
);
|
||||
} else if (part.type === "reasoning") {
|
||||
return <ReasoningBlock key={`reasoning-${index}`} part={part} />;
|
||||
} else if (part.type === "thinking") {
|
||||
return <ThinkingBlock key={`thinking-${index}`} part={part} />;
|
||||
} else if (part.type === "tool-invocation") {
|
||||
return (
|
||||
<ToolCall
|
||||
|
|
|
|||
216
llama_stack/ui/components/chat-playground/thinking-block.tsx
Normal file
216
llama_stack/ui/components/chat-playground/thinking-block.tsx
Normal file
|
|
@ -0,0 +1,216 @@
|
|||
/**
|
||||
* ThinkingBlock Component
|
||||
* Displays LLM thinking/reasoning content in a collapsible, animated block
|
||||
* Shows duration, pulsing animation during streaming, and expandable content
|
||||
*/
|
||||
|
||||
"use client";
|
||||
|
||||
import { useState } from "react";
|
||||
import { ChevronDown, Brain } from "lucide-react";
|
||||
import { motion, AnimatePresence } from "framer-motion";
|
||||
import {
|
||||
Collapsible,
|
||||
CollapsibleContent,
|
||||
CollapsibleTrigger,
|
||||
} from "@/components/ui/collapsible";
|
||||
import { cn } from "@/lib/utils";
|
||||
|
||||
/**
 * Message part that carries a block of model "thinking" text together with
 * optional timing information.
 */
export interface ThinkingPart {
  /** Discriminant identifying this part as a thinking block. */
  type: "thinking";
  /** The thinking text shown inside the expandable area. */
  content: string;
  /** Timestamp (milliseconds, as produced by `Date.now()`) when thinking began. */
  startTime?: number;
  /** Timestamp (milliseconds) when thinking finished; absent while in progress. */
  endTime?: number;
}
|
||||
|
||||
/** Props accepted by the ThinkingBlock component. */
interface ThinkingBlockProps {
  /** The thinking part to display. */
  part: ThinkingPart;
  /** When true, the block is shown in its pulsing "Thinking..." state. */
  isStreaming?: boolean;
}
|
||||
|
||||
/**
 * Turns a millisecond count into a short label.
 *
 * Values below one second are printed as-is with an `ms` suffix; anything
 * else is converted to seconds with exactly one decimal place and an `s`
 * suffix.
 *
 * @param ms - Duration in milliseconds
 * @returns The formatted label, e.g. `250ms` or `1.2s`
 */
function formatDuration(ms: number): string {
  const isSubSecond = ms < 1000;
  return isSubSecond ? `${ms}ms` : `${(ms / 1000).toFixed(1)}s`;
}
|
||||
|
||||
/**
 * Calculates the elapsed time between a start and an end timestamp.
 *
 * Only an absent (`undefined`/`null`) timestamp counts as missing; a value of
 * `0` is a valid timestamp and is used as given.
 *
 * @param startTime - Start timestamp in milliseconds
 * @param endTime - End timestamp in milliseconds; when absent, the current
 *   time (`Date.now()`) is used so an in-progress span can be measured
 * @returns `endTime - startTime` in milliseconds, or `null` when there is no
 *   start timestamp
 */
function calculateDuration(
  startTime?: number,
  endTime?: number
): number | null {
  // `== null` matches both undefined and null without rejecting 0.
  if (startTime == null) return null;
  const end = endTime ?? Date.now();
  return end - startTime;
}
|
||||
|
||||
/**
 * Collapsible block that shows a model's thinking text.
 *
 * The header shows a brain icon, a label ("Thinking..." while pulsing,
 * "Thought Process" otherwise), a duration badge when a start time is known,
 * and animated dots while pulsing. Expanding the block reveals the thinking
 * text and, when available, the start/end times.
 *
 * The block pulses when `isStreaming` is true or when the part has no end
 * timestamp yet.
 *
 * @param part - The thinking part to render
 * @param isStreaming - Forces the pulsing state; defaults to `false`
 */
export function ThinkingBlock({
  part,
  isStreaming = false,
}: ThinkingBlockProps) {
  const [isOpen, setIsOpen] = useState(false);

  const duration = calculateDuration(part.startTime, part.endTime);
  // Explicit null checks: a numeric timestamp must never be used directly as
  // a JSX `&&` guard, because a value of 0 would be rendered as the text "0".
  const hasStartTime = part.startTime != null;
  const isComplete = part.endTime != null;
  const isPulsing = isStreaming || !isComplete;

  return (
    <Collapsible
      open={isOpen}
      onOpenChange={setIsOpen}
      className="my-2 rounded-lg border border-purple-200 bg-purple-50 dark:border-purple-800 dark:bg-purple-950/30"
    >
      <CollapsibleTrigger className="flex w-full items-center justify-between px-4 py-2.5 hover:bg-purple-100 dark:hover:bg-purple-900/30 transition-colors rounded-lg">
        <div className="flex items-center gap-2">
          {/* Pulsing Brain Icon */}
          <motion.div
            animate={
              isPulsing
                ? {
                    scale: [1, 1.1, 1],
                    opacity: [0.7, 1, 0.7],
                  }
                : { scale: 1, opacity: 0.8 }
            }
            transition={
              isPulsing
                ? {
                    duration: 1.5,
                    repeat: Infinity,
                    ease: "easeInOut",
                  }
                : {}
            }
          >
            <Brain className="h-4 w-4 text-purple-600 dark:text-purple-400" />
          </motion.div>

          {/* Label */}
          <span className="text-sm font-medium text-purple-700 dark:text-purple-300">
            {isPulsing ? "Thinking..." : "Thought Process"}
          </span>

          {/* Duration Badge */}
          {duration !== null && (
            <motion.span
              initial={{ opacity: 0, scale: 0.8 }}
              animate={{ opacity: 1, scale: 1 }}
              className="text-xs font-mono bg-purple-200 dark:bg-purple-800 text-purple-800 dark:text-purple-200 px-2 py-0.5 rounded-full"
            >
              {formatDuration(duration)}
            </motion.span>
          )}

          {/* Streaming Indicator Dots */}
          {isPulsing && (
            <motion.div
              className="flex gap-1"
              initial={{ opacity: 0 }}
              animate={{ opacity: 1 }}
            >
              {[0, 1, 2].map(i => (
                <motion.div
                  key={i}
                  className="w-1 h-1 bg-purple-500 dark:bg-purple-400 rounded-full"
                  animate={{
                    y: [0, -4, 0],
                  }}
                  transition={{
                    duration: 0.6,
                    repeat: Infinity,
                    delay: i * 0.15,
                    ease: "easeInOut",
                  }}
                />
              ))}
            </motion.div>
          )}
        </div>

        {/* Chevron Toggle Icon */}
        <motion.div
          animate={{ rotate: isOpen ? 180 : 0 }}
          transition={{ duration: 0.2 }}
        >
          <ChevronDown className="h-4 w-4 text-purple-600 dark:text-purple-400" />
        </motion.div>
      </CollapsibleTrigger>

      <AnimatePresence initial={false}>
        {isOpen && (
          <CollapsibleContent forceMount>
            <motion.div
              initial={{ height: 0, opacity: 0 }}
              animate={{
                height: "auto",
                opacity: 1,
                transition: {
                  height: {
                    duration: 0.3,
                    ease: "easeOut",
                  },
                  opacity: {
                    duration: 0.2,
                    delay: 0.1,
                  },
                },
              }}
              exit={{
                height: 0,
                opacity: 0,
                transition: {
                  height: {
                    duration: 0.3,
                    ease: "easeIn",
                  },
                  opacity: {
                    duration: 0.2,
                  },
                },
              }}
              className="overflow-hidden"
            >
              <div className="px-4 pb-3 pt-1">
                {/* Content Area */}
                <div
                  className={cn(
                    "rounded-md px-3 py-2.5 text-sm",
                    "bg-white dark:bg-purple-950/50",
                    "border border-purple-200 dark:border-purple-800",
                    "font-mono text-purple-900 dark:text-purple-100",
                    "whitespace-pre-wrap break-words"
                  )}
                >
                  {part.content || (
                    <span className="text-purple-400 dark:text-purple-600 italic">
                      Thinking in progress...
                    </span>
                  )}
                </div>

                {/* Timestamp Information (if available) */}
                {hasStartTime && (
                  <div className="mt-2 text-xs text-purple-600 dark:text-purple-400 font-mono">
                    Started: {new Date(part.startTime!).toLocaleTimeString()}
                    {part.endTime != null && (
                      <span className="ml-3">
                        Ended: {new Date(part.endTime).toLocaleTimeString()}
                      </span>
                    )}
                  </div>
                )}
              </div>
            </motion.div>
          </CollapsibleContent>
        )}
      </AnimatePresence>
    </Collapsible>
  );
}
|
||||
149
llama_stack/ui/lib/xml-parser.ts
Normal file
149
llama_stack/ui/lib/xml-parser.ts
Normal file
|
|
@ -0,0 +1,149 @@
|
|||
/**
|
||||
* Utility functions for parsing XML-style tags from LLM responses
|
||||
* Specifically handles <think>...</think> tags in streaming content
|
||||
*/
|
||||
|
||||
/** One complete `<think>...</think>` block found in a piece of text. */
export interface ThinkingBlock {
  /** Text between the tags, with surrounding whitespace trimmed. */
  content: string;
  /** Offset of the opening `<think>` tag in the original text. */
  startIndex: number;
  /** Offset just past the closing `</think>` tag in the original text. */
  endIndex: number;
}

/** Result of splitting text into thinking blocks and everything else. */
export interface ParsedThinkingContent {
  /** All complete thinking blocks, in order of appearance. */
  thinkingBlocks: ThinkingBlock[];
  /** The input with every complete thinking block removed. */
  cleanText: string;
  /** True when an unclosed (or partially typed) `<think` tag trails the text. */
  hasIncompleteTag: boolean;
}

/**
 * Extracts <think>...</think> blocks from text and returns cleaned text
 * Handles streaming scenarios where tags might be incomplete
 *
 * Only complete blocks are removed from `cleanText`; an unclosed trailing
 * block is left in place and reported through `hasIncompleteTag`.
 *
 * @param text - Raw text possibly containing <think> tags
 * @returns Object with thinking blocks, cleaned text, and incomplete tag status
 */
export function extractThinkTags(text: string): ParsedThinkingContent {
  const thinkingBlocks: ThinkingBlock[] = [];
  const segments: string[] = [];

  // Non-greedy so that several blocks in one text are matched separately.
  const completeTagRegex = /<think>([\s\S]*?)<\/think>/g;

  let lastIndex = 0;
  let match: RegExpExecArray | null;

  while ((match = completeTagRegex.exec(text)) !== null) {
    const endIndex = match.index + match[0].length;

    thinkingBlocks.push({
      content: match[1].trim(),
      startIndex: match.index,
      endIndex,
    });

    // Keep the text that sits between the previous block and this one.
    segments.push(text.substring(lastIndex, match.index));
    lastIndex = endIndex;
  }

  // Everything after the last complete block (the whole text if none matched).
  const tail = text.substring(lastIndex);
  segments.push(tail);

  // Look for an unfinished tag only in the tail. Testing the whole text would
  // match from the first complete block's "<think" to the end of the string
  // and report a false positive for every text that contains a closed block.
  const hasIncompleteTag = /<think(?:>[\s\S]*)?$/.test(tail);

  return {
    thinkingBlocks,
    cleanText: segments.join(""),
    hasIncompleteTag,
  };
}
|
||||
|
||||
/**
 * Checks whether the text currently has an unclosed <think> tag
 * Useful for buffering during streaming
 *
 * A `<think>` is unclosed exactly when no `</think>` appears after it, so it
 * is enough to compare the positions of the last opener and the last closer.
 * Counting openers against closers would let a stray `</think>` earlier in
 * the text cancel out a later opener that really is still open.
 *
 * @param text - Text to check
 * @returns True if there's an opening tag with no closing tag after it
 */
export function isThinkTagOpen(text: string): boolean {
  // Both are -1 when absent, which correctly yields false for plain text.
  return text.lastIndexOf("<think>") > text.lastIndexOf("</think>");
}
|
||||
|
||||
/**
 * Extracts thinking content from a buffer of accumulated text
 * Used during streaming to progressively extract thinking blocks
 *
 * Three outcomes are possible:
 * - The buffer holds a complete `<think>...</think>` block: the trimmed
 *   content of the first such block is returned, `remainingBuffer` is the
 *   text after its closing tag, and `isComplete` is true.
 * - The buffer holds an opening `<think>` with no closing tag: everything
 *   after the first opener is returned untrimmed, the buffer is returned
 *   unchanged, and `isComplete` is false.
 * - No `<think>` tag at all: `thinking` is empty and the buffer is unchanged.
 *
 * @param buffer - Accumulated text buffer
 * @returns Object with extracted thinking content and remaining buffer
 */
export function extractStreamingThinking(buffer: string): {
  thinking: string;
  remainingBuffer: string;
  isComplete: boolean;
} {
  const openTag = "<think>";

  // exec() returns a result whose index is always defined, so no non-null
  // assertion is needed to compute where the block ends.
  const completeMatch = /<think>([\s\S]*?)<\/think>/.exec(buffer);

  if (completeMatch !== null) {
    const blockEnd = completeMatch.index + completeMatch[0].length;
    return {
      thinking: completeMatch[1].trim(),
      remainingBuffer: buffer.substring(blockEnd),
      isComplete: true,
    };
  }

  // No closed block: report whatever has streamed in after the first opener.
  const openIndex = buffer.indexOf(openTag);

  if (openIndex !== -1) {
    return {
      thinking: buffer.substring(openIndex + openTag.length), // Content so far
      remainingBuffer: buffer, // Keep buffer intact
      isComplete: false,
    };
  }

  // No thinking content found
  return {
    thinking: "",
    remainingBuffer: buffer,
    isComplete: false,
  };
}
|
||||
|
||||
/**
 * Sanitizes thinking content for display
 * Removes extra whitespace and normalizes line breaks
 *
 * Steps, in order:
 * 1. Convert `\r\n` / `\r` line endings to `\n`.
 * 2. Trim leading and trailing whitespace from every line.
 * 3. Collapse runs of three or more line breaks down to two, so at most one
 *    blank line separates paragraphs.
 * 4. Trim the result as a whole.
 *
 * Lines are trimmed individually rather than with a multiline regex such as
 * `/^\s+|\s+$/gm`: in that pattern `\s` also matches newlines, which deletes
 * blank lines and glues neighbouring paragraphs together.
 *
 * @param content - Raw thinking content
 * @returns Cleaned content
 */
export function sanitizeThinkingContent(content: string): string {
  return content
    .replace(/\r\n?/g, "\n")
    .split("\n")
    .map(line => line.trim())
    .join("\n")
    .replace(/\n{3,}/g, "\n\n") // Max 2 consecutive line breaks
    .trim();
}
|
||||
|
|
@ -30,7 +30,7 @@ dependencies = [
|
|||
"httpx",
|
||||
"jinja2>=3.1.6",
|
||||
"jsonschema",
|
||||
"llama-stack-client>=0.3.0",
|
||||
"llama-stack-client>=0.2.23",
|
||||
"openai>=1.107", # for expires_after support
|
||||
"prompt-toolkit",
|
||||
"python-dotenv",
|
||||
|
|
@ -57,7 +57,6 @@ dependencies = [
|
|||
ui = [
|
||||
"streamlit",
|
||||
"pandas",
|
||||
"llama-stack-client>=0.3.0",
|
||||
"streamlit-option-menu",
|
||||
]
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue