UI enhancements

rh-pre-commit.version: 2.3.2 rh-pre-commit.check-secrets: ENABLED
2025-12-11 19:56:03 +00:00 · 2025-10-28 18:29:23 +08:00 · 2025-10-28 18:29:23 +08:00 · 9964287698
commit 9964287698
parent 5ef6ccf90e
7 changed files with 2688 additions and 1584 deletions
--- a/llama_stack/ui/app/chat-playground/page.tsx
+++ b/llama_stack/ui/app/chat-playground/page.tsx
@ -27,6 +27,12 @@ import {
  cleanMessageContent,
  extractCleanText,
 } from "@/lib/message-content-utils";
+import {
+  extractThinkTags,
+  extractStreamingThinking,
+  sanitizeThinkingContent,
+} from "@/lib/xml-parser";
+import type { ThinkingPart } from "@/components/chat-playground/thinking-block";
 export default function ChatPlaygroundPage() {
  const [currentSession, setCurrentSession] = useState<ChatSession | null>(
    null
@ -1057,6 +1063,9 @@ export default function ChatPlaygroundPage() {
      });

      let fullContent = "";
+      let thinkingBuffer = "";
+      const thinkingParts: ThinkingPart[] = [];
+      let currentThinkingStartTime: number | null = null;

      for await (const chunk of response) {
        const { text: deltaText } = processChunk(chunk);
@ -1087,7 +1096,36 @@ export default function ChatPlaygroundPage() {
        }

        if (deltaText) {
+          // Add to buffer for thinking extraction
+          thinkingBuffer += deltaText;
+
+          // Try to extract thinking content from buffer
+          const streamingResult = extractStreamingThinking(thinkingBuffer);
+
+          if (streamingResult.isComplete && streamingResult.thinking) {
+            // We have a complete thinking block
+            const endTime = Date.now();
+            const thinkingPart: ThinkingPart = {
+              type: "thinking",
+              content: sanitizeThinkingContent(streamingResult.thinking),
+              startTime: currentThinkingStartTime || endTime,
+              endTime: endTime,
+            };
+            thinkingParts.push(thinkingPart);
+            thinkingBuffer = streamingResult.remainingBuffer;
+            currentThinkingStartTime = null;
+          } else if (
+            !streamingResult.isComplete &&
+            streamingResult.thinking &&
+            !currentThinkingStartTime
+          ) {
+            // Start of a thinking block
+            currentThinkingStartTime = Date.now();
+          }
+
+          // Update full content with text that doesn't include thinking tags
          fullContent += deltaText;
+          const cleanedFullContent = extractThinkTags(fullContent).cleanText;

          flushSync(() => {
            setCurrentSession(prev => {
@ -1095,7 +1133,16 @@ export default function ChatPlaygroundPage() {
              const newMessages = [...prev.messages];
              const last = newMessages[newMessages.length - 1];
              if (last.role === "assistant") {
-                last.content = fullContent;
+                // Update content with cleaned text (without thinking tags)
+                last.content = cleanedFullContent;
+
+                // Add thinking parts to the message
+                if (thinkingParts.length > 0) {
+                  last.parts = [
+                    ...thinkingParts,
+                    { type: "text", text: cleanedFullContent },
+                  ];
+                }
              }
              const updatedSession = {
                ...prev,
--- a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx
+++ b/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx
@ -34,7 +34,14 @@ export default function ContentDetailPage() {
  const getTextFromContent = (content: unknown): string => {
    if (typeof content === "string") {
      return content;
-    } else if (content && content.type === "text") {
+    } else if (
+      content &&
+      typeof content === "object" &&
+      "type" in content &&
+      content.type === "text" &&
+      "text" in content &&
+      typeof content.text === "string"
+    ) {
      return content.text;
    }
    return "";
--- a/llama_stack/ui/components/chat-playground/chat-message.tsx
+++ b/llama_stack/ui/components/chat-playground/chat-message.tsx
@ -13,6 +13,10 @@ import {
 } from "@/components/ui/collapsible";
 import { FilePreview } from "@/components/ui/file-preview";
 import { MarkdownRenderer } from "@/components/chat-playground/markdown-renderer";
+import {
+  ThinkingBlock,
+  type ThinkingPart,
+} from "@/components/chat-playground/thinking-block";

 const chatBubbleVariants = cva(
  "group/message relative break-words rounded-lg p-3 text-sm sm:max-w-[70%]",
@ -117,6 +121,7 @@ interface StepStartPart {
 type MessagePart =
  | TextPart
  | ReasoningPart
+  | ThinkingPart
  | ToolInvocationPart
  | SourcePart
  | FilePart
@ -235,6 +240,8 @@ export const ChatMessage: React.FC<ChatMessageProps> = ({
        );
      } else if (part.type === "reasoning") {
        return <ReasoningBlock key={`reasoning-${index}`} part={part} />;
+      } else if (part.type === "thinking") {
+        return <ThinkingBlock key={`thinking-${index}`} part={part} />;
      } else if (part.type === "tool-invocation") {
        return (
          <ToolCall
--- a/llama_stack/ui/components/chat-playground/thinking-block.tsx
+++ b/llama_stack/ui/components/chat-playground/thinking-block.tsx
@ -0,0 +1,216 @@
+/**
+ * ThinkingBlock Component
+ * Displays LLM thinking/reasoning content in a collapsible, animated block
+ * Shows duration, pulsing animation during streaming, and expandable content
+ */
+
+"use client";
+
+import { useState } from "react";
+import { ChevronDown, Brain } from "lucide-react";
+import { motion, AnimatePresence } from "framer-motion";
+import {
+  Collapsible,
+  CollapsibleContent,
+  CollapsibleTrigger,
+} from "@/components/ui/collapsible";
+import { cn } from "@/lib/utils";
+
+/**
+ * Message part carrying a model's thinking text, rendered by ThinkingBlock.
+ */
+export interface ThinkingPart {
+  // Discriminant that identifies this part kind.
+  type: "thinking";
+  // Text shown in the expanded content area of the block.
+  content: string;
+  // Millisecond timestamp (it is passed to `new Date(...)`); when absent no duration is computed.
+  startTime?: number;
+  // Millisecond timestamp; while absent the block is rendered as still in progress.
+  endTime?: number;
+}
+
+/** Props accepted by the ThinkingBlock component. */
+interface ThinkingBlockProps {
+  // The thinking part to render.
+  part: ThinkingPart;
+  // When true, forces the animated in-progress presentation; the component defaults it to false.
+  isStreaming?: boolean;
+}
+
+/**
+ * Renders a millisecond duration as a short label: values under one second
+ * are shown verbatim with an "ms" suffix, everything else as seconds with a
+ * single decimal place and an "s" suffix.
+ */
+function formatDuration(ms: number): string {
+  return ms < 1000 ? `${ms}ms` : `${(ms / 1000).toFixed(1)}s`;
+}
+
+/**
+ * Computes the elapsed time between two millisecond timestamps.
+ *
+ * @param startTime - Start timestamp in ms; when missing there is nothing to measure.
+ * @param endTime - End timestamp in ms; when missing the current time is used,
+ *   so an in-progress block reports its running duration.
+ * @returns Elapsed milliseconds (never negative), or null when no start time is given.
+ */
+function calculateDuration(
+  startTime?: number,
+  endTime?: number
+): number | null {
+  // Nullish check rather than truthiness so that a timestamp of 0 is not
+  // mistaken for "missing".
+  if (startTime == null) return null;
+  const end = endTime ?? Date.now();
+  // Clamp: an end before the start (e.g. a clock adjustment) would otherwise
+  // be displayed as a negative duration.
+  return Math.max(0, end - startTime);
+}
+
+/**
+ * Collapsible panel that renders a single thinking part.
+ *
+ * The trigger row shows a brain icon, a label ("Thinking..." while in
+ * progress, "Thought Process" otherwise), a duration badge when a start time
+ * is known, and bouncing dots while in progress. Expanding the panel reveals
+ * the thinking text and the start/end clock times.
+ *
+ * @param part - Thinking part to display.
+ * @param isStreaming - Forces the in-progress presentation; defaults to false.
+ */
+export function ThinkingBlock({
+  part,
+  isStreaming = false,
+}: ThinkingBlockProps) {
+  // Collapsed by default; toggled through the Collapsible trigger.
+  const [isOpen, setIsOpen] = useState(false);
+
+  // Null when the part has no start time; measured against the current time while endTime is unset.
+  const duration = calculateDuration(part.startTime, part.endTime);
+  // A part counts as finished only once it carries an end time.
+  const isComplete = !!part.endTime;
+  // NOTE(review): a part without endTime pulses indefinitely even when
+  // isStreaming is false — confirm this is intended for parts that never
+  // receive timestamps.
+  const isPulsing = isStreaming || !isComplete;
+
+  return (
+    <Collapsible
+      open={isOpen}
+      onOpenChange={setIsOpen}
+      className="my-2 rounded-lg border border-purple-200 bg-purple-50 dark:border-purple-800 dark:bg-purple-950/30"
+    >
+      <CollapsibleTrigger className="flex w-full items-center justify-between px-4 py-2.5 hover:bg-purple-100 dark:hover:bg-purple-900/30 transition-colors rounded-lg">
+        <div className="flex items-center gap-2">
+          {/* Pulsing Brain Icon */}
+          <motion.div
+            animate={
+              isPulsing
+                ? {
+                    scale: [1, 1.1, 1],
+                    opacity: [0.7, 1, 0.7],
+                  }
+                : { scale: 1, opacity: 0.8 }
+            }
+            transition={
+              isPulsing
+                ? {
+                    duration: 1.5,
+                    repeat: Infinity,
+                    ease: "easeInOut",
+                  }
+                : {}
+            }
+          >
+            <Brain className="h-4 w-4 text-purple-600 dark:text-purple-400" />
+          </motion.div>
+
+          {/* Label */}
+          <span className="text-sm font-medium text-purple-700 dark:text-purple-300">
+            {isPulsing ? "Thinking..." : "Thought Process"}
+          </span>
+
+          {/* Duration Badge */}
+          {duration !== null && (
+            <motion.span
+              initial={{ opacity: 0, scale: 0.8 }}
+              animate={{ opacity: 1, scale: 1 }}
+              className="text-xs font-mono bg-purple-200 dark:bg-purple-800 text-purple-800 dark:text-purple-200 px-2 py-0.5 rounded-full"
+            >
+              {formatDuration(duration)}
+            </motion.span>
+          )}
+
+          {/* Streaming Indicator Dots */}
+          {isPulsing && (
+            <motion.div
+              className="flex gap-1"
+              initial={{ opacity: 0 }}
+              animate={{ opacity: 1 }}
+            >
+              {[0, 1, 2].map(i => (
+                <motion.div
+                  key={i}
+                  className="w-1 h-1 bg-purple-500 dark:bg-purple-400 rounded-full"
+                  animate={{
+                    y: [0, -4, 0],
+                  }}
+                  transition={{
+                    duration: 0.6,
+                    repeat: Infinity,
+                    delay: i * 0.15,
+                    ease: "easeInOut",
+                  }}
+                />
+              ))}
+            </motion.div>
+          )}
+        </div>
+
+        {/* Chevron Toggle Icon */}
+        <motion.div
+          animate={{ rotate: isOpen ? 180 : 0 }}
+          transition={{ duration: 0.2 }}
+        >
+          <ChevronDown className="h-4 w-4 text-purple-600 dark:text-purple-400" />
+        </motion.div>
+      </CollapsibleTrigger>
+
+      <AnimatePresence initial={false}>
+        {isOpen && (
+          <CollapsibleContent forceMount>
+            <motion.div
+              initial={{ height: 0, opacity: 0 }}
+              animate={{
+                height: "auto",
+                opacity: 1,
+                transition: {
+                  height: {
+                    duration: 0.3,
+                    ease: "easeOut",
+                  },
+                  opacity: {
+                    duration: 0.2,
+                    delay: 0.1,
+                  },
+                },
+              }}
+              exit={{
+                height: 0,
+                opacity: 0,
+                transition: {
+                  height: {
+                    duration: 0.3,
+                    ease: "easeIn",
+                  },
+                  opacity: {
+                    duration: 0.2,
+                  },
+                },
+              }}
+              className="overflow-hidden"
+            >
+              <div className="px-4 pb-3 pt-1">
+                {/* Content Area */}
+                <div
+                  className={cn(
+                    "rounded-md px-3 py-2.5 text-sm",
+                    "bg-white dark:bg-purple-950/50",
+                    "border border-purple-200 dark:border-purple-800",
+                    "font-mono text-purple-900 dark:text-purple-100",
+                    "whitespace-pre-wrap break-words"
+                  )}
+                >
+                  {part.content || (
+                    <span className="text-purple-400 dark:text-purple-600 italic">
+                      Thinking in progress...
+                    </span>
+                  )}
+                </div>
+
+                {/* Timestamp Information (if available) */}
+                {part.startTime && (
+                  <div className="mt-2 text-xs text-purple-600 dark:text-purple-400 font-mono">
+                    Started: {new Date(part.startTime).toLocaleTimeString()}
+                    {part.endTime && (
+                      <span className="ml-3">
+                        Ended: {new Date(part.endTime).toLocaleTimeString()}
+                      </span>
+                    )}
+                  </div>
+                )}
+              </div>
+            </motion.div>
+          </CollapsibleContent>
+        )}
+      </AnimatePresence>
+    </Collapsible>
+  );
+}
--- a/llama_stack/ui/lib/xml-parser.ts
+++ b/llama_stack/ui/lib/xml-parser.ts
@ -0,0 +1,149 @@
+/**
+ * Utility functions for parsing XML-style tags from LLM responses
+ * Specifically handles <think>...</think> tags in streaming content
+ */
+
+/** One complete <think>...</think> block located in a piece of text. */
+export interface ThinkingBlock {
+  // Inner text of the block with surrounding whitespace trimmed.
+  content: string;
+  // Offset in the source text where the opening <think> tag begins.
+  startIndex: number;
+  // Offset just past the closing </think> tag (exclusive end).
+  endIndex: number;
+}
+
+/** Result returned by extractThinkTags. */
+export interface ParsedThinkingContent {
+  // Every complete <think>...</think> block, in order of appearance.
+  thinkingBlocks: ThinkingBlock[];
+  // The input text with all complete blocks removed.
+  cleanText: string;
+  // Flag set by the parser's end-of-text check for an unterminated `<think` tag.
+  hasIncompleteTag: boolean;
+}
+
+/**
+ * Extracts <think>...</think> blocks from text and returns cleaned text
+ * Handles streaming scenarios where tags might be incomplete
+ *
+ * Only complete blocks are removed from `cleanText`; an unterminated block at
+ * the end of the text is left in place and reported via `hasIncompleteTag`.
+ *
+ * @param text - Raw text possibly containing <think> tags
+ * @returns Object with thinking blocks, cleaned text, and incomplete tag status
+ */
+export function extractThinkTags(text: string): ParsedThinkingContent {
+  const thinkingBlocks: ThinkingBlock[] = [];
+  const segments: string[] = [];
+
+  // Regex to match complete <think>...</think> blocks
+  // Uses non-greedy matching to handle multiple blocks
+  const completeTagRegex = /<think>([\s\S]*?)<\/think>/g;
+
+  let match: RegExpExecArray | null;
+  let lastIndex = 0;
+
+  // Extract all complete thinking blocks, keeping the text between them
+  while ((match = completeTagRegex.exec(text)) !== null) {
+    const endIndex = match.index + match[0].length;
+    thinkingBlocks.push({
+      content: match[1].trim(),
+      startIndex: match.index,
+      endIndex,
+    });
+
+    segments.push(text.substring(lastIndex, match.index));
+    lastIndex = endIndex;
+  }
+
+  // Text after the last complete block (the whole input when there is none)
+  const tail = text.substring(lastIndex);
+  segments.push(tail);
+
+  // Check for an incomplete opening tag (streaming scenario): a trailing
+  // `<think` or `<think>content` without a closing tag. Only the tail is
+  // inspected — testing the full text would match from the first complete
+  // block's opening tag to the end and report a false positive.
+  const incompleteOpenTag = /<think(?:>[\s\S]*)?$/;
+
+  return {
+    thinkingBlocks,
+    cleanText: segments.join(""),
+    hasIncompleteTag: incompleteOpenTag.test(tail),
+  };
+}
+
+/**
+ * Checks if text contains a <think> tag that has not been closed yet
+ * Useful for buffering during streaming
+ *
+ * @param text - Text to check
+ * @returns True if there's an opening tag with no closing tag after it
+ */
+export function isThinkTagOpen(text: string): boolean {
+  // Remove all complete blocks first
+  const withoutCompleteTags = text.replace(/<think>[\s\S]*?<\/think>/g, "");
+
+  // Any <think> that survives the removal has no </think> after it (otherwise
+  // it would have been part of a complete block), so it is open. Counting
+  // opening against closing tags would be wrong here: a stray </think> earlier
+  // in the text would cancel out a genuinely open tag.
+  return withoutCompleteTags.includes("<think>");
+}
+
+/**
+ * Pulls the first thinking block out of an accumulated streaming buffer.
+ *
+ * - First <think> has a matching </think>: returns the trimmed inner text,
+ *   the buffer content that follows the closing tag, and `isComplete: true`.
+ * - A <think> is present but not yet closed: returns the raw text received
+ *   after the tag so far and leaves the buffer untouched.
+ * - No <think> at all: returns empty thinking and the untouched buffer.
+ *
+ * @param buffer - Accumulated text buffer
+ * @returns Object with extracted thinking content and remaining buffer
+ */
+export function extractStreamingThinking(buffer: string): {
+  thinking: string;
+  remainingBuffer: string;
+  isComplete: boolean;
+} {
+  const openTag = "<think>";
+  const closeTag = "</think>";
+
+  const openAt = buffer.indexOf(openTag);
+  if (openAt === -1) {
+    // Nothing to extract yet
+    return { thinking: "", remainingBuffer: buffer, isComplete: false };
+  }
+
+  const bodyStart = openAt + openTag.length;
+  const closeAt = buffer.indexOf(closeTag, bodyStart);
+  if (closeAt === -1) {
+    // Block still streaming: expose the partial content, keep the buffer intact
+    return {
+      thinking: buffer.slice(bodyStart),
+      remainingBuffer: buffer,
+      isComplete: false,
+    };
+  }
+
+  // Finished block: hand back its content and whatever follows the closing tag
+  return {
+    thinking: buffer.slice(bodyStart, closeAt).trim(),
+    remainingBuffer: buffer.slice(closeAt + closeTag.length),
+    isComplete: true,
+  };
+}
+
+/**
+ * Sanitizes thinking content for display
+ * Trims every line, limits blank-line runs to one blank line, and trims the
+ * result as a whole
+ *
+ * @param content - Raw thinking content
+ * @returns Cleaned content
+ */
+export function sanitizeThinkingContent(content: string): string {
+  return (
+    content
+      // Trim each line. `[^\S\n]` is "whitespace except newline": with the
+      // `m` flag a plain `\s` also consumes the line breaks themselves, which
+      // deletes blank lines and glues paragraphs together.
+      .replace(/^[^\S\n]+|[^\S\n]+$/gm, "")
+      // Max 2 consecutive line breaks. Runs after the per-line trim so that
+      // whitespace-only lines count as blank lines.
+      .replace(/\n{3,}/g, "\n\n")
+      .trim()
+  );
+}
--- a/pyproject.toml
+++ b/pyproject.toml
@ -30,7 +30,7 @@ dependencies = [
    "httpx",
    "jinja2>=3.1.6",
    "jsonschema",
-    "llama-stack-client>=0.3.0",
+    "llama-stack-client>=0.2.23",
    "openai>=1.107",                                  # for expires_after support
    "prompt-toolkit",
    "python-dotenv",
@ -57,7 +57,6 @@ dependencies = [
 ui = [
    "streamlit",
    "pandas",
-    "llama-stack-client>=0.3.0",
    "streamlit-option-menu",
 ]

--- a/uv.lock
+++ b/uv.lock