Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-06 12:37:33 +00:00)

Merge branch 'main' into content-extension
Commit 4c1f187c71: 42 changed files with 2089 additions and 389 deletions
@@ -80,7 +80,7 @@ def get_provider_dependencies(
     normal_deps = []
     special_deps = []
     for package in deps:
-        if "--no-deps" in package or "--index-url" in package:
+        if any(f in package for f in ["--no-deps", "--index-url", "--extra-index-url"]):
             special_deps.append(package)
         else:
             normal_deps.append(package)
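The hunk above widens the set of pip flags that route a requirement string into `special_deps`. A minimal standalone sketch of that classification (the flag tuple and helper name are assumptions for illustration, not the actual llama-stack helper):

```python
# Sketch: split pip requirement strings that carry their own pip flags from
# ordinary ones, so the flagged specs can be installed by a separate pip call.
SPECIAL_FLAGS = ("--no-deps", "--index-url", "--extra-index-url")  # assumed flag list

def split_deps(deps: list[str]) -> tuple[list[str], list[str]]:
    normal_deps: list[str] = []
    special_deps: list[str] = []
    for package in deps:
        if any(flag in package for flag in SPECIAL_FLAGS):
            special_deps.append(package)
        else:
            normal_deps.append(package)
    return normal_deps, special_deps

print(split_deps(["numpy", "sentence-transformers --no-deps"]))
# (['numpy'], ['sentence-transformers --no-deps'])
```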
@@ -225,7 +225,10 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
 
         try:
             result = re.sub(pattern, get_env_var, config)
-            return _convert_string_to_proper_type(result)
+            # Only apply type conversion if substitution actually happened
+            if result != config:
+                return _convert_string_to_proper_type(result)
+            return result
         except EnvVarError as e:
             raise EnvVarError(e.var_name, e.path) from None
 
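The guard added above only runs type conversion when an `${env.VAR:=default}` reference was actually expanded. A toy sketch of the idea, with a simplified pattern and illustrative helper names rather than the real llama-stack implementation:

```python
import re

_ENV_PATTERN = re.compile(r"\$\{env\.([A-Z0-9_]+)(?::=([^}]*))?\}")  # simplified pattern

def _convert(value: str):
    # Toy type conversion: booleans and ints only.
    if value in ("true", "false"):
        return value == "true"
    try:
        return int(value)
    except ValueError:
        return value

def substitute(config: str, env: dict[str, str]):
    result = _ENV_PATTERN.sub(lambda m: env.get(m.group(1), m.group(2) or ""), config)
    # Only convert when a substitution happened; otherwise a literal config
    # string such as "true" would silently be coerced to a bool.
    if result != config:
        return _convert(result)
    return result

print(substitute("${env.PORT:=8321}", {}))  # -> 8321 (int)
print(substitute("true", {}))               # -> 'true' (left untouched)
```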
@@ -34,7 +34,7 @@ distribution_spec:
     telemetry:
     - provider_type: inline::meta-reference
     post_training:
-    - provider_type: inline::huggingface-cpu
+    - provider_type: inline::torchtune-cpu
     eval:
     - provider_type: inline::meta-reference
     datasetio:
@@ -156,13 +156,10 @@ providers:
       sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/trace_store.db
       otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
   post_training:
-  - provider_id: huggingface-cpu
-    provider_type: inline::huggingface-cpu
+  - provider_id: torchtune-cpu
+    provider_type: inline::torchtune-cpu
     config:
-      checkpoint_format: huggingface
-      distributed_backend: null
-      device: cpu
-      dpo_output_dir: ~/.llama/distributions/ci-tests/dpo_output
+      checkpoint_format: meta
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -1,7 +1,7 @@
 ---
 orphan: true
 ---
-# Meta Reference Distribution
+# Meta Reference GPU Distribution
 
 ```{toctree}
 :maxdepth: 2
@@ -29,7 +29,7 @@ The following environment variables can be configured:
 
 ## Prerequisite: Downloading Models
 
-Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
+Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](../../references/llama_cli_reference/download_models.md) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
 
 ```
 $ llama model list --downloaded
@@ -35,7 +35,7 @@ distribution_spec:
     telemetry:
     - provider_type: inline::meta-reference
     post_training:
-    - provider_type: inline::torchtune-gpu
+    - provider_type: inline::huggingface-gpu
     eval:
     - provider_type: inline::meta-reference
     datasetio:
@@ -156,10 +156,13 @@ providers:
       sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/trace_store.db
       otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
   post_training:
-  - provider_id: torchtune-gpu
-    provider_type: inline::torchtune-gpu
+  - provider_id: huggingface-gpu
+    provider_type: inline::huggingface-gpu
     config:
-      checkpoint_format: meta
+      checkpoint_format: huggingface
+      distributed_backend: null
+      device: cpu
+      dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -17,6 +17,6 @@ def get_distribution_template() -> DistributionTemplate:
     template.description = "Quick start template for running Llama Stack with several popular providers. This distribution is intended for GPU-enabled environments."
 
     template.providers["post_training"] = [
-        BuildProvider(provider_type="inline::torchtune-gpu"),
+        BuildProvider(provider_type="inline::huggingface-gpu"),
     ]
     return template
@@ -35,7 +35,7 @@ distribution_spec:
     telemetry:
     - provider_type: inline::meta-reference
     post_training:
-    - provider_type: inline::huggingface-cpu
+    - provider_type: inline::torchtune-cpu
     eval:
     - provider_type: inline::meta-reference
     datasetio:
@@ -156,13 +156,10 @@ providers:
       sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/trace_store.db
       otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
   post_training:
-  - provider_id: huggingface-cpu
-    provider_type: inline::huggingface-cpu
+  - provider_id: torchtune-cpu
+    provider_type: inline::torchtune-cpu
     config:
-      checkpoint_format: huggingface
-      distributed_backend: null
-      device: cpu
-      dpo_output_dir: ~/.llama/distributions/starter/dpo_output
+      checkpoint_format: meta
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -120,7 +120,7 @@ def get_distribution_template() -> DistributionTemplate:
         ],
         "agents": [BuildProvider(provider_type="inline::meta-reference")],
         "telemetry": [BuildProvider(provider_type="inline::meta-reference")],
-        "post_training": [BuildProvider(provider_type="inline::huggingface-cpu")],
+        "post_training": [BuildProvider(provider_type="inline::torchtune-cpu")],
         "eval": [BuildProvider(provider_type="inline::meta-reference")],
         "datasetio": [
             BuildProvider(provider_type="remote::huggingface"),
@@ -40,8 +40,9 @@ def available_providers() -> list[ProviderSpec]:
         InlineProviderSpec(
             api=Api.inference,
             provider_type="inline::sentence-transformers",
+            # CrossEncoder depends on torchao.quantization
             pip_packages=[
-                "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
+                "torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu",
                 "sentence-transformers --no-deps",
             ],
             module="llama_stack.providers.inline.inference.sentence_transformers",
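For context on the flag swap above: `--index-url` replaces PyPI as the package index, while `--extra-index-url` adds the PyTorch CPU wheel index alongside PyPI, so PyPI stays available as a fallback for anything the extra index does not carry. Because such a spec carries its own flags, it is installed with its own pip invocation rather than batched with the normal dependencies; a rough sketch (the install command itself is left commented out):

```python
import shlex
import sys

# One "special" dependency string from the provider spec above.
spec = "torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"

# Build a dedicated pip command for it instead of batching it with normal deps.
cmd = [sys.executable, "-m", "pip", "install", *shlex.split(spec)]
print(" ".join(cmd))
# import subprocess; subprocess.run(cmd, check=True)  # uncomment to actually install
```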
@@ -13,7 +13,7 @@ from llama_stack.providers.datatypes import AdapterSpec, Api, InlineProviderSpec
 # The CPU version is used for distributions that don't have GPU support -- they result in smaller container images.
 torchtune_def = dict(
     api=Api.post_training,
-    pip_packages=["torchtune==0.5.0", "torchao==0.8.0", "numpy"],
+    pip_packages=["numpy"],
     module="llama_stack.providers.inline.post_training.torchtune",
     config_class="llama_stack.providers.inline.post_training.torchtune.TorchtunePostTrainingConfig",
     api_dependencies=[
@@ -23,56 +23,39 @@ torchtune_def = dict(
     description="TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework.",
 )
 
-huggingface_def = dict(
-    api=Api.post_training,
-    pip_packages=["trl", "transformers", "peft", "datasets"],
-    module="llama_stack.providers.inline.post_training.huggingface",
-    config_class="llama_stack.providers.inline.post_training.huggingface.HuggingFacePostTrainingConfig",
-    api_dependencies=[
-        Api.datasetio,
-        Api.datasets,
-    ],
-    description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.",
-)
-
 
 def available_providers() -> list[ProviderSpec]:
     return [
         InlineProviderSpec(
-            **{
+            **{  # type: ignore
                 **torchtune_def,
                 "provider_type": "inline::torchtune-cpu",
                 "pip_packages": (
                     cast(list[str], torchtune_def["pip_packages"])
-                    + ["torch torchtune==0.5.0 torchao==0.8.0 --index-url https://download.pytorch.org/whl/cpu"]
+                    + ["torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"]
                 ),
             },
         ),
-        InlineProviderSpec(
-            **{
-                **huggingface_def,
-                "provider_type": "inline::huggingface-cpu",
-                "pip_packages": (
-                    cast(list[str], huggingface_def["pip_packages"])
-                    + ["torch --index-url https://download.pytorch.org/whl/cpu"]
-                ),
-            },
-        ),
         InlineProviderSpec(
-            **{
+            **{  # type: ignore
                 **torchtune_def,
                 "provider_type": "inline::torchtune-gpu",
                 "pip_packages": (
-                    cast(list[str], torchtune_def["pip_packages"]) + ["torch torchtune==0.5.0 torchao==0.8.0"]
+                    cast(list[str], torchtune_def["pip_packages"]) + ["torch torchtune>=0.5.0 torchao>=0.12.0"]
                 ),
             },
         ),
         InlineProviderSpec(
-            **{
-                **huggingface_def,
-                "provider_type": "inline::huggingface-gpu",
-                "pip_packages": (cast(list[str], huggingface_def["pip_packages"]) + ["torch"]),
-            },
+            api=Api.post_training,
+            provider_type="inline::huggingface-gpu",
+            pip_packages=["trl", "transformers", "peft", "datasets", "torch"],
+            module="llama_stack.providers.inline.post_training.huggingface",
+            config_class="llama_stack.providers.inline.post_training.huggingface.HuggingFacePostTrainingConfig",
+            api_dependencies=[
+                Api.datasetio,
+                Api.datasets,
+            ],
+            description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.",
         ),
         remote_provider_spec(
             api=Api.post_training,
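The registry hunk above builds each torchtune variant by spreading a shared base `dict` and overriding a few keys; later keys win, so `provider_type` and `pip_packages` can be specialized per variant without mutating the base. A self-contained sketch of that dict-override pattern (illustrative names and packages, not the real provider definitions):

```python
# Shared base definition; variants copy it and override selected keys.
base = {
    "provider_type": "inline::example",
    "pip_packages": ["numpy"],
}

cpu_variant = {
    **base,
    "provider_type": "inline::example-cpu",
    # Extend rather than replace the base package list.
    "pip_packages": base["pip_packages"] + ["torch --extra-index-url https://download.pytorch.org/whl/cpu"],
}

gpu_variant = {
    **base,
    "provider_type": "inline::example-gpu",
    "pip_packages": base["pip_packages"] + ["torch"],
}

print(cpu_variant["provider_type"])  # inline::example-cpu
print(gpu_variant["pip_packages"])   # ['numpy', 'torch']
print(base)                          # unchanged: the base dict is never mutated
```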
@@ -9,7 +9,6 @@ from __future__ import annotations  # for forward references
 import hashlib
 import json
 import os
-import sqlite3
 from collections.abc import Generator
 from contextlib import contextmanager
 from enum import StrEnum
@@ -125,28 +124,13 @@ class ResponseStorage:
     def __init__(self, test_dir: Path):
         self.test_dir = test_dir
         self.responses_dir = self.test_dir / "responses"
-        self.db_path = self.test_dir / "index.sqlite"
 
         self._ensure_directories()
-        self._init_database()
 
     def _ensure_directories(self):
         self.test_dir.mkdir(parents=True, exist_ok=True)
         self.responses_dir.mkdir(exist_ok=True)
 
-    def _init_database(self):
-        with sqlite3.connect(self.db_path) as conn:
-            conn.execute("""
-                CREATE TABLE IF NOT EXISTS recordings (
-                    request_hash TEXT PRIMARY KEY,
-                    response_file TEXT,
-                    endpoint TEXT,
-                    model TEXT,
-                    timestamp TEXT,
-                    is_streaming BOOLEAN
-                )
-            """)
-
     def store_recording(self, request_hash: str, request: dict[str, Any], response: dict[str, Any]):
         """Store a request/response pair."""
         # Generate unique response filename
@@ -169,34 +153,9 @@ class ResponseStorage:
             f.write("\n")
             f.flush()
 
-        # Update SQLite index
-        with sqlite3.connect(self.db_path) as conn:
-            conn.execute(
-                """
-                INSERT OR REPLACE INTO recordings
-                (request_hash, response_file, endpoint, model, timestamp, is_streaming)
-                VALUES (?, ?, ?, ?, datetime('now'), ?)
-                """,
-                (
-                    request_hash,
-                    response_file,
-                    request.get("endpoint", ""),
-                    request.get("model", ""),
-                    response.get("is_streaming", False),
-                ),
-            )
-
     def find_recording(self, request_hash: str) -> dict[str, Any] | None:
         """Find a recorded response by request hash."""
-        with sqlite3.connect(self.db_path) as conn:
-            result = conn.execute(
-                "SELECT response_file FROM recordings WHERE request_hash = ?", (request_hash,)
-            ).fetchone()
-
-        if not result:
-            return None
-
-        response_file = result[0]
+        response_file = f"{request_hash[:12]}.json"
         response_path = self.responses_dir / response_file
 
         if not response_path.exists():
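With the SQLite index dropped in the hunk above, a recording's file name is derived directly from the request hash, so lookup becomes a plain filesystem check. A minimal sketch of that content-addressed lookup, assuming one JSON file per request named by the first 12 characters of the hash (helper names here are illustrative):

```python
import hashlib
import json
from pathlib import Path
from typing import Any

def request_hash(request: dict[str, Any]) -> str:
    # Hash a canonical JSON form of the request so equivalent requests map to the same file.
    canonical = json.dumps(request, sort_keys=True)
    return hashlib.sha256(canonical.encode()).hexdigest()

def find_recording(responses_dir: Path, req_hash: str) -> dict[str, Any] | None:
    # No index: the file name is fully determined by the hash prefix.
    response_path = responses_dir / f"{req_hash[:12]}.json"
    if not response_path.exists():
        return None
    return json.loads(response_path.read_text())
```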
llama_stack/ui/app/chat-playground/chunk-processor.test.tsx (new file, 610 lines)
@ -0,0 +1,610 @@
|
|||
import { describe, test, expect } from "@jest/globals";
|
||||
|
||||
// Extract the exact processChunk function implementation for testing
|
||||
function createProcessChunk() {
|
||||
return (chunk: unknown): { text: string | null; isToolCall: boolean } => {
|
||||
const chunkObj = chunk as Record<string, unknown>;
|
||||
|
||||
// Helper function to check if content contains function call JSON
|
||||
const containsToolCall = (content: string): boolean => {
|
||||
return (
|
||||
content.includes('"type": "function"') ||
|
||||
content.includes('"name": "knowledge_search"') ||
|
||||
content.includes('"parameters":') ||
|
||||
!!content.match(/\{"type":\s*"function".*?\}/)
|
||||
);
|
||||
};
|
||||
|
||||
// Check if this chunk contains a tool call (function call)
|
||||
let isToolCall = false;
|
||||
|
||||
// Check direct chunk content if it's a string
|
||||
if (typeof chunk === "string") {
|
||||
isToolCall = containsToolCall(chunk);
|
||||
}
|
||||
|
||||
// Check delta structures
|
||||
if (
|
||||
chunkObj?.delta &&
|
||||
typeof chunkObj.delta === "object" &&
|
||||
chunkObj.delta !== null
|
||||
) {
|
||||
const delta = chunkObj.delta as Record<string, unknown>;
|
||||
if ("tool_calls" in delta) {
|
||||
isToolCall = true;
|
||||
}
|
||||
if (typeof delta.text === "string") {
|
||||
if (containsToolCall(delta.text)) {
|
||||
isToolCall = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check event structures
|
||||
if (
|
||||
chunkObj?.event &&
|
||||
typeof chunkObj.event === "object" &&
|
||||
chunkObj.event !== null
|
||||
) {
|
||||
const event = chunkObj.event as Record<string, unknown>;
|
||||
|
||||
// Check event payload
|
||||
if (
|
||||
event?.payload &&
|
||||
typeof event.payload === "object" &&
|
||||
event.payload !== null
|
||||
) {
|
||||
const payload = event.payload as Record<string, unknown>;
|
||||
if (typeof payload.content === "string") {
|
||||
if (containsToolCall(payload.content)) {
|
||||
isToolCall = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Check payload delta
|
||||
if (
|
||||
payload?.delta &&
|
||||
typeof payload.delta === "object" &&
|
||||
payload.delta !== null
|
||||
) {
|
||||
const delta = payload.delta as Record<string, unknown>;
|
||||
if (typeof delta.text === "string") {
|
||||
if (containsToolCall(delta.text)) {
|
||||
isToolCall = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check event delta
|
||||
if (
|
||||
event?.delta &&
|
||||
typeof event.delta === "object" &&
|
||||
event.delta !== null
|
||||
) {
|
||||
const delta = event.delta as Record<string, unknown>;
|
||||
if (typeof delta.text === "string") {
|
||||
if (containsToolCall(delta.text)) {
|
||||
isToolCall = true;
|
||||
}
|
||||
}
|
||||
if (typeof delta.content === "string") {
|
||||
if (containsToolCall(delta.content)) {
|
||||
isToolCall = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if it's a tool call, skip it (don't display in chat)
|
||||
if (isToolCall) {
|
||||
return { text: null, isToolCall: true };
|
||||
}
|
||||
|
||||
// Extract text content from various chunk formats
|
||||
let text: string | null = null;
|
||||
|
||||
// Helper function to extract clean text content, filtering out function calls
|
||||
const extractCleanText = (content: string): string | null => {
|
||||
if (containsToolCall(content)) {
|
||||
try {
|
||||
// Try to parse and extract non-function call parts
|
||||
const jsonMatch = content.match(
|
||||
/\{"type":\s*"function"[^}]*\}[^}]*\}/
|
||||
);
|
||||
if (jsonMatch) {
|
||||
const jsonPart = jsonMatch[0];
|
||||
const parsedJson = JSON.parse(jsonPart);
|
||||
|
||||
// If it's a function call, extract text after JSON
|
||||
if (parsedJson.type === "function") {
|
||||
const textAfterJson = content
|
||||
.substring(content.indexOf(jsonPart) + jsonPart.length)
|
||||
.trim();
|
||||
return textAfterJson || null;
|
||||
}
|
||||
}
|
||||
// If we can't parse it properly, skip the whole thing
|
||||
return null;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return content;
|
||||
};
|
||||
|
||||
// Try direct delta text
|
||||
if (
|
||||
chunkObj?.delta &&
|
||||
typeof chunkObj.delta === "object" &&
|
||||
chunkObj.delta !== null
|
||||
) {
|
||||
const delta = chunkObj.delta as Record<string, unknown>;
|
||||
if (typeof delta.text === "string") {
|
||||
text = extractCleanText(delta.text);
|
||||
}
|
||||
}
|
||||
|
||||
// Try event structures
|
||||
if (
|
||||
!text &&
|
||||
chunkObj?.event &&
|
||||
typeof chunkObj.event === "object" &&
|
||||
chunkObj.event !== null
|
||||
) {
|
||||
const event = chunkObj.event as Record<string, unknown>;
|
||||
|
||||
// Try event payload content
|
||||
if (
|
||||
event?.payload &&
|
||||
typeof event.payload === "object" &&
|
||||
event.payload !== null
|
||||
) {
|
||||
const payload = event.payload as Record<string, unknown>;
|
||||
|
||||
// Try direct payload content
|
||||
if (typeof payload.content === "string") {
|
||||
text = extractCleanText(payload.content);
|
||||
}
|
||||
|
||||
// Try turn_complete event structure: payload.turn.output_message.content
|
||||
if (
|
||||
!text &&
|
||||
payload?.turn &&
|
||||
typeof payload.turn === "object" &&
|
||||
payload.turn !== null
|
||||
) {
|
||||
const turn = payload.turn as Record<string, unknown>;
|
||||
if (
|
||||
turn?.output_message &&
|
||||
typeof turn.output_message === "object" &&
|
||||
turn.output_message !== null
|
||||
) {
|
||||
const outputMessage = turn.output_message as Record<
|
||||
string,
|
||||
unknown
|
||||
>;
|
||||
if (typeof outputMessage.content === "string") {
|
||||
text = extractCleanText(outputMessage.content);
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to model_response in steps if no output_message
|
||||
if (
|
||||
!text &&
|
||||
turn?.steps &&
|
||||
Array.isArray(turn.steps) &&
|
||||
turn.steps.length > 0
|
||||
) {
|
||||
for (const step of turn.steps) {
|
||||
if (step && typeof step === "object" && step !== null) {
|
||||
const stepObj = step as Record<string, unknown>;
|
||||
if (
|
||||
stepObj?.model_response &&
|
||||
typeof stepObj.model_response === "object" &&
|
||||
stepObj.model_response !== null
|
||||
) {
|
||||
const modelResponse = stepObj.model_response as Record<
|
||||
string,
|
||||
unknown
|
||||
>;
|
||||
if (typeof modelResponse.content === "string") {
|
||||
text = extractCleanText(modelResponse.content);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try payload delta
|
||||
if (
|
||||
!text &&
|
||||
payload?.delta &&
|
||||
typeof payload.delta === "object" &&
|
||||
payload.delta !== null
|
||||
) {
|
||||
const delta = payload.delta as Record<string, unknown>;
|
||||
if (typeof delta.text === "string") {
|
||||
text = extractCleanText(delta.text);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try event delta
|
||||
if (
|
||||
!text &&
|
||||
event?.delta &&
|
||||
typeof event.delta === "object" &&
|
||||
event.delta !== null
|
||||
) {
|
||||
const delta = event.delta as Record<string, unknown>;
|
||||
if (typeof delta.text === "string") {
|
||||
text = extractCleanText(delta.text);
|
||||
}
|
||||
if (!text && typeof delta.content === "string") {
|
||||
text = extractCleanText(delta.content);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try choices structure (ChatML format)
|
||||
if (
|
||||
!text &&
|
||||
chunkObj?.choices &&
|
||||
Array.isArray(chunkObj.choices) &&
|
||||
chunkObj.choices.length > 0
|
||||
) {
|
||||
const choice = chunkObj.choices[0] as Record<string, unknown>;
|
||||
if (
|
||||
choice?.delta &&
|
||||
typeof choice.delta === "object" &&
|
||||
choice.delta !== null
|
||||
) {
|
||||
const delta = choice.delta as Record<string, unknown>;
|
||||
if (typeof delta.content === "string") {
|
||||
text = extractCleanText(delta.content);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try direct string content
|
||||
if (!text && typeof chunk === "string") {
|
||||
text = extractCleanText(chunk);
|
||||
}
|
||||
|
||||
return { text, isToolCall: false };
|
||||
};
|
||||
}
|
||||
|
||||
describe("Chunk Processor", () => {
|
||||
const processChunk = createProcessChunk();
|
||||
|
||||
describe("Real Event Structures", () => {
|
||||
test("handles turn_complete event with cancellation policy response", () => {
|
||||
const chunk = {
|
||||
event: {
|
||||
payload: {
|
||||
event_type: "turn_complete",
|
||||
turn: {
|
||||
turn_id: "50a2d6b7-49ed-4d1e-b1c2-6d68b3f726db",
|
||||
session_id: "e7f62b8e-518c-4450-82df-e65fe49f27a3",
|
||||
input_messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "nice, what's the cancellation policy?",
|
||||
context: null,
|
||||
},
|
||||
],
|
||||
steps: [
|
||||
{
|
||||
turn_id: "50a2d6b7-49ed-4d1e-b1c2-6d68b3f726db",
|
||||
step_id: "54074310-af42-414c-9ffe-fba5b2ead0ad",
|
||||
started_at: "2025-08-27T18:15:25.870703Z",
|
||||
completed_at: "2025-08-27T18:15:51.288993Z",
|
||||
step_type: "inference",
|
||||
model_response: {
|
||||
role: "assistant",
|
||||
content:
|
||||
"According to the search results, the cancellation policy for Red Hat Summit is as follows:\n\n* Cancellations must be received by 5 PM EDT on April 18, 2025 for a 50% refund of the registration fee.\n* No refunds will be given for cancellations received after 5 PM EDT on April 18, 2025.\n* Cancellation of travel reservations and hotel reservations are the responsibility of the registrant.",
|
||||
stop_reason: "end_of_turn",
|
||||
tool_calls: [],
|
||||
},
|
||||
},
|
||||
],
|
||||
output_message: {
|
||||
role: "assistant",
|
||||
content:
|
||||
"According to the search results, the cancellation policy for Red Hat Summit is as follows:\n\n* Cancellations must be received by 5 PM EDT on April 18, 2025 for a 50% refund of the registration fee.\n* No refunds will be given for cancellations received after 5 PM EDT on April 18, 2025.\n* Cancellation of travel reservations and hotel reservations are the responsibility of the registrant.",
|
||||
stop_reason: "end_of_turn",
|
||||
tool_calls: [],
|
||||
},
|
||||
output_attachments: [],
|
||||
started_at: "2025-08-27T18:15:25.868548Z",
|
||||
completed_at: "2025-08-27T18:15:51.289262Z",
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const result = processChunk(chunk);
|
||||
expect(result.isToolCall).toBe(false);
|
||||
expect(result.text).toContain(
|
||||
"According to the search results, the cancellation policy for Red Hat Summit is as follows:"
|
||||
);
|
||||
expect(result.text).toContain("5 PM EDT on April 18, 2025");
|
||||
});
|
||||
|
||||
test("handles turn_complete event with address response", () => {
|
||||
const chunk = {
|
||||
event: {
|
||||
payload: {
|
||||
event_type: "turn_complete",
|
||||
turn: {
|
||||
turn_id: "2f4a1520-8ecc-4cb7-bb7b-886939e042b0",
|
||||
session_id: "e7f62b8e-518c-4450-82df-e65fe49f27a3",
|
||||
input_messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "what's francisco's address",
|
||||
context: null,
|
||||
},
|
||||
],
|
||||
steps: [
|
||||
{
|
||||
turn_id: "2f4a1520-8ecc-4cb7-bb7b-886939e042b0",
|
||||
step_id: "c13dd277-1acb-4419-8fbf-d5e2f45392ea",
|
||||
started_at: "2025-08-27T18:14:52.558761Z",
|
||||
completed_at: "2025-08-27T18:15:11.306032Z",
|
||||
step_type: "inference",
|
||||
model_response: {
|
||||
role: "assistant",
|
||||
content:
|
||||
"Francisco Arceo's address is:\n\nRed Hat\nUnited States\n17 Primrose Ln \nBasking Ridge New Jersey 07920",
|
||||
stop_reason: "end_of_turn",
|
||||
tool_calls: [],
|
||||
},
|
||||
},
|
||||
],
|
||||
output_message: {
|
||||
role: "assistant",
|
||||
content:
|
||||
"Francisco Arceo's address is:\n\nRed Hat\nUnited States\n17 Primrose Ln \nBasking Ridge New Jersey 07920",
|
||||
stop_reason: "end_of_turn",
|
||||
tool_calls: [],
|
||||
},
|
||||
output_attachments: [],
|
||||
started_at: "2025-08-27T18:14:52.553707Z",
|
||||
completed_at: "2025-08-27T18:15:11.306729Z",
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const result = processChunk(chunk);
|
||||
expect(result.isToolCall).toBe(false);
|
||||
expect(result.text).toContain("Francisco Arceo's address is:");
|
||||
expect(result.text).toContain("17 Primrose Ln");
|
||||
expect(result.text).toContain("Basking Ridge New Jersey 07920");
|
||||
});
|
||||
|
||||
test("handles turn_complete event with ticket cost response", () => {
|
||||
const chunk = {
|
||||
event: {
|
||||
payload: {
|
||||
event_type: "turn_complete",
|
||||
turn: {
|
||||
turn_id: "7ef244a3-efee-42ca-a9c8-942865251002",
|
||||
session_id: "e7f62b8e-518c-4450-82df-e65fe49f27a3",
|
||||
input_messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "what was the ticket cost for summit?",
|
||||
context: null,
|
||||
},
|
||||
],
|
||||
steps: [
|
||||
{
|
||||
turn_id: "7ef244a3-efee-42ca-a9c8-942865251002",
|
||||
step_id: "7651dda0-315a-472d-b1c1-3c2725f55bc5",
|
||||
started_at: "2025-08-27T18:14:21.710611Z",
|
||||
completed_at: "2025-08-27T18:14:39.706452Z",
|
||||
step_type: "inference",
|
||||
model_response: {
|
||||
role: "assistant",
|
||||
content:
|
||||
"The ticket cost for the Red Hat Summit was $999.00 for a conference pass.",
|
||||
stop_reason: "end_of_turn",
|
||||
tool_calls: [],
|
||||
},
|
||||
},
|
||||
],
|
||||
output_message: {
|
||||
role: "assistant",
|
||||
content:
|
||||
"The ticket cost for the Red Hat Summit was $999.00 for a conference pass.",
|
||||
stop_reason: "end_of_turn",
|
||||
tool_calls: [],
|
||||
},
|
||||
output_attachments: [],
|
||||
started_at: "2025-08-27T18:14:21.705289Z",
|
||||
completed_at: "2025-08-27T18:14:39.706752Z",
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const result = processChunk(chunk);
|
||||
expect(result.isToolCall).toBe(false);
|
||||
expect(result.text).toBe(
|
||||
"The ticket cost for the Red Hat Summit was $999.00 for a conference pass."
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Function Call Detection", () => {
|
||||
test("detects function calls in direct string chunks", () => {
|
||||
const chunk =
|
||||
'{"type": "function", "name": "knowledge_search", "parameters": {"query": "test"}}';
|
||||
const result = processChunk(chunk);
|
||||
expect(result.isToolCall).toBe(true);
|
||||
expect(result.text).toBe(null);
|
||||
});
|
||||
|
||||
test("detects function calls in event payload content", () => {
|
||||
const chunk = {
|
||||
event: {
|
||||
payload: {
|
||||
content:
|
||||
'{"type": "function", "name": "knowledge_search", "parameters": {"query": "test"}}',
|
||||
},
|
||||
},
|
||||
};
|
||||
const result = processChunk(chunk);
|
||||
expect(result.isToolCall).toBe(true);
|
||||
expect(result.text).toBe(null);
|
||||
});
|
||||
|
||||
test("detects tool_calls in delta structure", () => {
|
||||
const chunk = {
|
||||
delta: {
|
||||
tool_calls: [{ function: { name: "knowledge_search" } }],
|
||||
},
|
||||
};
|
||||
const result = processChunk(chunk);
|
||||
expect(result.isToolCall).toBe(true);
|
||||
expect(result.text).toBe(null);
|
||||
});
|
||||
|
||||
test("detects function call in mixed content but skips it", () => {
|
||||
const chunk =
|
||||
'{"type": "function", "name": "knowledge_search", "parameters": {"query": "test"}} Based on the search results, here is your answer.';
|
||||
const result = processChunk(chunk);
|
||||
// This is detected as a tool call and skipped entirely - the implementation prioritizes safety
|
||||
expect(result.isToolCall).toBe(true);
|
||||
expect(result.text).toBe(null);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Text Extraction", () => {
|
||||
test("extracts text from direct string chunks", () => {
|
||||
const chunk = "Hello, this is a normal response.";
|
||||
const result = processChunk(chunk);
|
||||
expect(result.isToolCall).toBe(false);
|
||||
expect(result.text).toBe("Hello, this is a normal response.");
|
||||
});
|
||||
|
||||
test("extracts text from delta structure", () => {
|
||||
const chunk = {
|
||||
delta: {
|
||||
text: "Hello, this is a normal response.",
|
||||
},
|
||||
};
|
||||
const result = processChunk(chunk);
|
||||
expect(result.isToolCall).toBe(false);
|
||||
expect(result.text).toBe("Hello, this is a normal response.");
|
||||
});
|
||||
|
||||
test("extracts text from choices structure", () => {
|
||||
const chunk = {
|
||||
choices: [
|
||||
{
|
||||
delta: {
|
||||
content: "Hello, this is a normal response.",
|
||||
},
|
||||
},
|
||||
],
|
||||
};
|
||||
const result = processChunk(chunk);
|
||||
expect(result.isToolCall).toBe(false);
|
||||
expect(result.text).toBe("Hello, this is a normal response.");
|
||||
});
|
||||
|
||||
test("prioritizes output_message over model_response in turn structure", () => {
|
||||
const chunk = {
|
||||
event: {
|
||||
payload: {
|
||||
turn: {
|
||||
steps: [
|
||||
{
|
||||
model_response: {
|
||||
content: "Model response content.",
|
||||
},
|
||||
},
|
||||
],
|
||||
output_message: {
|
||||
content: "Final output message content.",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
const result = processChunk(chunk);
|
||||
expect(result.isToolCall).toBe(false);
|
||||
expect(result.text).toBe("Final output message content.");
|
||||
});
|
||||
|
||||
test("falls back to model_response when no output_message", () => {
|
||||
const chunk = {
|
||||
event: {
|
||||
payload: {
|
||||
turn: {
|
||||
steps: [
|
||||
{
|
||||
model_response: {
|
||||
content: "This is from the model response.",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
const result = processChunk(chunk);
|
||||
expect(result.isToolCall).toBe(false);
|
||||
expect(result.text).toBe("This is from the model response.");
|
||||
});
|
||||
});
|
||||
|
||||
describe("Edge Cases", () => {
|
||||
test("handles empty chunks", () => {
|
||||
const result = processChunk("");
|
||||
expect(result.isToolCall).toBe(false);
|
||||
expect(result.text).toBe("");
|
||||
});
|
||||
|
||||
test("handles null chunks", () => {
|
||||
const result = processChunk(null);
|
||||
expect(result.isToolCall).toBe(false);
|
||||
expect(result.text).toBe(null);
|
||||
});
|
||||
|
||||
test("handles undefined chunks", () => {
|
||||
const result = processChunk(undefined);
|
||||
expect(result.isToolCall).toBe(false);
|
||||
expect(result.text).toBe(null);
|
||||
});
|
||||
|
||||
test("handles chunks with no text content", () => {
|
||||
const chunk = {
|
||||
event: {
|
||||
metadata: {
|
||||
timestamp: "2024-01-01",
|
||||
},
|
||||
},
|
||||
};
|
||||
const result = processChunk(chunk);
|
||||
expect(result.isToolCall).toBe(false);
|
||||
expect(result.text).toBe(null);
|
||||
});
|
||||
|
||||
test("handles malformed JSON in function calls gracefully", () => {
|
||||
const chunk =
|
||||
'{"type": "function", "name": "knowledge_search"} incomplete json';
|
||||
const result = processChunk(chunk);
|
||||
expect(result.isToolCall).toBe(true);
|
||||
expect(result.text).toBe(null);
|
||||
});
|
||||
});
|
||||
});
|
|
@ -31,6 +31,9 @@ const mockClient = {
|
|||
toolgroups: {
|
||||
list: jest.fn(),
|
||||
},
|
||||
vectorDBs: {
|
||||
list: jest.fn(),
|
||||
},
|
||||
};
|
||||
|
||||
jest.mock("@/hooks/use-auth-client", () => ({
|
||||
|
@ -164,7 +167,7 @@ describe("ChatPlaygroundPage", () => {
|
|||
session_name: "Test Session",
|
||||
started_at: new Date().toISOString(),
|
||||
turns: [],
|
||||
}); // No turns by default
|
||||
});
|
||||
mockClient.agents.retrieve.mockResolvedValue({
|
||||
agent_id: "test-agent",
|
||||
agent_config: {
|
||||
|
@ -417,7 +420,6 @@ describe("ChatPlaygroundPage", () => {
|
|||
});
|
||||
|
||||
await waitFor(() => {
|
||||
// first agent should be auto-selected
|
||||
expect(mockClient.agents.session.create).toHaveBeenCalledWith(
|
||||
"agent_123",
|
||||
{ session_name: "Default Session" }
|
||||
|
@ -464,7 +466,7 @@ describe("ChatPlaygroundPage", () => {
|
|||
});
|
||||
});
|
||||
|
||||
test("hides delete button when only one agent exists", async () => {
|
||||
test("shows delete button even when only one agent exists", async () => {
|
||||
mockClient.agents.list.mockResolvedValue({
|
||||
data: [mockAgents[0]],
|
||||
});
|
||||
|
@ -474,9 +476,7 @@ describe("ChatPlaygroundPage", () => {
|
|||
});
|
||||
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.queryByTitle("Delete current agent")
|
||||
).not.toBeInTheDocument();
|
||||
expect(screen.getByTitle("Delete current agent")).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -505,7 +505,7 @@ describe("ChatPlaygroundPage", () => {
|
|||
await waitFor(() => {
|
||||
expect(mockClient.agents.delete).toHaveBeenCalledWith("agent_123");
|
||||
expect(global.confirm).toHaveBeenCalledWith(
|
||||
"Are you sure you want to delete this agent? This action cannot be undone and will delete all associated sessions."
|
||||
"Are you sure you want to delete this agent? This action cannot be undone and will delete the agent and all its sessions."
|
||||
);
|
||||
});
|
||||
|
||||
|
@ -584,4 +584,207 @@ describe("ChatPlaygroundPage", () => {
|
|||
consoleSpy.mockRestore();
|
||||
});
|
||||
});
|
||||
|
||||
describe("RAG File Upload", () => {
|
||||
let mockFileReader: {
|
||||
readAsDataURL: jest.Mock;
|
||||
readAsText: jest.Mock;
|
||||
result: string | null;
|
||||
onload: (() => void) | null;
|
||||
onerror: (() => void) | null;
|
||||
};
|
||||
let mockRAGTool: {
|
||||
insert: jest.Mock;
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
mockFileReader = {
|
||||
readAsDataURL: jest.fn(),
|
||||
readAsText: jest.fn(),
|
||||
result: null,
|
||||
onload: null,
|
||||
onerror: null,
|
||||
};
|
||||
global.FileReader = jest.fn(() => mockFileReader);
|
||||
|
||||
mockRAGTool = {
|
||||
insert: jest.fn().mockResolvedValue({}),
|
||||
};
|
||||
mockClient.toolRuntime = {
|
||||
ragTool: mockRAGTool,
|
||||
};
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
jest.clearAllMocks();
|
||||
});
|
||||
|
||||
test("handles text file upload", async () => {
|
||||
new File(["Hello, world!"], "test.txt", {
|
||||
type: "text/plain",
|
||||
});
|
||||
|
||||
mockClient.agents.retrieve.mockResolvedValue({
|
||||
agent_id: "test-agent",
|
||||
agent_config: {
|
||||
toolgroups: [
|
||||
{
|
||||
name: "builtin::rag/knowledge_search",
|
||||
args: { vector_db_ids: ["test-vector-db"] },
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
|
||||
await act(async () => {
|
||||
render(<ChatPlaygroundPage />);
|
||||
});
|
||||
|
||||
await waitFor(() => {
|
||||
expect(screen.getByTestId("chat-component")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
const chatComponent = screen.getByTestId("chat-component");
|
||||
chatComponent.getAttribute("data-onragfileupload");
|
||||
|
||||
// this is a simplified test
|
||||
expect(mockRAGTool.insert).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("handles PDF file upload with FileReader", async () => {
|
||||
new File([new ArrayBuffer(1000)], "test.pdf", {
|
||||
type: "application/pdf",
|
||||
});
|
||||
|
||||
const mockDataURL = "data:application/pdf;base64,JVBERi0xLjQK";
|
||||
mockFileReader.result = mockDataURL;
|
||||
|
||||
mockClient.agents.retrieve.mockResolvedValue({
|
||||
agent_id: "test-agent",
|
||||
agent_config: {
|
||||
toolgroups: [
|
||||
{
|
||||
name: "builtin::rag/knowledge_search",
|
||||
args: { vector_db_ids: ["test-vector-db"] },
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
|
||||
await act(async () => {
|
||||
render(<ChatPlaygroundPage />);
|
||||
});
|
||||
|
||||
await waitFor(() => {
|
||||
expect(screen.getByTestId("chat-component")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
expect(global.FileReader).toBeDefined();
|
||||
});
|
||||
|
||||
test("handles different file types correctly", () => {
|
||||
const getContentType = (filename: string): string => {
|
||||
const ext = filename.toLowerCase().split(".").pop();
|
||||
switch (ext) {
|
||||
case "pdf":
|
||||
return "application/pdf";
|
||||
case "txt":
|
||||
return "text/plain";
|
||||
case "md":
|
||||
return "text/markdown";
|
||||
case "html":
|
||||
return "text/html";
|
||||
case "csv":
|
||||
return "text/csv";
|
||||
case "json":
|
||||
return "application/json";
|
||||
case "docx":
|
||||
return "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
|
||||
case "doc":
|
||||
return "application/msword";
|
||||
default:
|
||||
return "application/octet-stream";
|
||||
}
|
||||
};
|
||||
|
||||
expect(getContentType("test.pdf")).toBe("application/pdf");
|
||||
expect(getContentType("test.txt")).toBe("text/plain");
|
||||
expect(getContentType("test.md")).toBe("text/markdown");
|
||||
expect(getContentType("test.html")).toBe("text/html");
|
||||
expect(getContentType("test.csv")).toBe("text/csv");
|
||||
expect(getContentType("test.json")).toBe("application/json");
|
||||
expect(getContentType("test.docx")).toBe(
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||
);
|
||||
expect(getContentType("test.doc")).toBe("application/msword");
|
||||
expect(getContentType("test.unknown")).toBe("application/octet-stream");
|
||||
});
|
||||
|
||||
test("determines text vs binary file types correctly", () => {
|
||||
const isTextFile = (mimeType: string): boolean => {
|
||||
return (
|
||||
mimeType.startsWith("text/") ||
|
||||
mimeType === "application/json" ||
|
||||
mimeType === "text/markdown" ||
|
||||
mimeType === "text/html" ||
|
||||
mimeType === "text/csv"
|
||||
);
|
||||
};
|
||||
|
||||
expect(isTextFile("text/plain")).toBe(true);
|
||||
expect(isTextFile("text/markdown")).toBe(true);
|
||||
expect(isTextFile("text/html")).toBe(true);
|
||||
expect(isTextFile("text/csv")).toBe(true);
|
||||
expect(isTextFile("application/json")).toBe(true);
|
||||
|
||||
expect(isTextFile("application/pdf")).toBe(false);
|
||||
expect(isTextFile("application/msword")).toBe(false);
|
||||
expect(
|
||||
isTextFile(
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||
)
|
||||
).toBe(false);
|
||||
expect(isTextFile("application/octet-stream")).toBe(false);
|
||||
});
|
||||
|
||||
test("handles FileReader error gracefully", async () => {
|
||||
const pdfFile = new File([new ArrayBuffer(1000)], "test.pdf", {
|
||||
type: "application/pdf",
|
||||
});
|
||||
|
||||
mockFileReader.onerror = jest.fn();
|
||||
const mockError = new Error("FileReader failed");
|
||||
|
||||
const fileReaderPromise = new Promise<string>((resolve, reject) => {
|
||||
const reader = new FileReader();
|
||||
reader.onload = () => resolve(reader.result as string);
|
||||
reader.onerror = () => reject(reader.error || mockError);
|
||||
reader.readAsDataURL(pdfFile);
|
||||
|
||||
setTimeout(() => {
|
||||
reader.onerror?.(new ProgressEvent("error"));
|
||||
}, 0);
|
||||
});
|
||||
|
||||
await expect(fileReaderPromise).rejects.toBeDefined();
|
||||
});
|
||||
|
||||
test("handles large file upload with FileReader approach", () => {
|
||||
// create a large file
|
||||
const largeFile = new File(
|
||||
[new ArrayBuffer(10 * 1024 * 1024)],
|
||||
"large.pdf",
|
||||
{
|
||||
type: "application/pdf",
|
||||
}
|
||||
);
|
||||
|
||||
expect(largeFile.size).toBe(10 * 1024 * 1024); // 10MB
|
||||
|
||||
expect(global.FileReader).toBeDefined();
|
||||
|
||||
const reader = new FileReader();
|
||||
expect(reader.readAsDataURL).toBeDefined();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
File diff suppressed because it is too large
|
@ -35,6 +35,7 @@ interface ChatPropsBase {
|
|||
) => void;
|
||||
setMessages?: (messages: Message[]) => void;
|
||||
transcribeAudio?: (blob: Blob) => Promise<string>;
|
||||
onRAGFileUpload?: (file: File) => Promise<void>;
|
||||
}
|
||||
|
||||
interface ChatPropsWithoutSuggestions extends ChatPropsBase {
|
||||
|
@ -62,6 +63,7 @@ export function Chat({
|
|||
onRateResponse,
|
||||
setMessages,
|
||||
transcribeAudio,
|
||||
onRAGFileUpload,
|
||||
}: ChatProps) {
|
||||
const lastMessage = messages.at(-1);
|
||||
const isEmpty = messages.length === 0;
|
||||
|
@ -226,16 +228,17 @@ export function Chat({
|
|||
isPending={isGenerating || isTyping}
|
||||
handleSubmit={handleSubmit}
|
||||
>
|
||||
{({ files, setFiles }) => (
|
||||
{() => (
|
||||
<MessageInput
|
||||
value={input}
|
||||
onChange={handleInputChange}
|
||||
allowAttachments
|
||||
files={files}
|
||||
setFiles={setFiles}
|
||||
allowAttachments={true}
|
||||
files={null}
|
||||
setFiles={() => {}}
|
||||
stop={handleStop}
|
||||
isGenerating={isGenerating}
|
||||
transcribeAudio={transcribeAudio}
|
||||
onRAGFileUpload={onRAGFileUpload}
|
||||
/>
|
||||
)}
|
||||
</ChatForm>
|
||||
|
|
|
@ -14,6 +14,7 @@ import { Card } from "@/components/ui/card";
|
|||
import { Trash2 } from "lucide-react";
|
||||
import type { Message } from "@/components/chat-playground/chat-message";
|
||||
import { useAuthClient } from "@/hooks/use-auth-client";
|
||||
import { cleanMessageContent } from "@/lib/message-content-utils";
|
||||
import type {
|
||||
Session,
|
||||
SessionCreateParams,
|
||||
|
@ -219,10 +220,7 @@ export function Conversations({
|
|||
messages.push({
|
||||
id: `${turn.turn_id}-assistant-${messages.length}`,
|
||||
role: "assistant",
|
||||
content:
|
||||
typeof turn.output_message.content === "string"
|
||||
? turn.output_message.content
|
||||
: JSON.stringify(turn.output_message.content),
|
||||
content: cleanMessageContent(turn.output_message.content),
|
||||
createdAt: new Date(
|
||||
turn.completed_at || turn.started_at || Date.now()
|
||||
),
|
||||
|
@ -271,7 +269,7 @@ export function Conversations({
|
|||
);
|
||||
|
||||
const deleteSession = async (sessionId: string) => {
|
||||
if (sessions.length <= 1 || !selectedAgentId) {
|
||||
if (!selectedAgentId) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -324,7 +322,6 @@ export function Conversations({
|
|||
}
|
||||
}, [currentSession]);
|
||||
|
||||
// Don't render if no agent is selected
|
||||
if (!selectedAgentId) {
|
||||
return null;
|
||||
}
|
||||
|
@ -357,7 +354,7 @@ export function Conversations({
|
|||
+ New
|
||||
</Button>
|
||||
|
||||
{currentSession && sessions.length > 1 && (
|
||||
{currentSession && (
|
||||
<Button
|
||||
onClick={() => deleteSession(currentSession.id)}
|
||||
variant="outline"
|
||||
|
|
|
@ -21,6 +21,7 @@ interface MessageInputBaseProps
|
|||
isGenerating: boolean;
|
||||
enableInterrupt?: boolean;
|
||||
transcribeAudio?: (blob: Blob) => Promise<string>;
|
||||
onRAGFileUpload?: (file: File) => Promise<void>;
|
||||
}
|
||||
|
||||
interface MessageInputWithoutAttachmentProps extends MessageInputBaseProps {
|
||||
|
@ -213,8 +214,13 @@ export function MessageInput({
|
|||
className
|
||||
)}
|
||||
{...(props.allowAttachments
|
||||
? omit(props, ["allowAttachments", "files", "setFiles"])
|
||||
: omit(props, ["allowAttachments"]))}
|
||||
? omit(props, [
|
||||
"allowAttachments",
|
||||
"files",
|
||||
"setFiles",
|
||||
"onRAGFileUpload",
|
||||
])
|
||||
: omit(props, ["allowAttachments", "onRAGFileUpload"]))}
|
||||
/>
|
||||
|
||||
{props.allowAttachments && (
|
||||
|
@ -254,11 +260,19 @@ export function MessageInput({
|
|||
size="icon"
|
||||
variant="outline"
|
||||
className="h-8 w-8"
|
||||
aria-label="Attach a file"
|
||||
disabled={true}
|
||||
aria-label="Upload file to RAG"
|
||||
disabled={false}
|
||||
onClick={async () => {
|
||||
const files = await showFileUploadDialog();
|
||||
addFiles(files);
|
||||
const input = document.createElement("input");
|
||||
input.type = "file";
|
||||
input.accept = ".pdf,.txt,.md,.html,.csv,.json";
|
||||
input.onchange = async e => {
|
||||
const file = (e.target as HTMLInputElement).files?.[0];
|
||||
if (file && props.onRAGFileUpload) {
|
||||
await props.onRAGFileUpload(file);
|
||||
}
|
||||
};
|
||||
input.click();
|
||||
}}
|
||||
>
|
||||
<Paperclip className="h-4 w-4" />
|
||||
|
@ -337,28 +351,6 @@ function FileUploadOverlay({ isDragging }: FileUploadOverlayProps) {
|
|||
);
|
||||
}
|
||||
|
||||
function showFileUploadDialog() {
|
||||
const input = document.createElement("input");
|
||||
|
||||
input.type = "file";
|
||||
input.multiple = true;
|
||||
input.accept = "*/*";
|
||||
input.click();
|
||||
|
||||
return new Promise<File[] | null>(resolve => {
|
||||
input.onchange = e => {
|
||||
const files = (e.currentTarget as HTMLInputElement).files;
|
||||
|
||||
if (files) {
|
||||
resolve(Array.from(files));
|
||||
return;
|
||||
}
|
||||
|
||||
resolve(null);
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
function TranscribingOverlay() {
|
||||
return (
|
||||
<motion.div
|
||||
|
|
llama_stack/ui/components/chat-playground/vector-db-creator.tsx (new file, 243 lines)
@ -0,0 +1,243 @@
|
|||
"use client";
|
||||
|
||||
import { useState, useEffect } from "react";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Card } from "@/components/ui/card";
|
||||
import {
|
||||
Select,
|
||||
SelectContent,
|
||||
SelectItem,
|
||||
SelectTrigger,
|
||||
SelectValue,
|
||||
} from "@/components/ui/select";
|
||||
import { useAuthClient } from "@/hooks/use-auth-client";
|
||||
import type { Model } from "llama-stack-client/resources/models";
|
||||
|
||||
interface VectorDBCreatorProps {
|
||||
models: Model[];
|
||||
onVectorDBCreated?: (vectorDbId: string) => void;
|
||||
onCancel?: () => void;
|
||||
}
|
||||
|
||||
interface VectorDBProvider {
|
||||
api: string;
|
||||
provider_id: string;
|
||||
provider_type: string;
|
||||
}
|
||||
|
||||
export function VectorDBCreator({
|
||||
models,
|
||||
onVectorDBCreated,
|
||||
onCancel,
|
||||
}: VectorDBCreatorProps) {
|
||||
const [vectorDbName, setVectorDbName] = useState("");
|
||||
const [selectedEmbeddingModel, setSelectedEmbeddingModel] = useState("");
|
||||
const [selectedProvider, setSelectedProvider] = useState("faiss");
|
||||
const [availableProviders, setAvailableProviders] = useState<
|
||||
VectorDBProvider[]
|
||||
>([]);
|
||||
const [isCreating, setIsCreating] = useState(false);
|
||||
const [isLoadingProviders, setIsLoadingProviders] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const client = useAuthClient();
|
||||
|
||||
const embeddingModels = models.filter(
|
||||
model => model.model_type === "embedding"
|
||||
);
|
||||
|
||||
useEffect(() => {
|
||||
const fetchProviders = async () => {
|
||||
setIsLoadingProviders(true);
|
||||
try {
|
||||
const providersResponse = await client.providers.list();
|
||||
|
||||
const vectorIoProviders = providersResponse.filter(
|
||||
(provider: VectorDBProvider) => provider.api === "vector_io"
|
||||
);
|
||||
|
||||
setAvailableProviders(vectorIoProviders);
|
||||
|
||||
if (vectorIoProviders.length > 0) {
|
||||
const faissProvider = vectorIoProviders.find(
|
||||
(p: VectorDBProvider) => p.provider_id === "faiss"
|
||||
);
|
||||
setSelectedProvider(
|
||||
faissProvider?.provider_id || vectorIoProviders[0].provider_id
|
||||
);
|
||||
}
|
||||
} catch (err) {
|
||||
console.error("Error fetching providers:", err);
|
||||
setAvailableProviders([
|
||||
{
|
||||
api: "vector_io",
|
||||
provider_id: "faiss",
|
||||
provider_type: "inline::faiss",
|
||||
},
|
||||
]);
|
||||
} finally {
|
||||
setIsLoadingProviders(false);
|
||||
}
|
||||
};
|
||||
|
||||
fetchProviders();
|
||||
}, [client]);
|
||||
|
||||
const handleCreate = async () => {
|
||||
if (!vectorDbName.trim() || !selectedEmbeddingModel) {
|
||||
setError("Please provide a name and select an embedding model");
|
||||
return;
|
||||
}
|
||||
|
||||
setIsCreating(true);
|
||||
setError(null);
|
||||
|
||||
try {
|
||||
const embeddingModel = embeddingModels.find(
|
||||
m => m.identifier === selectedEmbeddingModel
|
||||
);
|
||||
|
||||
if (!embeddingModel) {
|
||||
throw new Error("Selected embedding model not found");
|
||||
}
|
||||
|
||||
const embeddingDimension = embeddingModel.metadata
|
||||
?.embedding_dimension as number;
|
||||
|
||||
if (!embeddingDimension) {
|
||||
throw new Error("Embedding dimension not available for selected model");
|
||||
}
|
||||
|
||||
const vectorDbId = vectorDbName.trim() || `vector_db_${Date.now()}`;
|
||||
|
||||
const response = await client.vectorDBs.register({
|
||||
vector_db_id: vectorDbId,
|
||||
embedding_model: selectedEmbeddingModel,
|
||||
embedding_dimension: embeddingDimension,
|
||||
provider_id: selectedProvider,
|
||||
});
|
||||
|
||||
onVectorDBCreated?.(response.identifier || vectorDbId);
|
||||
} catch (err) {
|
||||
console.error("Error creating vector DB:", err);
|
||||
setError(
|
||||
err instanceof Error ? err.message : "Failed to create vector DB"
|
||||
);
|
||||
} finally {
|
||||
setIsCreating(false);
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<Card className="p-6 space-y-4">
|
||||
<h3 className="text-lg font-semibold">Create Vector Database</h3>
|
||||
|
||||
<div className="space-y-4">
|
||||
<div>
|
||||
<label className="text-sm font-medium block mb-2">
|
||||
Vector DB Name
|
||||
</label>
|
||||
<Input
|
||||
value={vectorDbName}
|
||||
onChange={e => setVectorDbName(e.target.value)}
|
||||
placeholder="My Vector Database"
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label className="text-sm font-medium block mb-2">
|
||||
Embedding Model
|
||||
</label>
|
||||
<Select
|
||||
value={selectedEmbeddingModel}
|
||||
onValueChange={setSelectedEmbeddingModel}
|
||||
>
|
||||
<SelectTrigger>
|
||||
<SelectValue placeholder="Select Embedding Model" />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
{embeddingModels.map(model => (
|
||||
<SelectItem key={model.identifier} value={model.identifier}>
|
||||
{model.identifier}
|
||||
</SelectItem>
|
||||
))}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
{selectedEmbeddingModel && (
|
||||
<p className="text-xs text-muted-foreground mt-1">
|
||||
Dimension:{" "}
|
||||
{embeddingModels.find(
|
||||
m => m.identifier === selectedEmbeddingModel
|
||||
)?.metadata?.embedding_dimension || "Unknown"}
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label className="text-sm font-medium block mb-2">
|
||||
Vector Database Provider
|
||||
</label>
|
||||
<Select
|
||||
value={selectedProvider}
|
||||
onValueChange={setSelectedProvider}
|
||||
disabled={isLoadingProviders}
|
||||
>
|
||||
<SelectTrigger>
|
||||
<SelectValue
|
||||
placeholder={
|
||||
isLoadingProviders
|
||||
? "Loading providers..."
|
||||
: "Select Provider"
|
||||
}
|
||||
/>
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
{availableProviders.map(provider => (
|
||||
<SelectItem
|
||||
key={provider.provider_id}
|
||||
value={provider.provider_id}
|
||||
>
|
||||
{provider.provider_id}
|
||||
</SelectItem>
|
||||
))}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
{selectedProvider && (
|
||||
<p className="text-xs text-muted-foreground mt-1">
|
||||
Selected provider: {selectedProvider}
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{error && (
|
||||
<div className="text-destructive text-sm bg-destructive/10 p-2 rounded">
|
||||
{error}
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="flex gap-2 pt-2">
|
||||
<Button
|
||||
onClick={handleCreate}
|
||||
disabled={
|
||||
isCreating || !vectorDbName.trim() || !selectedEmbeddingModel
|
||||
}
|
||||
className="flex-1"
|
||||
>
|
||||
{isCreating ? "Creating..." : "Create Vector DB"}
|
||||
</Button>
|
||||
{onCancel && (
|
||||
<Button variant="outline" onClick={onCancel} className="flex-1">
|
||||
Cancel
|
||||
</Button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="text-xs text-muted-foreground bg-muted/50 p-3 rounded">
|
||||
<strong>Note:</strong> This will create a new vector database that can
|
||||
be used with RAG tools. After creation, you'll be able to upload
|
||||
documents and use it for knowledge search in your agent conversations.
|
||||
</div>
|
||||
</Card>
|
||||
);
|
||||
}
|
llama_stack/ui/lib/message-content-utils.ts (new file, 51 lines)
@ -0,0 +1,51 @@
|
|||
// check if content contains function call JSON
|
||||
export const containsToolCall = (content: string): boolean => {
|
||||
return (
|
||||
content.includes('"type": "function"') ||
|
||||
content.includes('"name": "knowledge_search"') ||
|
||||
content.includes('"parameters":') ||
|
||||
!!content.match(/\{"type":\s*"function".*?\}/)
|
||||
);
|
||||
};
|
||||
|
||||
export const extractCleanText = (content: string): string | null => {
|
||||
if (containsToolCall(content)) {
|
||||
try {
|
||||
// parse and extract non-function call parts
|
||||
const jsonMatch = content.match(/\{"type":\s*"function"[^}]*\}[^}]*\}/);
|
||||
if (jsonMatch) {
|
||||
const jsonPart = jsonMatch[0];
|
||||
const parsedJson = JSON.parse(jsonPart);
|
||||
|
||||
// if function call, extract text after JSON
|
||||
if (parsedJson.type === "function") {
|
||||
const textAfterJson = content
|
||||
.substring(content.indexOf(jsonPart) + jsonPart.length)
|
||||
.trim();
|
||||
return textAfterJson || null;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return content;
|
||||
};
|
||||
|
||||
// removes function call JSON handling different content types
|
||||
export const cleanMessageContent = (
|
||||
content: string | unknown[] | unknown
|
||||
): string => {
|
||||
if (typeof content === "string") {
|
||||
const cleaned = extractCleanText(content);
|
||||
return cleaned || "";
|
||||
} else if (Array.isArray(content)) {
|
||||
return content
|
||||
.filter((item: { type: string }) => item.type === "text")
|
||||
.map((item: { text: string }) => item.text)
|
||||
.join("");
|
||||
} else {
|
||||
return JSON.stringify(content);
|
||||
}
|
||||
};
|
llama_stack/ui/package-lock.json (generated; 40 changed lines)
@ -18,7 +18,7 @@
|
|||
"class-variance-authority": "^0.7.1",
|
||||
"clsx": "^2.1.1",
|
||||
"framer-motion": "^11.18.2",
|
||||
"llama-stack-client": "^0.2.18",
|
||||
"llama-stack-client": "^0.2.19",
|
||||
"lucide-react": "^0.510.0",
|
||||
"next": "15.3.3",
|
||||
"next-auth": "^4.24.11",
|
||||
|
@ -36,7 +36,7 @@
|
|||
"@eslint/eslintrc": "^3",
|
||||
"@tailwindcss/postcss": "^4",
|
||||
"@testing-library/dom": "^10.4.1",
|
||||
"@testing-library/jest-dom": "^6.6.3",
|
||||
"@testing-library/jest-dom": "^6.8.0",
|
||||
"@testing-library/react": "^16.3.0",
|
||||
"@types/jest": "^29.5.14",
|
||||
"@types/node": "^20",
|
||||
|
@ -3597,18 +3597,17 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@testing-library/jest-dom": {
|
||||
"version": "6.6.3",
|
||||
"resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.6.3.tgz",
|
||||
"integrity": "sha512-IteBhl4XqYNkM54f4ejhLRJiZNqcSCoXUOG2CPK7qbD322KjQozM4kHQOfkG2oln9b9HTYqs+Sae8vBATubxxA==",
|
||||
"version": "6.8.0",
|
||||
"resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.8.0.tgz",
|
||||
"integrity": "sha512-WgXcWzVM6idy5JaftTVC8Vs83NKRmGJz4Hqs4oyOuO2J4r/y79vvKZsb+CaGyCSEbUPI6OsewfPd0G1A0/TUZQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@adobe/css-tools": "^4.4.0",
|
||||
"aria-query": "^5.0.0",
|
||||
"chalk": "^3.0.0",
|
||||
"css.escape": "^1.5.1",
|
||||
"dom-accessibility-api": "^0.6.3",
|
||||
"lodash": "^4.17.21",
|
||||
"picocolors": "^1.1.1",
|
||||
"redent": "^3.0.0"
|
||||
},
|
||||
"engines": {
|
||||
|
@ -3617,20 +3616,6 @@
|
|||
"yarn": ">=1"
|
||||
}
|
||||
},
|
||||
"node_modules/@testing-library/jest-dom/node_modules/chalk": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/chalk/-/chalk-3.0.0.tgz",
|
||||
"integrity": "sha512-4D3B6Wf41KOYRFdszmDqMCGq5VV/uMAB273JILmO+3jAlh8X4qDtdtgCR3fxtbLEMzSx22QdhnDcJvu2u1fVwg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ansi-styles": "^4.1.0",
|
||||
"supports-color": "^7.1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/@testing-library/jest-dom/node_modules/dom-accessibility-api": {
|
||||
"version": "0.6.3",
|
||||
"resolved": "https://registry.npmjs.org/dom-accessibility-api/-/dom-accessibility-api-0.6.3.tgz",
|
||||
|
@ -10021,9 +10006,9 @@
|
|||
"license": "MIT"
|
||||
},
|
||||
"node_modules/llama-stack-client": {
|
||||
"version": "0.2.18",
|
||||
"resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.18.tgz",
|
||||
"integrity": "sha512-k+xQOz/TIU0cINP4Aih8q6xs7f/6qs0fLDMXTTKQr5C0F1jtCjRiwsas7bTsDfpKfYhg/7Xy/wPw/uZgi6aIVg==",
|
||||
"version": "0.2.19",
|
||||
"resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.19.tgz",
|
||||
"integrity": "sha512-sDuAhUdEGlERZ3jlMUzPXcQTgMv/pGbDrPX0ifbE5S+gr7Q+7ohuQYrIXe+hXgIipFjq+y4b2c5laZ76tmAyEA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/node": "^18.11.18",
|
||||
|
@ -10066,13 +10051,6 @@
|
|||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/lodash": {
|
||||
"version": "4.17.21",
|
||||
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
|
||||
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/lodash.merge": {
|
||||
"version": "4.6.2",
|
||||
"resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz",
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
"class-variance-authority": "^0.7.1",
|
||||
"clsx": "^2.1.1",
|
||||
"framer-motion": "^11.18.2",
|
||||
"llama-stack-client": "^0.2.18",
|
||||
"llama-stack-client": "^0.2.19",
|
||||
"lucide-react": "^0.510.0",
|
||||
"next": "15.3.3",
|
||||
"next-auth": "^4.24.11",
|
||||
|
@ -41,7 +41,7 @@
|
|||
"@eslint/eslintrc": "^3",
|
||||
"@tailwindcss/postcss": "^4",
|
||||
"@testing-library/dom": "^10.4.1",
|
||||
"@testing-library/jest-dom": "^6.6.3",
|
||||
"@testing-library/jest-dom": "^6.8.0",
|
||||
"@testing-library/react": "^16.3.0",
|
||||
"@types/jest": "^29.5.14",
|
||||
"@types/node": "^20",
|
||||
|
|