From a2c4c12384dafd4cc11ced7e6ae53c2f66112fd2 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 6 Nov 2025 15:51:57 -0800 Subject: [PATCH] chore(ui): remove the Streamlit UI (#4097) --- docs/docs/building_applications/index.mdx | 3 - .../docs/building_applications/playground.mdx | 298 --------------- pyproject.toml | 8 - src/llama_stack/core/ui/Containerfile | 11 - src/llama_stack/core/ui/README.md | 50 --- src/llama_stack/core/ui/__init__.py | 5 - src/llama_stack/core/ui/app.py | 55 --- src/llama_stack/core/ui/modules/__init__.py | 5 - src/llama_stack/core/ui/modules/api.py | 32 -- src/llama_stack/core/ui/modules/utils.py | 42 --- src/llama_stack/core/ui/page/__init__.py | 5 - .../core/ui/page/distribution/__init__.py | 5 - .../core/ui/page/distribution/datasets.py | 18 - .../core/ui/page/distribution/eval_tasks.py | 20 - .../core/ui/page/distribution/models.py | 18 - .../core/ui/page/distribution/providers.py | 27 -- .../core/ui/page/distribution/resources.py | 48 --- .../ui/page/distribution/scoring_functions.py | 18 - .../core/ui/page/distribution/shields.py | 19 - .../core/ui/page/evaluations/__init__.py | 5 - .../core/ui/page/evaluations/app_eval.py | 143 ------- .../core/ui/page/evaluations/native_eval.py | 253 ------------- .../core/ui/page/playground/__init__.py | 5 - .../core/ui/page/playground/chat.py | 134 ------- .../core/ui/page/playground/tools.py | 352 ------------------ src/llama_stack/core/ui/requirements.txt | 5 - uv.lock | 13 - 27 files changed, 1597 deletions(-) delete mode 100644 docs/docs/building_applications/playground.mdx delete mode 100644 src/llama_stack/core/ui/Containerfile delete mode 100644 src/llama_stack/core/ui/README.md delete mode 100644 src/llama_stack/core/ui/__init__.py delete mode 100644 src/llama_stack/core/ui/app.py delete mode 100644 src/llama_stack/core/ui/modules/__init__.py delete mode 100644 src/llama_stack/core/ui/modules/api.py delete mode 100644 src/llama_stack/core/ui/modules/utils.py delete mode 100644 src/llama_stack/core/ui/page/__init__.py delete mode 100644 src/llama_stack/core/ui/page/distribution/__init__.py delete mode 100644 src/llama_stack/core/ui/page/distribution/datasets.py delete mode 100644 src/llama_stack/core/ui/page/distribution/eval_tasks.py delete mode 100644 src/llama_stack/core/ui/page/distribution/models.py delete mode 100644 src/llama_stack/core/ui/page/distribution/providers.py delete mode 100644 src/llama_stack/core/ui/page/distribution/resources.py delete mode 100644 src/llama_stack/core/ui/page/distribution/scoring_functions.py delete mode 100644 src/llama_stack/core/ui/page/distribution/shields.py delete mode 100644 src/llama_stack/core/ui/page/evaluations/__init__.py delete mode 100644 src/llama_stack/core/ui/page/evaluations/app_eval.py delete mode 100644 src/llama_stack/core/ui/page/evaluations/native_eval.py delete mode 100644 src/llama_stack/core/ui/page/playground/__init__.py delete mode 100644 src/llama_stack/core/ui/page/playground/chat.py delete mode 100644 src/llama_stack/core/ui/page/playground/tools.py delete mode 100644 src/llama_stack/core/ui/requirements.txt diff --git a/docs/docs/building_applications/index.mdx b/docs/docs/building_applications/index.mdx index a4b71efd7..935a02f8a 100644 --- a/docs/docs/building_applications/index.mdx +++ b/docs/docs/building_applications/index.mdx @@ -35,9 +35,6 @@ Here are the key topics that will help you build effective AI applications: - **[Telemetry](./telemetry.mdx)** - Monitor and analyze your agents' performance and behavior - 
**[Safety](./safety.mdx)** - Implement guardrails and safety measures to ensure responsible AI behavior -### šŸŽ® **Interactive Development** -- **[Playground](./playground.mdx)** - Interactive environment for testing and developing applications - ## Application Patterns ### šŸ¤– **Conversational Agents** diff --git a/docs/docs/building_applications/playground.mdx b/docs/docs/building_applications/playground.mdx deleted file mode 100644 index f3290a356..000000000 --- a/docs/docs/building_applications/playground.mdx +++ /dev/null @@ -1,298 +0,0 @@ ---- -title: Llama Stack Playground -description: Interactive interface to explore and experiment with Llama Stack capabilities -sidebar_label: Playground -sidebar_position: 10 ---- - -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; - -# Llama Stack Playground - -:::note[Experimental Feature] -The Llama Stack Playground is currently experimental and subject to change. We welcome feedback and contributions to help improve it. -::: - -The Llama Stack Playground is a simple interface that aims to: -- **Showcase capabilities and concepts** of Llama Stack in an interactive environment -- **Demo end-to-end application code** to help users get started building their own applications -- **Provide a UI** to help users inspect and understand Llama Stack API providers and resources - -## Key Features - -### Interactive Playground Pages - -The playground provides interactive pages for users to explore Llama Stack API capabilities: - -#### Chatbot Interface - - - - - - -**Simple Chat Interface** -- Chat directly with Llama models through an intuitive interface -- Uses the `/chat/completions` streaming API under the hood -- Real-time message streaming for responsive interactions -- Perfect for testing model capabilities and prompt engineering - - - - -**Document-Aware Conversations** -- Upload documents to create memory banks -- Chat with a RAG-enabled agent that can query your documents -- Uses Llama Stack's `/agents` API to create and manage RAG sessions -- Ideal for exploring knowledge-enhanced AI applications - - - - -#### Evaluation Interface - - - - - - -**Custom Dataset Evaluation** -- Upload your own evaluation datasets -- Run evaluations using available scoring functions -- Uses Llama Stack's `/scoring` API for flexible evaluation workflows -- Great for testing application performance on custom metrics - - - - - - -**Pre-registered Evaluation Tasks** -- Evaluate models or agents on pre-defined tasks -- Uses Llama Stack's `/eval` API for comprehensive evaluation -- Combines datasets and scoring functions for standardized testing - -**Setup Requirements:** -Register evaluation datasets and benchmarks first: - -```bash -# Register evaluation dataset -llama-stack-client datasets register \ - --dataset-id "mmlu" \ - --provider-id "huggingface" \ - --url "https://huggingface.co/datasets/llamastack/evals" \ - --metadata '{"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"}' \ - --schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string"}, "chat_completion_input": {"type": "string"}}' - -# Register benchmark task -llama-stack-client benchmarks register \ - --eval-task-id meta-reference-mmlu \ - --provider-id meta-reference \ - --dataset-id mmlu \ - --scoring-functions basic::regex_parser_multiple_choice_answer -``` - - - - -#### Inspection Interface - - - - - - -**Provider Management** -- Inspect available Llama Stack API providers -- View provider configurations and capabilities -- Uses 
the `/providers` API for real-time provider information -- Essential for understanding your deployment's capabilities - - - - -**Resource Exploration** -- Inspect Llama Stack API resources including: - - **Models**: Available language models - - **Datasets**: Registered evaluation datasets - - **Memory Banks**: Vector databases and knowledge stores - - **Benchmarks**: Evaluation tasks and scoring functions - - **Shields**: Safety and content moderation tools -- Uses `//list` APIs for comprehensive resource visibility -- For detailed information about resources, see [Core Concepts](/docs/concepts) - - - - -## Getting Started - -### Quick Start Guide - - - - -**1. Start the Llama Stack API Server** - -```bash -llama stack list-deps together | xargs -L1 uv pip install -llama stack run together -``` - -**2. Start the Streamlit UI** - -```bash -# Launch the playground interface -uv run --with ".[ui]" streamlit run llama_stack.core/ui/app.py -``` - - - - -**Making the Most of the Playground:** - -- **Start with Chat**: Test basic model interactions and prompt engineering -- **Explore RAG**: Upload sample documents to see knowledge-enhanced responses -- **Try Evaluations**: Use the scoring interface to understand evaluation metrics -- **Inspect Resources**: Check what providers and resources are available -- **Experiment with Settings**: Adjust parameters to see how they affect results - - - - -### Available Distributions - -The playground works with any Llama Stack distribution. Popular options include: - - - - -```bash -llama stack list-deps together | xargs -L1 uv pip install -llama stack run together -``` - -**Features:** -- Cloud-hosted models -- Fast inference -- Multiple model options - - - - -```bash -llama stack list-deps ollama | xargs -L1 uv pip install -llama stack run ollama -``` - -**Features:** -- Local model execution -- Privacy-focused -- No internet required - - - - -```bash -llama stack list-deps meta-reference | xargs -L1 uv pip install -llama stack run meta-reference -``` - -**Features:** -- Reference implementation -- All API features available -- Best for development - - - - -## Use Cases & Examples - -### Educational Use Cases -- **Learning Llama Stack**: Hands-on exploration of API capabilities -- **Prompt Engineering**: Interactive testing of different prompting strategies -- **RAG Experimentation**: Understanding how document retrieval affects responses -- **Evaluation Understanding**: See how different metrics evaluate model performance - -### Development Use Cases -- **Prototype Testing**: Quick validation of application concepts -- **API Exploration**: Understanding available endpoints and parameters -- **Integration Planning**: Seeing how different components work together -- **Demo Creation**: Showcasing Llama Stack capabilities to stakeholders - -### Research Use Cases -- **Model Comparison**: Side-by-side testing of different models -- **Evaluation Design**: Understanding how scoring functions work -- **Safety Testing**: Exploring shield effectiveness with different inputs -- **Performance Analysis**: Measuring model behavior across different scenarios - -## Best Practices - -### šŸš€ **Getting Started** -- Begin with simple chat interactions to understand basic functionality -- Gradually explore more advanced features like RAG and evaluations -- Use the inspection tools to understand your deployment's capabilities - -### šŸ”§ **Development Workflow** -- Use the playground to prototype before writing application code -- Test different parameter settings 
interactively
-- Validate evaluation approaches before implementing them programmatically
-
-### šŸ“Š **Evaluation & Testing**
-- Start with simple scoring functions before trying complex evaluations
-- Use the playground to understand evaluation results before automation
-- Test safety features with various input types
-
-### šŸŽÆ **Production Preparation**
-- Use playground insights to inform your production API usage
-- Test edge cases and error conditions interactively
-- Validate resource configurations before deployment
-
-## Related Resources
-
-- **[Getting Started Guide](../getting_started/quickstart)** - Complete setup and introduction
-- **[Core Concepts](/docs/concepts)** - Understanding Llama Stack fundamentals
-- **[Agents](./agent)** - Building intelligent agents
-- **[RAG (Retrieval Augmented Generation)](./rag)** - Knowledge-enhanced applications
-- **[Evaluations](./evals)** - Comprehensive evaluation framework
-- **[API Reference](/docs/api/llama-stack-specification)** - Complete API documentation
diff --git a/pyproject.toml b/pyproject.toml
index 8f07f9cbd..f8577ad2b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,14 +51,6 @@ dependencies = [
     "sqlalchemy[asyncio]>=2.0.41", # server - for conversations
 ]
 
-[project.optional-dependencies]
-ui = [
-    "streamlit",
-    "pandas",
-    "llama-stack-client>=0.3.0",
-    "streamlit-option-menu",
-]
-
 [dependency-groups]
 dev = [
     "pytest>=8.4",
diff --git a/src/llama_stack/core/ui/Containerfile b/src/llama_stack/core/ui/Containerfile
deleted file mode 100644
index 0126d1867..000000000
--- a/src/llama_stack/core/ui/Containerfile
+++ /dev/null
@@ -1,11 +0,0 @@
-# More info on playground configuration can be found here:
-# https://llama-stack.readthedocs.io/en/latest/playground
-
-FROM python:3.12-slim
-WORKDIR /app
-COPY . /app/
-RUN /usr/local/bin/python -m pip install --upgrade pip && \
-    /usr/local/bin/pip3 install -r requirements.txt
-EXPOSE 8501
-
-ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
diff --git a/src/llama_stack/core/ui/README.md b/src/llama_stack/core/ui/README.md
deleted file mode 100644
index 37f1501c9..000000000
--- a/src/llama_stack/core/ui/README.md
+++ /dev/null
@@ -1,50 +0,0 @@
-# (Experimental) Llama Stack UI
-
-## Docker Setup
-
-:warning: This is a work in progress.
-
-## Developer Setup
-
-1. Start up the Llama Stack API server. More details [here](https://llamastack.github.io/latest/getting_started/index.html).
-
-```bash
-llama stack list-deps together | xargs -L1 uv pip install
-
-llama stack run together
-```
-
-2. (Optional) Register datasets and eval tasks as resources if you want to run pre-configured evaluation flows (e.g. the Evaluations (Generation + Scoring) page).
-
-```bash
-llama-stack-client datasets register \
---dataset-id "mmlu" \
---provider-id "huggingface" \
---url "https://huggingface.co/datasets/llamastack/evals" \
---metadata '{"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"}' \
---schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string"}, "chat_completion_input": {"type": "string"}}'
-```
-
-```bash
-llama-stack-client benchmarks register \
---eval-task-id meta-reference-mmlu \
---provider-id meta-reference \
---dataset-id mmlu \
---scoring-functions basic::regex_parser_multiple_choice_answer
-```
-
-3. 
Start Streamlit UI - -```bash -uv run --with ".[ui]" streamlit run llama_stack.core/ui/app.py -``` - -## Environment Variables - -| Environment Variable | Description | Default Value | -|----------------------------|------------------------------------|---------------------------| -| LLAMA_STACK_ENDPOINT | The endpoint for the Llama Stack | http://localhost:8321 | -| FIREWORKS_API_KEY | API key for Fireworks provider | (empty string) | -| TOGETHER_API_KEY | API key for Together provider | (empty string) | -| SAMBANOVA_API_KEY | API key for SambaNova provider | (empty string) | -| OPENAI_API_KEY | API key for OpenAI provider | (empty string) | diff --git a/src/llama_stack/core/ui/__init__.py b/src/llama_stack/core/ui/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/src/llama_stack/core/ui/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/src/llama_stack/core/ui/app.py b/src/llama_stack/core/ui/app.py deleted file mode 100644 index 441f65d20..000000000 --- a/src/llama_stack/core/ui/app.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -import streamlit as st - - -def main(): - # Evaluation pages - application_evaluation_page = st.Page( - "page/evaluations/app_eval.py", - title="Evaluations (Scoring)", - icon="šŸ“Š", - default=False, - ) - native_evaluation_page = st.Page( - "page/evaluations/native_eval.py", - title="Evaluations (Generation + Scoring)", - icon="šŸ“Š", - default=False, - ) - - # Playground pages - chat_page = st.Page("page/playground/chat.py", title="Chat", icon="šŸ’¬", default=True) - rag_page = st.Page("page/playground/rag.py", title="RAG", icon="šŸ’¬", default=False) - tool_page = st.Page("page/playground/tools.py", title="Tools", icon="šŸ› ", default=False) - - # Distribution pages - resources_page = st.Page("page/distribution/resources.py", title="Resources", icon="šŸ”", default=False) - provider_page = st.Page( - "page/distribution/providers.py", - title="API Providers", - icon="šŸ”", - default=False, - ) - - pg = st.navigation( - { - "Playground": [ - chat_page, - rag_page, - tool_page, - application_evaluation_page, - native_evaluation_page, - ], - "Inspect": [provider_page, resources_page], - }, - expanded=False, - ) - pg.run() - - -if __name__ == "__main__": - main() diff --git a/src/llama_stack/core/ui/modules/__init__.py b/src/llama_stack/core/ui/modules/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/src/llama_stack/core/ui/modules/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/src/llama_stack/core/ui/modules/api.py b/src/llama_stack/core/ui/modules/api.py deleted file mode 100644 index 9db87b280..000000000 --- a/src/llama_stack/core/ui/modules/api.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import os - -from llama_stack_client import LlamaStackClient - - -class LlamaStackApi: - def __init__(self): - self.client = LlamaStackClient( - base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:8321"), - provider_data={ - "fireworks_api_key": os.environ.get("FIREWORKS_API_KEY", ""), - "together_api_key": os.environ.get("TOGETHER_API_KEY", ""), - "sambanova_api_key": os.environ.get("SAMBANOVA_API_KEY", ""), - "openai_api_key": os.environ.get("OPENAI_API_KEY", ""), - "tavily_search_api_key": os.environ.get("TAVILY_SEARCH_API_KEY", ""), - }, - ) - - def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: dict | None): - """Run scoring on a single row""" - if not scoring_params: - scoring_params = dict.fromkeys(scoring_function_ids) - return self.client.scoring.score(input_rows=[row], scoring_functions=scoring_params) - - -llama_stack_api = LlamaStackApi() diff --git a/src/llama_stack/core/ui/modules/utils.py b/src/llama_stack/core/ui/modules/utils.py deleted file mode 100644 index 67cce98fa..000000000 --- a/src/llama_stack/core/ui/modules/utils.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import base64 -import os - -import pandas as pd -import streamlit as st - - -def process_dataset(file): - if file is None: - return "No file uploaded", None - - try: - # Determine file type and read accordingly - file_ext = os.path.splitext(file.name)[1].lower() - if file_ext == ".csv": - df = pd.read_csv(file) - elif file_ext in [".xlsx", ".xls"]: - df = pd.read_excel(file) - else: - return "Unsupported file format. Please upload a CSV or Excel file.", None - - return df - - except Exception as e: - st.error(f"Error processing file: {str(e)}") - return None - - -def data_url_from_file(file) -> str: - file_content = file.getvalue() - base64_content = base64.b64encode(file_content).decode("utf-8") - mime_type = file.type - - data_url = f"data:{mime_type};base64,{base64_content}" - - return data_url diff --git a/src/llama_stack/core/ui/page/__init__.py b/src/llama_stack/core/ui/page/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/src/llama_stack/core/ui/page/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/src/llama_stack/core/ui/page/distribution/__init__.py b/src/llama_stack/core/ui/page/distribution/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/src/llama_stack/core/ui/page/distribution/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/src/llama_stack/core/ui/page/distribution/datasets.py b/src/llama_stack/core/ui/page/distribution/datasets.py deleted file mode 100644 index aab0901ac..000000000 --- a/src/llama_stack/core/ui/page/distribution/datasets.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - - -def datasets(): - st.header("Datasets") - - datasets_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.datasets.list()} - if len(datasets_info) > 0: - selected_dataset = st.selectbox("Select a dataset", list(datasets_info.keys())) - st.json(datasets_info[selected_dataset], expanded=True) diff --git a/src/llama_stack/core/ui/page/distribution/eval_tasks.py b/src/llama_stack/core/ui/page/distribution/eval_tasks.py deleted file mode 100644 index 1a0ce502b..000000000 --- a/src/llama_stack/core/ui/page/distribution/eval_tasks.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - - -def benchmarks(): - # Benchmarks Section - st.header("Benchmarks") - - benchmarks_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.benchmarks.list()} - - if len(benchmarks_info) > 0: - selected_benchmark = st.selectbox("Select an eval task", list(benchmarks_info.keys()), key="benchmark_inspect") - st.json(benchmarks_info[selected_benchmark], expanded=True) diff --git a/src/llama_stack/core/ui/page/distribution/models.py b/src/llama_stack/core/ui/page/distribution/models.py deleted file mode 100644 index e00b327ae..000000000 --- a/src/llama_stack/core/ui/page/distribution/models.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - - -def models(): - # Models Section - st.header("Models") - models_info = {m.id: m.model_dump() for m in llama_stack_api.client.models.list()} - - selected_model = st.selectbox("Select a model", list(models_info.keys())) - st.json(models_info[selected_model]) diff --git a/src/llama_stack/core/ui/page/distribution/providers.py b/src/llama_stack/core/ui/page/distribution/providers.py deleted file mode 100644 index 3ec6026d1..000000000 --- a/src/llama_stack/core/ui/page/distribution/providers.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - - -def providers(): - st.header("šŸ” API Providers") - apis_providers_lst = llama_stack_api.client.providers.list() - api_to_providers = {} - for api_provider in apis_providers_lst: - if api_provider.api in api_to_providers: - api_to_providers[api_provider.api].append(api_provider) - else: - api_to_providers[api_provider.api] = [api_provider] - - for api in api_to_providers.keys(): - st.markdown(f"###### {api}") - st.dataframe([x.to_dict() for x in api_to_providers[api]], width=500) - - -providers() diff --git a/src/llama_stack/core/ui/page/distribution/resources.py b/src/llama_stack/core/ui/page/distribution/resources.py deleted file mode 100644 index 6e7122ceb..000000000 --- a/src/llama_stack/core/ui/page/distribution/resources.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. 
and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from streamlit_option_menu import option_menu - -from llama_stack.core.ui.page.distribution.datasets import datasets -from llama_stack.core.ui.page.distribution.eval_tasks import benchmarks -from llama_stack.core.ui.page.distribution.models import models -from llama_stack.core.ui.page.distribution.scoring_functions import scoring_functions -from llama_stack.core.ui.page.distribution.shields import shields - - -def resources_page(): - options = [ - "Models", - "Shields", - "Scoring Functions", - "Datasets", - "Benchmarks", - ] - icons = ["magic", "shield", "file-bar-graph", "database", "list-task"] - selected_resource = option_menu( - None, - options, - icons=icons, - orientation="horizontal", - styles={ - "nav-link": { - "font-size": "12px", - }, - }, - ) - if selected_resource == "Benchmarks": - benchmarks() - elif selected_resource == "Datasets": - datasets() - elif selected_resource == "Models": - models() - elif selected_resource == "Scoring Functions": - scoring_functions() - elif selected_resource == "Shields": - shields() - - -resources_page() diff --git a/src/llama_stack/core/ui/page/distribution/scoring_functions.py b/src/llama_stack/core/ui/page/distribution/scoring_functions.py deleted file mode 100644 index 2a5196fa9..000000000 --- a/src/llama_stack/core/ui/page/distribution/scoring_functions.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - - -def scoring_functions(): - st.header("Scoring Functions") - - scoring_functions_info = {s.identifier: s.to_dict() for s in llama_stack_api.client.scoring_functions.list()} - - selected_scoring_function = st.selectbox("Select a scoring function", list(scoring_functions_info.keys())) - st.json(scoring_functions_info[selected_scoring_function], expanded=True) diff --git a/src/llama_stack/core/ui/page/distribution/shields.py b/src/llama_stack/core/ui/page/distribution/shields.py deleted file mode 100644 index ecce2f12b..000000000 --- a/src/llama_stack/core/ui/page/distribution/shields.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - - -def shields(): - # Shields Section - st.header("Shields") - - shields_info = {s.identifier: s.to_dict() for s in llama_stack_api.client.shields.list()} - - selected_shield = st.selectbox("Select a shield", list(shields_info.keys())) - st.json(shields_info[selected_shield]) diff --git a/src/llama_stack/core/ui/page/evaluations/__init__.py b/src/llama_stack/core/ui/page/evaluations/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/src/llama_stack/core/ui/page/evaluations/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
diff --git a/src/llama_stack/core/ui/page/evaluations/app_eval.py b/src/llama_stack/core/ui/page/evaluations/app_eval.py deleted file mode 100644 index 07e6349c9..000000000 --- a/src/llama_stack/core/ui/page/evaluations/app_eval.py +++ /dev/null @@ -1,143 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import json - -import pandas as pd -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api -from llama_stack.core.ui.modules.utils import process_dataset - - -def application_evaluation_page(): - st.set_page_config(page_title="Evaluations (Scoring)", page_icon="šŸ¦™") - st.title("šŸ“Š Evaluations (Scoring)") - - # File uploader - uploaded_file = st.file_uploader("Upload Dataset", type=["csv", "xlsx", "xls"]) - - if uploaded_file is None: - st.error("No file uploaded") - return - - # Process uploaded file - df = process_dataset(uploaded_file) - if df is None: - st.error("Error processing file") - return - - # Display dataset information - st.success("Dataset loaded successfully!") - - # Display dataframe preview - st.subheader("Dataset Preview") - st.dataframe(df) - - # Select Scoring Functions to Run Evaluation On - st.subheader("Select Scoring Functions") - scoring_functions = llama_stack_api.client.scoring_functions.list() - scoring_functions = {sf.identifier: sf for sf in scoring_functions} - scoring_functions_names = list(scoring_functions.keys()) - selected_scoring_functions = st.multiselect( - "Choose one or more scoring functions", - options=scoring_functions_names, - help="Choose one or more scoring functions.", - ) - - available_models = llama_stack_api.client.models.list() - available_models = [m.identifier for m in available_models] - - scoring_params = {} - if selected_scoring_functions: - st.write("Selected:") - for scoring_fn_id in selected_scoring_functions: - scoring_fn = scoring_functions[scoring_fn_id] - st.write(f"- **{scoring_fn_id}**: {scoring_fn.description}") - new_params = None - if scoring_fn.params: - new_params = {} - for param_name, param_value in scoring_fn.params.to_dict().items(): - if param_name == "type": - new_params[param_name] = param_value - continue - - if param_name == "judge_model": - value = st.selectbox( - f"Select **{param_name}** for {scoring_fn_id}", - options=available_models, - index=0, - key=f"{scoring_fn_id}_{param_name}", - ) - new_params[param_name] = value - else: - value = st.text_area( - f"Enter value for **{param_name}** in {scoring_fn_id} in valid JSON format", - value=json.dumps(param_value, indent=2), - height=80, - ) - try: - new_params[param_name] = json.loads(value) - except json.JSONDecodeError: - st.error(f"Invalid JSON for **{param_name}** in {scoring_fn_id}") - - st.json(new_params) - scoring_params[scoring_fn_id] = new_params - - # Add run evaluation button & slider - total_rows = len(df) - num_rows = st.slider("Number of rows to evaluate", 1, total_rows, total_rows) - - if st.button("Run Evaluation"): - progress_text = "Running evaluation..." 
- progress_bar = st.progress(0, text=progress_text) - rows = df.to_dict(orient="records") - if num_rows < total_rows: - rows = rows[:num_rows] - - # Create separate containers for progress text and results - progress_text_container = st.empty() - results_container = st.empty() - output_res = {} - for i, r in enumerate(rows): - # Update progress - progress = i / len(rows) - progress_bar.progress(progress, text=progress_text) - - # Run evaluation for current row - score_res = llama_stack_api.run_scoring( - r, - scoring_function_ids=selected_scoring_functions, - scoring_params=scoring_params, - ) - - for k in r.keys(): - if k not in output_res: - output_res[k] = [] - output_res[k].append(r[k]) - - for fn_id in selected_scoring_functions: - if fn_id not in output_res: - output_res[fn_id] = [] - output_res[fn_id].append(score_res.results[fn_id].score_rows[0]) - - # Display current row results using separate containers - progress_text_container.write(f"Expand to see current processed result ({i + 1} / {len(rows)})") - results_container.json( - score_res.to_json(), - expanded=2, - ) - - progress_bar.progress(1.0, text="Evaluation complete!") - - # Display results in dataframe - if output_res: - output_df = pd.DataFrame(output_res) - st.subheader("Evaluation Results") - st.dataframe(output_df) - - -application_evaluation_page() diff --git a/src/llama_stack/core/ui/page/evaluations/native_eval.py b/src/llama_stack/core/ui/page/evaluations/native_eval.py deleted file mode 100644 index 2bef63b2f..000000000 --- a/src/llama_stack/core/ui/page/evaluations/native_eval.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import json - -import pandas as pd -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - - -def select_benchmark_1(): - # Select Benchmarks - st.subheader("1. Choose An Eval Task") - benchmarks = llama_stack_api.client.benchmarks.list() - benchmarks = {et.identifier: et for et in benchmarks} - benchmarks_names = list(benchmarks.keys()) - selected_benchmark = st.selectbox( - "Choose an eval task.", - options=benchmarks_names, - help="Choose an eval task. Each eval task is parameterized by a dataset, and list of scoring functions.", - ) - with st.expander("View Eval Task"): - st.json(benchmarks[selected_benchmark], expanded=True) - - st.session_state["selected_benchmark"] = selected_benchmark - st.session_state["benchmarks"] = benchmarks - if st.button("Confirm", key="confirm_1"): - st.session_state["selected_benchmark_1_next"] = True - - -def define_eval_candidate_2(): - if not st.session_state.get("selected_benchmark_1_next", None): - return - - st.subheader("2. Define Eval Candidate") - st.info( - """ - Define the configurations for the evaluation candidate model or agent used for generation. - Select "model" if you want to run generation with inference API, or "agent" if you want to run generation with agent API through specifying AgentConfig. 
- """ - ) - with st.expander("Define Eval Candidate", expanded=True): - # Define Eval Candidate - candidate_type = st.radio("Candidate Type", ["model", "agent"]) - - available_models = llama_stack_api.client.models.list() - available_models = [model.identifier for model in available_models] - selected_model = st.selectbox( - "Choose a model", - available_models, - index=0, - ) - - # Sampling Parameters - st.markdown("##### Sampling Parameters") - temperature = st.slider( - "Temperature", - min_value=0.0, - max_value=1.0, - value=0.0, - step=0.1, - help="Controls the randomness of the response. Higher values make the output more creative and unexpected, lower values make it more conservative and predictable", - ) - top_p = st.slider( - "Top P", - min_value=0.0, - max_value=1.0, - value=0.95, - step=0.1, - ) - max_tokens = st.slider( - "Max Tokens", - min_value=0, - max_value=4096, - value=512, - step=1, - help="The maximum number of tokens to generate", - ) - repetition_penalty = st.slider( - "Repetition Penalty", - min_value=1.0, - max_value=2.0, - value=1.0, - step=0.1, - help="Controls the likelihood for generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty, 2 will strongly discourage model to repeat words or phrases.", - ) - if candidate_type == "model": - if temperature > 0.0: - strategy = { - "type": "top_p", - "temperature": temperature, - "top_p": top_p, - } - else: - strategy = {"type": "greedy"} - - eval_candidate = { - "type": "model", - "model": selected_model, - "sampling_params": { - "strategy": strategy, - "max_tokens": max_tokens, - "repetition_penalty": repetition_penalty, - }, - } - elif candidate_type == "agent": - system_prompt = st.text_area( - "System Prompt", - value="You are a helpful AI assistant.", - help="Initial instructions given to the AI to set its behavior and context", - ) - tools_json = st.text_area( - "Tools Configuration (JSON)", - value=json.dumps( - [ - { - "type": "brave_search", - "engine": "brave", - "api_key": "ENTER_BRAVE_API_KEY_HERE", - } - ] - ), - help="Enter tool configurations in JSON format. Each tool should have a name, description, and parameters.", - height=200, - ) - try: - tools = json.loads(tools_json) - except json.JSONDecodeError: - st.error("Invalid JSON format for tools configuration") - tools = [] - eval_candidate = { - "type": "agent", - "config": { - "model": selected_model, - "instructions": system_prompt, - "tools": tools, - "tool_choice": "auto", - "tool_prompt_format": "json", - "input_shields": [], - "output_shields": [], - "enable_session_persistence": False, - }, - } - st.session_state["eval_candidate"] = eval_candidate - - if st.button("Confirm", key="confirm_2"): - st.session_state["selected_eval_candidate_2_next"] = True - - -def run_evaluation_3(): - if not st.session_state.get("selected_eval_candidate_2_next", None): - return - - st.subheader("3. Run Evaluation") - # Add info box to explain configurations being used - st.info( - """ - Review the configurations that will be used for this evaluation run, make any necessary changes, and then click the "Run Evaluation" button. 
- """ - ) - selected_benchmark = st.session_state["selected_benchmark"] - benchmarks = st.session_state["benchmarks"] - eval_candidate = st.session_state["eval_candidate"] - - dataset_id = benchmarks[selected_benchmark].dataset_id - rows = llama_stack_api.client.datasets.iterrows( - dataset_id=dataset_id, - ) - total_rows = len(rows.data) - # Add number of examples control - num_rows = st.number_input( - "Number of Examples to Evaluate", - min_value=1, - max_value=total_rows, - value=5, - help="Number of examples from the dataset to evaluate. ", - ) - - benchmark_config = { - "type": "benchmark", - "eval_candidate": eval_candidate, - "scoring_params": {}, - } - - with st.expander("View Evaluation Task", expanded=True): - st.json(benchmarks[selected_benchmark], expanded=True) - with st.expander("View Evaluation Task Configuration", expanded=True): - st.json(benchmark_config, expanded=True) - - # Add run button and handle evaluation - if st.button("Run Evaluation"): - progress_text = "Running evaluation..." - progress_bar = st.progress(0, text=progress_text) - rows = rows.data - if num_rows < total_rows: - rows = rows[:num_rows] - - # Create separate containers for progress text and results - progress_text_container = st.empty() - results_container = st.empty() - output_res = {} - for i, r in enumerate(rows): - # Update progress - progress = i / len(rows) - progress_bar.progress(progress, text=progress_text) - # Run evaluation for current row - eval_res = llama_stack_api.client.eval.evaluate_rows( - benchmark_id=selected_benchmark, - input_rows=[r], - scoring_functions=benchmarks[selected_benchmark].scoring_functions, - benchmark_config=benchmark_config, - ) - - for k in r.keys(): - if k not in output_res: - output_res[k] = [] - output_res[k].append(r[k]) - - for k in eval_res.generations[0].keys(): - if k not in output_res: - output_res[k] = [] - output_res[k].append(eval_res.generations[0][k]) - - for scoring_fn in benchmarks[selected_benchmark].scoring_functions: - if scoring_fn not in output_res: - output_res[scoring_fn] = [] - output_res[scoring_fn].append(eval_res.scores[scoring_fn].score_rows[0]) - - progress_text_container.write(f"Expand to see current processed result ({i + 1} / {len(rows)})") - results_container.json(eval_res, expanded=2) - - progress_bar.progress(1.0, text="Evaluation complete!") - # Display results in dataframe - if output_res: - output_df = pd.DataFrame(output_res) - st.subheader("Evaluation Results") - st.dataframe(output_df) - - -def native_evaluation_page(): - st.set_page_config(page_title="Evaluations (Generation + Scoring)", page_icon="šŸ¦™") - st.title("šŸ“Š Evaluations (Generation + Scoring)") - - select_benchmark_1() - define_eval_candidate_2() - run_evaluation_3() - - -native_evaluation_page() diff --git a/src/llama_stack/core/ui/page/playground/__init__.py b/src/llama_stack/core/ui/page/playground/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/src/llama_stack/core/ui/page/playground/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/src/llama_stack/core/ui/page/playground/chat.py b/src/llama_stack/core/ui/page/playground/chat.py deleted file mode 100644 index c813f05dc..000000000 --- a/src/llama_stack/core/ui/page/playground/chat.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. 
-# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - -# Sidebar configurations -with st.sidebar: - st.header("Configuration") - available_models = llama_stack_api.client.models.list() - available_models = [ - model.id - for model in available_models - if model.custom_metadata and model.custom_metadata.get("model_type") == "llm" - ] - selected_model = st.selectbox( - "Choose a model", - available_models, - index=0, - ) - - temperature = st.slider( - "Temperature", - min_value=0.0, - max_value=1.0, - value=0.0, - step=0.1, - help="Controls the randomness of the response. Higher values make the output more creative and unexpected, lower values make it more conservative and predictable", - ) - - top_p = st.slider( - "Top P", - min_value=0.0, - max_value=1.0, - value=0.95, - step=0.1, - ) - - max_tokens = st.slider( - "Max Tokens", - min_value=0, - max_value=4096, - value=512, - step=1, - help="The maximum number of tokens to generate", - ) - - repetition_penalty = st.slider( - "Repetition Penalty", - min_value=1.0, - max_value=2.0, - value=1.0, - step=0.1, - help="Controls the likelihood for generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty, 2 will strongly discourage model to repeat words or phrases.", - ) - - stream = st.checkbox("Stream", value=True) - system_prompt = st.text_area( - "System Prompt", - value="You are a helpful AI assistant.", - help="Initial instructions given to the AI to set its behavior and context", - ) - - # Add clear chat button to sidebar - if st.button("Clear Chat", use_container_width=True): - st.session_state.messages = [] - st.rerun() - - -# Main chat interface -st.title("šŸ¦™ Chat") - - -# Initialize chat history -if "messages" not in st.session_state: - st.session_state.messages = [] - -# Display chat messages -for message in st.session_state.messages: - with st.chat_message(message["role"]): - st.markdown(message["content"]) - -# Chat input -if prompt := st.chat_input("Example: What is Llama Stack?"): - # Add user message to chat history - st.session_state.messages.append({"role": "user", "content": prompt}) - - # Display user message - with st.chat_message("user"): - st.markdown(prompt) - - # Display assistant response - with st.chat_message("assistant"): - message_placeholder = st.empty() - full_response = "" - - if temperature > 0.0: - strategy = { - "type": "top_p", - "temperature": temperature, - "top_p": top_p, - } - else: - strategy = {"type": "greedy"} - - response = llama_stack_api.client.inference.chat_completion( - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": prompt}, - ], - model_id=selected_model, - stream=stream, - sampling_params={ - "strategy": strategy, - "max_tokens": max_tokens, - "repetition_penalty": repetition_penalty, - }, - ) - - if stream: - for chunk in response: - if chunk.event.event_type == "progress": - full_response += chunk.event.delta.text - message_placeholder.markdown(full_response + "ā–Œ") - message_placeholder.markdown(full_response) - else: - full_response = response.completion_message.content - message_placeholder.markdown(full_response) - - st.session_state.messages.append({"role": "assistant", "content": full_response}) diff --git a/src/llama_stack/core/ui/page/playground/tools.py b/src/llama_stack/core/ui/page/playground/tools.py 
deleted file mode 100644 index 16fd464ee..000000000 --- a/src/llama_stack/core/ui/page/playground/tools.py +++ /dev/null @@ -1,352 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import enum -import json -import uuid - -import streamlit as st -from llama_stack_client import Agent -from llama_stack_client.lib.agents.react.agent import ReActAgent -from llama_stack_client.lib.agents.react.tool_parser import ReActOutput - -from llama_stack.core.ui.modules.api import llama_stack_api - - -class AgentType(enum.Enum): - REGULAR = "Regular" - REACT = "ReAct" - - -def tool_chat_page(): - st.title("šŸ›  Tools") - - client = llama_stack_api.client - models = client.models.list() - model_list = [model.identifier for model in models if model.api_model_type == "llm"] - - tool_groups = client.toolgroups.list() - tool_groups_list = [tool_group.identifier for tool_group in tool_groups] - mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")] - builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")] - selected_vector_stores = [] - - def reset_agent(): - st.session_state.clear() - st.cache_resource.clear() - - with st.sidebar: - st.title("Configuration") - st.subheader("Model") - model = st.selectbox(label="Model", options=model_list, on_change=reset_agent, label_visibility="collapsed") - - st.subheader("Available ToolGroups") - - toolgroup_selection = st.pills( - label="Built-in tools", - options=builtin_tools_list, - selection_mode="multi", - on_change=reset_agent, - format_func=lambda tool: "".join(tool.split("::")[1:]), - help="List of built-in tools from your llama stack server.", - ) - - if "builtin::rag" in toolgroup_selection: - vector_stores = llama_stack_api.client.vector_stores.list() or [] - if not vector_stores: - st.info("No vector databases available for selection.") - vector_stores = [vector_store.identifier for vector_store in vector_stores] - selected_vector_stores = st.multiselect( - label="Select Document Collections to use in RAG queries", - options=vector_stores, - on_change=reset_agent, - ) - - mcp_selection = st.pills( - label="MCP Servers", - options=mcp_tools_list, - selection_mode="multi", - on_change=reset_agent, - format_func=lambda tool: "".join(tool.split("::")[1:]), - help="List of MCP servers registered to your llama stack server.", - ) - - toolgroup_selection.extend(mcp_selection) - - grouped_tools = {} - total_tools = 0 - - for toolgroup_id in toolgroup_selection: - tools = client.tools.list(toolgroup_id=toolgroup_id) - grouped_tools[toolgroup_id] = [tool.name for tool in tools] - total_tools += len(tools) - - st.markdown(f"Active Tools: šŸ›  {total_tools}") - - for group_id, tools in grouped_tools.items(): - with st.expander(f"šŸ”§ Tools from `{group_id}`"): - for idx, tool in enumerate(tools, start=1): - st.markdown(f"{idx}. 
`{tool.split(':')[-1]}`") - - st.subheader("Agent Configurations") - st.subheader("Agent Type") - agent_type = st.radio( - label="Select Agent Type", - options=["Regular", "ReAct"], - on_change=reset_agent, - ) - - if agent_type == "ReAct": - agent_type = AgentType.REACT - else: - agent_type = AgentType.REGULAR - - max_tokens = st.slider( - "Max Tokens", - min_value=0, - max_value=4096, - value=512, - step=64, - help="The maximum number of tokens to generate", - on_change=reset_agent, - ) - - for i, tool_name in enumerate(toolgroup_selection): - if tool_name == "builtin::rag": - tool_dict = dict( - name="builtin::rag", - args={ - "vector_store_ids": list(selected_vector_stores), - }, - ) - toolgroup_selection[i] = tool_dict - - @st.cache_resource - def create_agent(): - if "agent_type" in st.session_state and st.session_state.agent_type == AgentType.REACT: - return ReActAgent( - client=client, - model=model, - tools=toolgroup_selection, - response_format={ - "type": "json_schema", - "json_schema": ReActOutput.model_json_schema(), - }, - sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens}, - ) - else: - return Agent( - client, - model=model, - instructions="You are a helpful assistant. When you use a tool always respond with a summary of the result.", - tools=toolgroup_selection, - sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens}, - ) - - st.session_state.agent_type = agent_type - - agent = create_agent() - - if "agent_session_id" not in st.session_state: - st.session_state["agent_session_id"] = agent.create_session(session_name=f"tool_demo_{uuid.uuid4()}") - - session_id = st.session_state["agent_session_id"] - - if "messages" not in st.session_state: - st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}] - - for msg in st.session_state.messages: - with st.chat_message(msg["role"]): - st.markdown(msg["content"]) - - if prompt := st.chat_input(placeholder=""): - with st.chat_message("user"): - st.markdown(prompt) - - st.session_state.messages.append({"role": "user", "content": prompt}) - - turn_response = agent.create_turn( - session_id=session_id, - messages=[{"role": "user", "content": prompt}], - stream=True, - ) - - def response_generator(turn_response): - if st.session_state.get("agent_type") == AgentType.REACT: - return _handle_react_response(turn_response) - else: - return _handle_regular_response(turn_response) - - def _handle_react_response(turn_response): - current_step_content = "" - final_answer = None - tool_results = [] - - for response in turn_response: - if not hasattr(response.event, "payload"): - yield ( - "\n\n🚨 :red[_Llama Stack server Error:_]\n" - "The response received is missing an expected `payload` attribute.\n" - "This could indicate a malformed response or an internal issue within the server.\n\n" - f"Error details: {response}" - ) - return - - payload = response.event.payload - - if payload.event_type == "step_progress" and hasattr(payload.delta, "text"): - current_step_content += payload.delta.text - continue - - if payload.event_type == "step_complete": - step_details = payload.step_details - - if step_details.step_type == "inference": - yield from _process_inference_step(current_step_content, tool_results, final_answer) - current_step_content = "" - elif step_details.step_type == "tool_execution": - tool_results = _process_tool_execution(step_details, tool_results) - current_step_content = "" - else: - current_step_content = "" - - if not final_answer and tool_results: - 
yield from _format_tool_results_summary(tool_results) - - def _process_inference_step(current_step_content, tool_results, final_answer): - try: - react_output_data = json.loads(current_step_content) - thought = react_output_data.get("thought") - action = react_output_data.get("action") - answer = react_output_data.get("answer") - - if answer and answer != "null" and answer is not None: - final_answer = answer - - if thought: - with st.expander("šŸ¤” Thinking...", expanded=False): - st.markdown(f":grey[__{thought}__]") - - if action and isinstance(action, dict): - tool_name = action.get("tool_name") - tool_params = action.get("tool_params") - with st.expander(f'šŸ›  Action: Using tool "{tool_name}"', expanded=False): - st.json(tool_params) - - if answer and answer != "null" and answer is not None: - yield f"\n\nāœ… **Final Answer:**\n{answer}" - - except json.JSONDecodeError: - yield f"\n\nFailed to parse ReAct step content:\n```json\n{current_step_content}\n```" - except Exception as e: - yield f"\n\nFailed to process ReAct step: {e}\n```json\n{current_step_content}\n```" - - return final_answer - - def _process_tool_execution(step_details, tool_results): - try: - if hasattr(step_details, "tool_responses") and step_details.tool_responses: - for tool_response in step_details.tool_responses: - tool_name = tool_response.tool_name - content = tool_response.content - tool_results.append((tool_name, content)) - with st.expander(f'āš™ļø Observation (Result from "{tool_name}")', expanded=False): - try: - parsed_content = json.loads(content) - st.json(parsed_content) - except json.JSONDecodeError: - st.code(content, language=None) - else: - with st.expander("āš™ļø Observation", expanded=False): - st.markdown(":grey[_Tool execution step completed, but no response data found._]") - except Exception as e: - with st.expander("āš™ļø Error in Tool Execution", expanded=False): - st.markdown(f":red[_Error processing tool execution: {str(e)}_]") - - return tool_results - - def _format_tool_results_summary(tool_results): - yield "\n\n**Here's what I found:**\n" - for tool_name, content in tool_results: - try: - parsed_content = json.loads(content) - - if tool_name == "web_search" and "top_k" in parsed_content: - yield from _format_web_search_results(parsed_content) - elif "results" in parsed_content and isinstance(parsed_content["results"], list): - yield from _format_results_list(parsed_content["results"]) - elif isinstance(parsed_content, dict) and len(parsed_content) > 0: - yield from _format_dict_results(parsed_content) - elif isinstance(parsed_content, list) and len(parsed_content) > 0: - yield from _format_list_results(parsed_content) - except json.JSONDecodeError: - yield f"\n**{tool_name}** was used but returned complex data. 
Check the observation for details.\n" - except (TypeError, AttributeError, KeyError, IndexError) as e: - print(f"Error processing {tool_name} result: {type(e).__name__}: {e}") - - def _format_web_search_results(parsed_content): - for i, result in enumerate(parsed_content["top_k"], 1): - if i <= 3: - title = result.get("title", "Untitled") - url = result.get("url", "") - content_text = result.get("content", "").strip() - yield f"\n- **{title}**\n {content_text}\n [Source]({url})\n" - - def _format_results_list(results): - for i, result in enumerate(results, 1): - if i <= 3: - if isinstance(result, dict): - name = result.get("name", result.get("title", "Result " + str(i))) - description = result.get("description", result.get("content", result.get("summary", ""))) - yield f"\n- **{name}**\n {description}\n" - else: - yield f"\n- {result}\n" - - def _format_dict_results(parsed_content): - yield "\n```\n" - for key, value in list(parsed_content.items())[:5]: - if isinstance(value, str) and len(value) < 100: - yield f"{key}: {value}\n" - else: - yield f"{key}: [Complex data]\n" - yield "```\n" - - def _format_list_results(parsed_content): - yield "\n" - for _, item in enumerate(parsed_content[:3], 1): - if isinstance(item, str): - yield f"- {item}\n" - elif isinstance(item, dict) and "text" in item: - yield f"- {item['text']}\n" - elif isinstance(item, dict) and len(item) > 0: - first_value = next(iter(item.values())) - if isinstance(first_value, str) and len(first_value) < 100: - yield f"- {first_value}\n" - - def _handle_regular_response(turn_response): - for response in turn_response: - if hasattr(response.event, "payload"): - print(response.event.payload) - if response.event.payload.event_type == "step_progress": - if hasattr(response.event.payload.delta, "text"): - yield response.event.payload.delta.text - if response.event.payload.event_type == "step_complete": - if response.event.payload.step_details.step_type == "tool_execution": - if response.event.payload.step_details.tool_calls: - tool_name = str(response.event.payload.step_details.tool_calls[0].tool_name) - yield f'\n\nšŸ›  :grey[_Using "{tool_name}" tool:_]\n\n' - else: - yield "No tool_calls present in step_details" - else: - yield f"Error occurred in the Llama Stack Cluster: {response}" - - with st.chat_message("assistant"): - response_content = st.write_stream(response_generator(turn_response)) - - st.session_state.messages.append({"role": "assistant", "content": response_content}) - - -tool_chat_page() diff --git a/src/llama_stack/core/ui/requirements.txt b/src/llama_stack/core/ui/requirements.txt deleted file mode 100644 index 53a1e7bf3..000000000 --- a/src/llama_stack/core/ui/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -llama-stack>=0.2.1 -llama-stack-client>=0.2.1 -pandas -streamlit -streamlit-option-menu diff --git a/uv.lock b/uv.lock index de1c8879c..b2e562abc 100644 --- a/uv.lock +++ b/uv.lock @@ -1963,14 +1963,6 @@ dependencies = [ { name = "uvicorn" }, ] -[package.optional-dependencies] -ui = [ - { name = "llama-stack-client" }, - { name = "pandas" }, - { name = "streamlit" }, - { name = "streamlit-option-menu" }, -] - [package.dev-dependencies] benchmark = [ { name = "locust" }, @@ -2097,11 +2089,9 @@ requires-dist = [ { name = "jinja2", specifier = ">=3.1.6" }, { name = "jsonschema" }, { name = "llama-stack-client", specifier = ">=0.3.0" }, - { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.3.0" }, { name = "openai", specifier = ">=2.5.0" }, { name = "opentelemetry-exporter-otlp-proto-http", 
specifier = ">=1.30.0" }, { name = "opentelemetry-sdk", specifier = ">=1.30.0" }, - { name = "pandas", marker = "extra == 'ui'" }, { name = "pillow" }, { name = "prompt-toolkit" }, { name = "pydantic", specifier = ">=2.11.9" }, @@ -2111,13 +2101,10 @@ requires-dist = [ { name = "rich" }, { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" }, { name = "starlette" }, - { name = "streamlit", marker = "extra == 'ui'" }, - { name = "streamlit-option-menu", marker = "extra == 'ui'" }, { name = "termcolor" }, { name = "tiktoken" }, { name = "uvicorn", specifier = ">=0.34.0" }, ] -provides-extras = ["ui"] [package.metadata.requires-dev] benchmark = [{ name = "locust", specifier = ">=2.39.1" }]