mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 09:53:45 +00:00

chore(ui): remove the Streamlit UI (#4097)

parent 939a2db58f
commit a2c4c12384

27 changed files with 0 additions and 1597 deletions
@ -35,9 +35,6 @@ Here are the key topics that will help you build effective AI applications:
- **[Telemetry](./telemetry.mdx)** - Monitor and analyze your agents' performance and behavior
- **[Safety](./safety.mdx)** - Implement guardrails and safety measures to ensure responsible AI behavior

### 🎮 **Interactive Development**
- **[Playground](./playground.mdx)** - Interactive environment for testing and developing applications

## Application Patterns

### 🤖 **Conversational Agents**
@ -1,298 +0,0 @@
---
title: Llama Stack Playground
description: Interactive interface to explore and experiment with Llama Stack capabilities
sidebar_label: Playground
sidebar_position: 10
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Llama Stack Playground

:::note[Experimental Feature]
The Llama Stack Playground is currently experimental and subject to change. We welcome feedback and contributions to help improve it.
:::

The Llama Stack Playground is a simple interface that aims to:
- **Showcase capabilities and concepts** of Llama Stack in an interactive environment
- **Demo end-to-end application code** to help users get started building their own applications
- **Provide a UI** to help users inspect and understand Llama Stack API providers and resources

## Key Features

### Interactive Playground Pages

The playground provides interactive pages for users to explore Llama Stack API capabilities:

#### Chatbot Interface

<video
  controls
  autoPlay
  playsInline
  muted
  loop
  style={{width: '100%'}}
>
  <source src="https://github.com/user-attachments/assets/8d2ef802-5812-4a28-96e1-316038c84cbf" type="video/mp4" />
  Your browser does not support the video tag.
</video>

<Tabs>
<TabItem value="chat" label="Chat">

**Simple Chat Interface**
- Chat directly with Llama models through an intuitive interface
- Uses the `/chat/completions` streaming API under the hood
- Real-time message streaming for responsive interactions
- Perfect for testing model capabilities and prompt engineering

</TabItem>
<TabItem value="rag" label="RAG Chat">

**Document-Aware Conversations**
- Upload documents to create memory banks
- Chat with a RAG-enabled agent that can query your documents
- Uses Llama Stack's `/agents` API to create and manage RAG sessions
- Ideal for exploring knowledge-enhanced AI applications

</TabItem>
</Tabs>
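
For readers who want the same flow outside the UI, here is a minimal Python sketch of what the Chat page does under the hood. It assumes a Llama Stack server reachable at `http://localhost:8321`; the model id is a placeholder to be replaced with one returned by `client.models.list()`.

```python
import os

from llama_stack_client import LlamaStackClient

# Assumes a running Llama Stack server; endpoint and model id are placeholders.
client = LlamaStackClient(base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:8321"))

response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.3-70B-Instruct",  # placeholder; pick one from client.models.list()
    messages=[
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": "What is Llama Stack?"},
    ],
    sampling_params={"strategy": {"type": "greedy"}, "max_tokens": 512},
    stream=True,
)

# Streamed responses arrive as progress events carrying text deltas.
for chunk in response:
    if chunk.event.event_type == "progress":
        print(chunk.event.delta.text, end="", flush=True)
```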

#### Evaluation Interface

<video
  controls
  autoPlay
  playsInline
  muted
  loop
  style={{width: '100%'}}
>
  <source src="https://github.com/user-attachments/assets/6cc1659f-eba4-49ca-a0a5-7c243557b4f5" type="video/mp4" />
  Your browser does not support the video tag.
</video>

<Tabs>
<TabItem value="scoring" label="Scoring Evaluations">

**Custom Dataset Evaluation**
- Upload your own evaluation datasets
- Run evaluations using available scoring functions
- Uses Llama Stack's `/scoring` API for flexible evaluation workflows
- Great for testing application performance on custom metrics

</TabItem>
<TabItem value="benchmarks" label="Benchmark Evaluations">

<video
  controls
  autoPlay
  playsInline
  muted
  loop
  style={{width: '100%', marginBottom: '1rem'}}
>
  <source src="https://github.com/user-attachments/assets/345845c7-2a2b-4095-960a-9ae40f6a93cf" type="video/mp4" />
  Your browser does not support the video tag.
</video>

**Pre-registered Evaluation Tasks**
- Evaluate models or agents on pre-defined tasks
- Uses Llama Stack's `/eval` API for comprehensive evaluation
- Combines datasets and scoring functions for standardized testing

**Setup Requirements:**

Register evaluation datasets and benchmarks first:

```bash
# Register evaluation dataset
llama-stack-client datasets register \
  --dataset-id "mmlu" \
  --provider-id "huggingface" \
  --url "https://huggingface.co/datasets/llamastack/evals" \
  --metadata '{"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"}' \
  --schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string"}, "chat_completion_input": {"type": "string"}}'

# Register benchmark task
llama-stack-client benchmarks register \
  --eval-task-id meta-reference-mmlu \
  --provider-id meta-reference \
  --dataset-id mmlu \
  --scoring-functions basic::regex_parser_multiple_choice_answer
```

</TabItem>
</Tabs>
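
The benchmark flow can also be scripted. The sketch below mirrors the calls the Benchmark Evaluations page makes (`datasets.iterrows`, `eval.evaluate_rows`), assuming the `mmlu` dataset and `meta-reference-mmlu` benchmark registered above; the model id is a placeholder.

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

benchmark_id = "meta-reference-mmlu"  # registered above
benchmark = next(b for b in client.benchmarks.list() if b.identifier == benchmark_id)

# Pull a few rows from the benchmark's dataset and evaluate them.
rows = client.datasets.iterrows(dataset_id=benchmark.dataset_id).data[:5]

benchmark_config = {
    "type": "benchmark",
    "eval_candidate": {
        "type": "model",
        "model": "meta-llama/Llama-3.3-70B-Instruct",  # placeholder; pick one from client.models.list()
        "sampling_params": {"strategy": {"type": "greedy"}, "max_tokens": 512},
    },
    "scoring_params": {},
}

result = client.eval.evaluate_rows(
    benchmark_id=benchmark_id,
    input_rows=rows,
    scoring_functions=benchmark.scoring_functions,
    benchmark_config=benchmark_config,
)
print(result.scores)
```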

#### Inspection Interface

<video
  controls
  autoPlay
  playsInline
  muted
  loop
  style={{width: '100%'}}
>
  <source src="https://github.com/user-attachments/assets/01d52b2d-92af-4e3a-b623-a9b8ba22ba99" type="video/mp4" />
  Your browser does not support the video tag.
</video>

<Tabs>
<TabItem value="providers" label="API Providers">

**Provider Management**
- Inspect available Llama Stack API providers
- View provider configurations and capabilities
- Uses the `/providers` API for real-time provider information
- Essential for understanding your deployment's capabilities

</TabItem>
<TabItem value="resources" label="API Resources">

**Resource Exploration**
- Inspect Llama Stack API resources including:
  - **Models**: Available language models
  - **Datasets**: Registered evaluation datasets
  - **Memory Banks**: Vector databases and knowledge stores
  - **Benchmarks**: Evaluation tasks and scoring functions
  - **Shields**: Safety and content moderation tools
- Uses `/<resources>/list` APIs for comprehensive resource visibility
- For detailed information about resources, see [Core Concepts](/docs/concepts)

</TabItem>
</Tabs>
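
The inspection pages are thin wrappers over the list APIs. A short sketch of the equivalent calls from Python, assuming a local server (attribute names follow the removed UI pages):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# Providers, grouped by the API they implement (what the API Providers page shows)
for p in client.providers.list():
    print(p.api, p.to_dict())

# Resources (what the API Resources page shows)
print([m.identifier for m in client.models.list()])
print([s.identifier for s in client.shields.list()])
print([s.identifier for s in client.scoring_functions.list()])
print([d.identifier for d in client.datasets.list()])
print([b.identifier for b in client.benchmarks.list()])
```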

## Getting Started

### Quick Start Guide

<Tabs>
<TabItem value="setup" label="Setup">

**1. Start the Llama Stack API Server**

```bash
llama stack list-deps together | xargs -L1 uv pip install
llama stack run together
```

**2. Start the Streamlit UI**

```bash
# Launch the playground interface
uv run --with ".[ui]" streamlit run llama_stack/core/ui/app.py
```

</TabItem>
<TabItem value="usage" label="Usage Tips">

**Making the Most of the Playground:**

- **Start with Chat**: Test basic model interactions and prompt engineering
- **Explore RAG**: Upload sample documents to see knowledge-enhanced responses
- **Try Evaluations**: Use the scoring interface to understand evaluation metrics
- **Inspect Resources**: Check what providers and resources are available
- **Experiment with Settings**: Adjust parameters to see how they affect results

</TabItem>
</Tabs>
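
Before starting the Streamlit UI it can be worth confirming the server is reachable from Python; a quick sanity check, assuming the default endpoint:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

models = client.models.list()
print(f"Server is up, {len(models)} model(s) registered:")
for m in models:
    print(" -", m.identifier)
```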

### Available Distributions

The playground works with any Llama Stack distribution. Popular options include:

<Tabs>
<TabItem value="together" label="Together AI">

```bash
llama stack list-deps together | xargs -L1 uv pip install
llama stack run together
```

**Features:**
- Cloud-hosted models
- Fast inference
- Multiple model options

</TabItem>
<TabItem value="ollama" label="Ollama (Local)">

```bash
llama stack list-deps ollama | xargs -L1 uv pip install
llama stack run ollama
```

**Features:**
- Local model execution
- Privacy-focused
- No internet required

</TabItem>
<TabItem value="meta-reference" label="Meta Reference">

```bash
llama stack list-deps meta-reference | xargs -L1 uv pip install
llama stack run meta-reference
```

**Features:**
- Reference implementation
- All API features available
- Best for development

</TabItem>
</Tabs>

## Use Cases & Examples

### Educational Use Cases
- **Learning Llama Stack**: Hands-on exploration of API capabilities
- **Prompt Engineering**: Interactive testing of different prompting strategies
- **RAG Experimentation**: Understanding how document retrieval affects responses
- **Evaluation Understanding**: See how different metrics evaluate model performance

### Development Use Cases
- **Prototype Testing**: Quick validation of application concepts
- **API Exploration**: Understanding available endpoints and parameters
- **Integration Planning**: Seeing how different components work together
- **Demo Creation**: Showcasing Llama Stack capabilities to stakeholders

### Research Use Cases
- **Model Comparison**: Side-by-side testing of different models
- **Evaluation Design**: Understanding how scoring functions work
- **Safety Testing**: Exploring shield effectiveness with different inputs
- **Performance Analysis**: Measuring model behavior across different scenarios

## Best Practices

### 🚀 **Getting Started**
- Begin with simple chat interactions to understand basic functionality
- Gradually explore more advanced features like RAG and evaluations
- Use the inspection tools to understand your deployment's capabilities

### 🔧 **Development Workflow**
- Use the playground to prototype before writing application code
- Test different parameter settings interactively
- Validate evaluation approaches before implementing them programmatically

### 📊 **Evaluation & Testing**
- Start with simple scoring functions before trying complex evaluations
- Use the playground to understand evaluation results before automation
- Test safety features with various input types

### 🎯 **Production Preparation**
- Use playground insights to inform your production API usage
- Test edge cases and error conditions interactively
- Validate resource configurations before deployment

## Related Resources

- **[Getting Started Guide](../getting_started/quickstart)** - Complete setup and introduction
- **[Core Concepts](/docs/concepts)** - Understanding Llama Stack fundamentals
- **[Agents](./agent)** - Building intelligent agents
- **[RAG (Retrieval Augmented Generation)](./rag)** - Knowledge-enhanced applications
- **[Evaluations](./evals)** - Comprehensive evaluation framework
- **[API Reference](/docs/api/llama-stack-specification)** - Complete API documentation
@ -51,14 +51,6 @@ dependencies = [
    "sqlalchemy[asyncio]>=2.0.41", # server - for conversations
]

[project.optional-dependencies]
ui = [
    "streamlit",
    "pandas",
    "llama-stack-client>=0.3.0",
    "streamlit-option-menu",
]

[dependency-groups]
dev = [
    "pytest>=8.4",
@ -1,11 +0,0 @@
# More info on playground configuration can be found here:
# https://llama-stack.readthedocs.io/en/latest/playground

FROM python:3.12-slim
WORKDIR /app
COPY . /app/
RUN /usr/local/bin/python -m pip install --upgrade pip && \
    /usr/local/bin/pip3 install -r requirements.txt
EXPOSE 8501

ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
@ -1,50 +0,0 @@
# (Experimental) Llama Stack UI

## Docker Setup

:warning: This is a work in progress.

## Developer Setup

1. Start up the Llama Stack API server. More details [here](https://llamastack.github.io/latest/getting_started/index.html).

```
llama stack list-deps together | xargs -L1 uv pip install

llama stack run together
```

2. (Optional) Register datasets and eval tasks as resources if you want to run pre-configured evaluation flows (e.g. the Evaluations (Generation + Scoring) page).

```bash
llama-stack-client datasets register \
  --dataset-id "mmlu" \
  --provider-id "huggingface" \
  --url "https://huggingface.co/datasets/llamastack/evals" \
  --metadata '{"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"}' \
  --schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string"}, "chat_completion_input": {"type": "string"}}'
```

```bash
llama-stack-client benchmarks register \
  --eval-task-id meta-reference-mmlu \
  --provider-id meta-reference \
  --dataset-id mmlu \
  --scoring-functions basic::regex_parser_multiple_choice_answer
```

3. Start the Streamlit UI

```bash
uv run --with ".[ui]" streamlit run llama_stack/core/ui/app.py
```

## Environment Variables

| Environment Variable | Description                       | Default Value         |
|----------------------|-----------------------------------|-----------------------|
| LLAMA_STACK_ENDPOINT | The endpoint for the Llama Stack  | http://localhost:8321 |
| FIREWORKS_API_KEY    | API key for Fireworks provider    | (empty string)        |
| TOGETHER_API_KEY     | API key for Together provider     | (empty string)        |
| SAMBANOVA_API_KEY    | API key for SambaNova provider    | (empty string)        |
| OPENAI_API_KEY       | API key for OpenAI provider       | (empty string)        |
@ -1,5 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
@ -1,55 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st


def main():
    # Evaluation pages
    application_evaluation_page = st.Page(
        "page/evaluations/app_eval.py",
        title="Evaluations (Scoring)",
        icon="📊",
        default=False,
    )
    native_evaluation_page = st.Page(
        "page/evaluations/native_eval.py",
        title="Evaluations (Generation + Scoring)",
        icon="📊",
        default=False,
    )

    # Playground pages
    chat_page = st.Page("page/playground/chat.py", title="Chat", icon="💬", default=True)
    rag_page = st.Page("page/playground/rag.py", title="RAG", icon="💬", default=False)
    tool_page = st.Page("page/playground/tools.py", title="Tools", icon="🛠", default=False)

    # Distribution pages
    resources_page = st.Page("page/distribution/resources.py", title="Resources", icon="🔍", default=False)
    provider_page = st.Page(
        "page/distribution/providers.py",
        title="API Providers",
        icon="🔍",
        default=False,
    )

    pg = st.navigation(
        {
            "Playground": [
                chat_page,
                rag_page,
                tool_page,
                application_evaluation_page,
                native_evaluation_page,
            ],
            "Inspect": [provider_page, resources_page],
        },
        expanded=False,
    )
    pg.run()


if __name__ == "__main__":
    main()
@ -1,5 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
@ -1,32 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import os

from llama_stack_client import LlamaStackClient


class LlamaStackApi:
    def __init__(self):
        self.client = LlamaStackClient(
            base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:8321"),
            provider_data={
                "fireworks_api_key": os.environ.get("FIREWORKS_API_KEY", ""),
                "together_api_key": os.environ.get("TOGETHER_API_KEY", ""),
                "sambanova_api_key": os.environ.get("SAMBANOVA_API_KEY", ""),
                "openai_api_key": os.environ.get("OPENAI_API_KEY", ""),
                "tavily_search_api_key": os.environ.get("TAVILY_SEARCH_API_KEY", ""),
            },
        )

    def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: dict | None):
        """Run scoring on a single row"""
        if not scoring_params:
            scoring_params = dict.fromkeys(scoring_function_ids)
        return self.client.scoring.score(input_rows=[row], scoring_functions=scoring_params)


llama_stack_api = LlamaStackApi()
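
A hypothetical usage sketch for the `LlamaStackApi` wrapper removed above: score a single row with one scoring function. The row keys and the `basic::subset_of` id are examples; any identifiers returned by `client.scoring_functions.list()` work.

```python
# Hypothetical usage of the wrapper above (module removed in this commit).
from llama_stack.core.ui.modules.api import llama_stack_api

row = {
    "input_query": "What is the capital of France?",
    "generated_answer": "Paris",
    "expected_answer": "Paris",
}

result = llama_stack_api.run_scoring(
    row,
    scoring_function_ids=["basic::subset_of"],  # example id; see client.scoring_functions.list()
    scoring_params=None,
)
print(result.results["basic::subset_of"].score_rows[0])
```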
@ -1,42 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import base64
import os

import pandas as pd
import streamlit as st


def process_dataset(file):
    if file is None:
        return "No file uploaded", None

    try:
        # Determine file type and read accordingly
        file_ext = os.path.splitext(file.name)[1].lower()
        if file_ext == ".csv":
            df = pd.read_csv(file)
        elif file_ext in [".xlsx", ".xls"]:
            df = pd.read_excel(file)
        else:
            return "Unsupported file format. Please upload a CSV or Excel file.", None

        return df

    except Exception as e:
        st.error(f"Error processing file: {str(e)}")
        return None


def data_url_from_file(file) -> str:
    file_content = file.getvalue()
    base64_content = base64.b64encode(file_content).decode("utf-8")
    mime_type = file.type

    data_url = f"data:{mime_type};base64,{base64_content}"

    return data_url
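
A small illustration of `data_url_from_file` above. Streamlit's `UploadedFile` exposes `type` and `getvalue()`, so the stand-in class below (purely hypothetical) mimics that interface:

```python
from llama_stack.core.ui.modules.utils import data_url_from_file  # module removed in this commit


class FakeUpload:
    """Minimal stand-in for a Streamlit UploadedFile (name, type, getvalue())."""

    def __init__(self, name: str, mime_type: str, content: bytes):
        self.name = name
        self.type = mime_type
        self._content = content

    def getvalue(self) -> bytes:
        return self._content


doc = FakeUpload("notes.txt", "text/plain", b"Llama Stack playground notes")
print(data_url_from_file(doc))  # prints a data:text/plain;base64,... URL
```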
@ -1,5 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
@ -1,5 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
@ -1,18 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st

from llama_stack.core.ui.modules.api import llama_stack_api


def datasets():
    st.header("Datasets")

    datasets_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.datasets.list()}
    if len(datasets_info) > 0:
        selected_dataset = st.selectbox("Select a dataset", list(datasets_info.keys()))
        st.json(datasets_info[selected_dataset], expanded=True)
@ -1,20 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st

from llama_stack.core.ui.modules.api import llama_stack_api


def benchmarks():
    # Benchmarks Section
    st.header("Benchmarks")

    benchmarks_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.benchmarks.list()}

    if len(benchmarks_info) > 0:
        selected_benchmark = st.selectbox("Select an eval task", list(benchmarks_info.keys()), key="benchmark_inspect")
        st.json(benchmarks_info[selected_benchmark], expanded=True)
@ -1,18 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st

from llama_stack.core.ui.modules.api import llama_stack_api


def models():
    # Models Section
    st.header("Models")

    models_info = {m.id: m.model_dump() for m in llama_stack_api.client.models.list()}

    selected_model = st.selectbox("Select a model", list(models_info.keys()))
    st.json(models_info[selected_model])
@ -1,27 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st

from llama_stack.core.ui.modules.api import llama_stack_api


def providers():
    st.header("🔍 API Providers")
    apis_providers_lst = llama_stack_api.client.providers.list()
    api_to_providers = {}
    for api_provider in apis_providers_lst:
        if api_provider.api in api_to_providers:
            api_to_providers[api_provider.api].append(api_provider)
        else:
            api_to_providers[api_provider.api] = [api_provider]

    for api in api_to_providers.keys():
        st.markdown(f"###### {api}")
        st.dataframe([x.to_dict() for x in api_to_providers[api]], width=500)


providers()
@ -1,48 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from streamlit_option_menu import option_menu

from llama_stack.core.ui.page.distribution.datasets import datasets
from llama_stack.core.ui.page.distribution.eval_tasks import benchmarks
from llama_stack.core.ui.page.distribution.models import models
from llama_stack.core.ui.page.distribution.scoring_functions import scoring_functions
from llama_stack.core.ui.page.distribution.shields import shields


def resources_page():
    options = [
        "Models",
        "Shields",
        "Scoring Functions",
        "Datasets",
        "Benchmarks",
    ]
    icons = ["magic", "shield", "file-bar-graph", "database", "list-task"]
    selected_resource = option_menu(
        None,
        options,
        icons=icons,
        orientation="horizontal",
        styles={
            "nav-link": {
                "font-size": "12px",
            },
        },
    )
    if selected_resource == "Benchmarks":
        benchmarks()
    elif selected_resource == "Datasets":
        datasets()
    elif selected_resource == "Models":
        models()
    elif selected_resource == "Scoring Functions":
        scoring_functions()
    elif selected_resource == "Shields":
        shields()


resources_page()
@ -1,18 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st

from llama_stack.core.ui.modules.api import llama_stack_api


def scoring_functions():
    st.header("Scoring Functions")

    scoring_functions_info = {s.identifier: s.to_dict() for s in llama_stack_api.client.scoring_functions.list()}

    selected_scoring_function = st.selectbox("Select a scoring function", list(scoring_functions_info.keys()))
    st.json(scoring_functions_info[selected_scoring_function], expanded=True)
@ -1,19 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st

from llama_stack.core.ui.modules.api import llama_stack_api


def shields():
    # Shields Section
    st.header("Shields")

    shields_info = {s.identifier: s.to_dict() for s in llama_stack_api.client.shields.list()}

    selected_shield = st.selectbox("Select a shield", list(shields_info.keys()))
    st.json(shields_info[selected_shield])
@ -1,5 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
@ -1,143 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import json

import pandas as pd
import streamlit as st

from llama_stack.core.ui.modules.api import llama_stack_api
from llama_stack.core.ui.modules.utils import process_dataset


def application_evaluation_page():
    st.set_page_config(page_title="Evaluations (Scoring)", page_icon="🦙")
    st.title("📊 Evaluations (Scoring)")

    # File uploader
    uploaded_file = st.file_uploader("Upload Dataset", type=["csv", "xlsx", "xls"])

    if uploaded_file is None:
        st.error("No file uploaded")
        return

    # Process uploaded file
    df = process_dataset(uploaded_file)
    if df is None:
        st.error("Error processing file")
        return

    # Display dataset information
    st.success("Dataset loaded successfully!")

    # Display dataframe preview
    st.subheader("Dataset Preview")
    st.dataframe(df)

    # Select Scoring Functions to Run Evaluation On
    st.subheader("Select Scoring Functions")
    scoring_functions = llama_stack_api.client.scoring_functions.list()
    scoring_functions = {sf.identifier: sf for sf in scoring_functions}
    scoring_functions_names = list(scoring_functions.keys())
    selected_scoring_functions = st.multiselect(
        "Choose one or more scoring functions",
        options=scoring_functions_names,
        help="Choose one or more scoring functions.",
    )

    available_models = llama_stack_api.client.models.list()
    available_models = [m.identifier for m in available_models]

    scoring_params = {}
    if selected_scoring_functions:
        st.write("Selected:")
        for scoring_fn_id in selected_scoring_functions:
            scoring_fn = scoring_functions[scoring_fn_id]
            st.write(f"- **{scoring_fn_id}**: {scoring_fn.description}")
            new_params = None
            if scoring_fn.params:
                new_params = {}
                for param_name, param_value in scoring_fn.params.to_dict().items():
                    if param_name == "type":
                        new_params[param_name] = param_value
                        continue

                    if param_name == "judge_model":
                        value = st.selectbox(
                            f"Select **{param_name}** for {scoring_fn_id}",
                            options=available_models,
                            index=0,
                            key=f"{scoring_fn_id}_{param_name}",
                        )
                        new_params[param_name] = value
                    else:
                        value = st.text_area(
                            f"Enter value for **{param_name}** in {scoring_fn_id} in valid JSON format",
                            value=json.dumps(param_value, indent=2),
                            height=80,
                        )
                        try:
                            new_params[param_name] = json.loads(value)
                        except json.JSONDecodeError:
                            st.error(f"Invalid JSON for **{param_name}** in {scoring_fn_id}")

                st.json(new_params)
            scoring_params[scoring_fn_id] = new_params

    # Add run evaluation button & slider
    total_rows = len(df)
    num_rows = st.slider("Number of rows to evaluate", 1, total_rows, total_rows)

    if st.button("Run Evaluation"):
        progress_text = "Running evaluation..."
        progress_bar = st.progress(0, text=progress_text)
        rows = df.to_dict(orient="records")
        if num_rows < total_rows:
            rows = rows[:num_rows]

        # Create separate containers for progress text and results
        progress_text_container = st.empty()
        results_container = st.empty()
        output_res = {}
        for i, r in enumerate(rows):
            # Update progress
            progress = i / len(rows)
            progress_bar.progress(progress, text=progress_text)

            # Run evaluation for current row
            score_res = llama_stack_api.run_scoring(
                r,
                scoring_function_ids=selected_scoring_functions,
                scoring_params=scoring_params,
            )

            for k in r.keys():
                if k not in output_res:
                    output_res[k] = []
                output_res[k].append(r[k])

            for fn_id in selected_scoring_functions:
                if fn_id not in output_res:
                    output_res[fn_id] = []
                output_res[fn_id].append(score_res.results[fn_id].score_rows[0])

            # Display current row results using separate containers
            progress_text_container.write(f"Expand to see current processed result ({i + 1} / {len(rows)})")
            results_container.json(
                score_res.to_json(),
                expanded=2,
            )

        progress_bar.progress(1.0, text="Evaluation complete!")

        # Display results in dataframe
        if output_res:
            output_df = pd.DataFrame(output_res)
            st.subheader("Evaluation Results")
            st.dataframe(output_df)


application_evaluation_page()
@ -1,253 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import json

import pandas as pd
import streamlit as st

from llama_stack.core.ui.modules.api import llama_stack_api


def select_benchmark_1():
    # Select Benchmarks
    st.subheader("1. Choose An Eval Task")
    benchmarks = llama_stack_api.client.benchmarks.list()
    benchmarks = {et.identifier: et for et in benchmarks}
    benchmarks_names = list(benchmarks.keys())
    selected_benchmark = st.selectbox(
        "Choose an eval task.",
        options=benchmarks_names,
        help="Choose an eval task. Each eval task is parameterized by a dataset, and list of scoring functions.",
    )
    with st.expander("View Eval Task"):
        st.json(benchmarks[selected_benchmark], expanded=True)

    st.session_state["selected_benchmark"] = selected_benchmark
    st.session_state["benchmarks"] = benchmarks
    if st.button("Confirm", key="confirm_1"):
        st.session_state["selected_benchmark_1_next"] = True


def define_eval_candidate_2():
    if not st.session_state.get("selected_benchmark_1_next", None):
        return

    st.subheader("2. Define Eval Candidate")
    st.info(
        """
        Define the configurations for the evaluation candidate model or agent used for generation.
        Select "model" if you want to run generation with inference API, or "agent" if you want to run generation with agent API through specifying AgentConfig.
        """
    )
    with st.expander("Define Eval Candidate", expanded=True):
        # Define Eval Candidate
        candidate_type = st.radio("Candidate Type", ["model", "agent"])

        available_models = llama_stack_api.client.models.list()
        available_models = [model.identifier for model in available_models]
        selected_model = st.selectbox(
            "Choose a model",
            available_models,
            index=0,
        )

        # Sampling Parameters
        st.markdown("##### Sampling Parameters")
        temperature = st.slider(
            "Temperature",
            min_value=0.0,
            max_value=1.0,
            value=0.0,
            step=0.1,
            help="Controls the randomness of the response. Higher values make the output more creative and unexpected, lower values make it more conservative and predictable",
        )
        top_p = st.slider(
            "Top P",
            min_value=0.0,
            max_value=1.0,
            value=0.95,
            step=0.1,
        )
        max_tokens = st.slider(
            "Max Tokens",
            min_value=0,
            max_value=4096,
            value=512,
            step=1,
            help="The maximum number of tokens to generate",
        )
        repetition_penalty = st.slider(
            "Repetition Penalty",
            min_value=1.0,
            max_value=2.0,
            value=1.0,
            step=0.1,
            help="Controls the likelihood for generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty, 2 will strongly discourage model to repeat words or phrases.",
        )
        if candidate_type == "model":
            if temperature > 0.0:
                strategy = {
                    "type": "top_p",
                    "temperature": temperature,
                    "top_p": top_p,
                }
            else:
                strategy = {"type": "greedy"}

            eval_candidate = {
                "type": "model",
                "model": selected_model,
                "sampling_params": {
                    "strategy": strategy,
                    "max_tokens": max_tokens,
                    "repetition_penalty": repetition_penalty,
                },
            }
        elif candidate_type == "agent":
            system_prompt = st.text_area(
                "System Prompt",
                value="You are a helpful AI assistant.",
                help="Initial instructions given to the AI to set its behavior and context",
            )
            tools_json = st.text_area(
                "Tools Configuration (JSON)",
                value=json.dumps(
                    [
                        {
                            "type": "brave_search",
                            "engine": "brave",
                            "api_key": "ENTER_BRAVE_API_KEY_HERE",
                        }
                    ]
                ),
                help="Enter tool configurations in JSON format. Each tool should have a name, description, and parameters.",
                height=200,
            )
            try:
                tools = json.loads(tools_json)
            except json.JSONDecodeError:
                st.error("Invalid JSON format for tools configuration")
                tools = []
            eval_candidate = {
                "type": "agent",
                "config": {
                    "model": selected_model,
                    "instructions": system_prompt,
                    "tools": tools,
                    "tool_choice": "auto",
                    "tool_prompt_format": "json",
                    "input_shields": [],
                    "output_shields": [],
                    "enable_session_persistence": False,
                },
            }
        st.session_state["eval_candidate"] = eval_candidate

    if st.button("Confirm", key="confirm_2"):
        st.session_state["selected_eval_candidate_2_next"] = True


def run_evaluation_3():
    if not st.session_state.get("selected_eval_candidate_2_next", None):
        return

    st.subheader("3. Run Evaluation")
    # Add info box to explain configurations being used
    st.info(
        """
        Review the configurations that will be used for this evaluation run, make any necessary changes, and then click the "Run Evaluation" button.
        """
    )
    selected_benchmark = st.session_state["selected_benchmark"]
    benchmarks = st.session_state["benchmarks"]
    eval_candidate = st.session_state["eval_candidate"]

    dataset_id = benchmarks[selected_benchmark].dataset_id
    rows = llama_stack_api.client.datasets.iterrows(
        dataset_id=dataset_id,
    )
    total_rows = len(rows.data)
    # Add number of examples control
    num_rows = st.number_input(
        "Number of Examples to Evaluate",
        min_value=1,
        max_value=total_rows,
        value=5,
        help="Number of examples from the dataset to evaluate. ",
    )

    benchmark_config = {
        "type": "benchmark",
        "eval_candidate": eval_candidate,
        "scoring_params": {},
    }

    with st.expander("View Evaluation Task", expanded=True):
        st.json(benchmarks[selected_benchmark], expanded=True)
    with st.expander("View Evaluation Task Configuration", expanded=True):
        st.json(benchmark_config, expanded=True)

    # Add run button and handle evaluation
    if st.button("Run Evaluation"):
        progress_text = "Running evaluation..."
        progress_bar = st.progress(0, text=progress_text)
        rows = rows.data
        if num_rows < total_rows:
            rows = rows[:num_rows]

        # Create separate containers for progress text and results
        progress_text_container = st.empty()
        results_container = st.empty()
        output_res = {}
        for i, r in enumerate(rows):
            # Update progress
            progress = i / len(rows)
            progress_bar.progress(progress, text=progress_text)
            # Run evaluation for current row
            eval_res = llama_stack_api.client.eval.evaluate_rows(
                benchmark_id=selected_benchmark,
                input_rows=[r],
                scoring_functions=benchmarks[selected_benchmark].scoring_functions,
                benchmark_config=benchmark_config,
            )

            for k in r.keys():
                if k not in output_res:
                    output_res[k] = []
                output_res[k].append(r[k])

            for k in eval_res.generations[0].keys():
                if k not in output_res:
                    output_res[k] = []
                output_res[k].append(eval_res.generations[0][k])

            for scoring_fn in benchmarks[selected_benchmark].scoring_functions:
                if scoring_fn not in output_res:
                    output_res[scoring_fn] = []
                output_res[scoring_fn].append(eval_res.scores[scoring_fn].score_rows[0])

            progress_text_container.write(f"Expand to see current processed result ({i + 1} / {len(rows)})")
            results_container.json(eval_res, expanded=2)

        progress_bar.progress(1.0, text="Evaluation complete!")
        # Display results in dataframe
        if output_res:
            output_df = pd.DataFrame(output_res)
            st.subheader("Evaluation Results")
            st.dataframe(output_df)


def native_evaluation_page():
    st.set_page_config(page_title="Evaluations (Generation + Scoring)", page_icon="🦙")
    st.title("📊 Evaluations (Generation + Scoring)")

    select_benchmark_1()
    define_eval_candidate_2()
    run_evaluation_3()


native_evaluation_page()
@ -1,5 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
@ -1,134 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st

from llama_stack.core.ui.modules.api import llama_stack_api

# Sidebar configurations
with st.sidebar:
    st.header("Configuration")
    available_models = llama_stack_api.client.models.list()
    available_models = [
        model.id
        for model in available_models
        if model.custom_metadata and model.custom_metadata.get("model_type") == "llm"
    ]
    selected_model = st.selectbox(
        "Choose a model",
        available_models,
        index=0,
    )

    temperature = st.slider(
        "Temperature",
        min_value=0.0,
        max_value=1.0,
        value=0.0,
        step=0.1,
        help="Controls the randomness of the response. Higher values make the output more creative and unexpected, lower values make it more conservative and predictable",
    )

    top_p = st.slider(
        "Top P",
        min_value=0.0,
        max_value=1.0,
        value=0.95,
        step=0.1,
    )

    max_tokens = st.slider(
        "Max Tokens",
        min_value=0,
        max_value=4096,
        value=512,
        step=1,
        help="The maximum number of tokens to generate",
    )

    repetition_penalty = st.slider(
        "Repetition Penalty",
        min_value=1.0,
        max_value=2.0,
        value=1.0,
        step=0.1,
        help="Controls the likelihood for generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty, 2 will strongly discourage model to repeat words or phrases.",
    )

    stream = st.checkbox("Stream", value=True)
    system_prompt = st.text_area(
        "System Prompt",
        value="You are a helpful AI assistant.",
        help="Initial instructions given to the AI to set its behavior and context",
    )

    # Add clear chat button to sidebar
    if st.button("Clear Chat", use_container_width=True):
        st.session_state.messages = []
        st.rerun()


# Main chat interface
st.title("🦙 Chat")


# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Chat input
if prompt := st.chat_input("Example: What is Llama Stack?"):
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Display user message
    with st.chat_message("user"):
        st.markdown(prompt)

    # Display assistant response
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""

        if temperature > 0.0:
            strategy = {
                "type": "top_p",
                "temperature": temperature,
                "top_p": top_p,
            }
        else:
            strategy = {"type": "greedy"}

        response = llama_stack_api.client.inference.chat_completion(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt},
            ],
            model_id=selected_model,
            stream=stream,
            sampling_params={
                "strategy": strategy,
                "max_tokens": max_tokens,
                "repetition_penalty": repetition_penalty,
            },
        )

        if stream:
            for chunk in response:
                if chunk.event.event_type == "progress":
                    full_response += chunk.event.delta.text
                    message_placeholder.markdown(full_response + "▌")
            message_placeholder.markdown(full_response)
        else:
            full_response = response.completion_message.content
            message_placeholder.markdown(full_response)

        st.session_state.messages.append({"role": "assistant", "content": full_response})
@ -1,352 +0,0 @@
|
||||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
|
||||||
# the root directory of this source tree.
|
|
||||||
|
|
||||||
import enum
|
|
||||||
import json
|
|
||||||
import uuid
|
|
||||||
|
|
||||||
import streamlit as st
|
|
||||||
from llama_stack_client import Agent
|
|
||||||
from llama_stack_client.lib.agents.react.agent import ReActAgent
|
|
||||||
from llama_stack_client.lib.agents.react.tool_parser import ReActOutput
|
|
||||||
|
|
||||||
from llama_stack.core.ui.modules.api import llama_stack_api
|
|
||||||
|
|
||||||
|
|
||||||
class AgentType(enum.Enum):
|
|
||||||
REGULAR = "Regular"
|
|
||||||
REACT = "ReAct"
|
|
||||||
|
|
||||||
|
|
||||||
def tool_chat_page():
|
|
||||||
st.title("🛠 Tools")
|
|
||||||
|
|
||||||
client = llama_stack_api.client
|
|
||||||
models = client.models.list()
|
|
||||||
model_list = [model.identifier for model in models if model.api_model_type == "llm"]
|
|
||||||
|
|
||||||
tool_groups = client.toolgroups.list()
|
|
||||||
tool_groups_list = [tool_group.identifier for tool_group in tool_groups]
|
|
||||||
mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")]
|
|
||||||
builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")]
|
|
||||||
selected_vector_stores = []
|
|
||||||
|
|
||||||
def reset_agent():
|
|
||||||
st.session_state.clear()
|
|
||||||
st.cache_resource.clear()
|
|
||||||
|
|
||||||
with st.sidebar:
|
|
||||||
st.title("Configuration")
|
|
||||||
st.subheader("Model")
|
|
||||||
model = st.selectbox(label="Model", options=model_list, on_change=reset_agent, label_visibility="collapsed")
|
|
||||||
|
|
||||||
st.subheader("Available ToolGroups")
|
|
||||||
|
|
||||||
toolgroup_selection = st.pills(
|
|
||||||
label="Built-in tools",
|
|
||||||
options=builtin_tools_list,
|
|
||||||
selection_mode="multi",
|
|
||||||
on_change=reset_agent,
|
|
||||||
format_func=lambda tool: "".join(tool.split("::")[1:]),
|
|
||||||
help="List of built-in tools from your llama stack server.",
|
|
||||||
)
|
|
||||||
|
|
||||||
if "builtin::rag" in toolgroup_selection:
|
|
||||||
vector_stores = llama_stack_api.client.vector_stores.list() or []
|
|
||||||
if not vector_stores:
|
|
||||||
st.info("No vector databases available for selection.")
|
|
||||||
vector_stores = [vector_store.identifier for vector_store in vector_stores]
|
|
||||||
selected_vector_stores = st.multiselect(
|
|
||||||
label="Select Document Collections to use in RAG queries",
|
|
||||||
options=vector_stores,
|
|
||||||
on_change=reset_agent,
|
|
||||||
)
|
|
||||||
|
|
||||||
mcp_selection = st.pills(
|
|
||||||
label="MCP Servers",
|
|
||||||
options=mcp_tools_list,
|
|
||||||
selection_mode="multi",
|
|
||||||
on_change=reset_agent,
|
|
||||||
format_func=lambda tool: "".join(tool.split("::")[1:]),
|
|
||||||
help="List of MCP servers registered to your llama stack server.",
|
|
||||||
)
|
|
||||||
|
|
||||||
toolgroup_selection.extend(mcp_selection)
|
|
||||||
|
|
||||||
grouped_tools = {}
|
|
||||||
total_tools = 0
|
|
||||||
|
|
||||||
for toolgroup_id in toolgroup_selection:
|
|
||||||
tools = client.tools.list(toolgroup_id=toolgroup_id)
|
|
||||||
grouped_tools[toolgroup_id] = [tool.name for tool in tools]
|
|
||||||
total_tools += len(tools)
|
|
||||||
|
|
||||||
st.markdown(f"Active Tools: 🛠 {total_tools}")
|
|
||||||
|
|
||||||
for group_id, tools in grouped_tools.items():
|
|
||||||
with st.expander(f"🔧 Tools from `{group_id}`"):
|
|
||||||
for idx, tool in enumerate(tools, start=1):
|
|
||||||
st.markdown(f"{idx}. `{tool.split(':')[-1]}`")
|
|
||||||
|
|
||||||
st.subheader("Agent Configurations")
|
|
||||||
st.subheader("Agent Type")
|
|
||||||
agent_type = st.radio(
|
|
||||||
label="Select Agent Type",
|
|
||||||
options=["Regular", "ReAct"],
|
|
||||||
on_change=reset_agent,
|
|
||||||
)
|
|
||||||
|
|
||||||
if agent_type == "ReAct":
|
|
||||||
agent_type = AgentType.REACT
|
|
||||||
else:
|
|
||||||
agent_type = AgentType.REGULAR
|
|
||||||
|
|
||||||
max_tokens = st.slider(
|
|
||||||
"Max Tokens",
|
|
||||||
min_value=0,
|
|
||||||
max_value=4096,
|
|
||||||
value=512,
|
|
||||||
step=64,
|
|
||||||
help="The maximum number of tokens to generate",
|
|
||||||
on_change=reset_agent,
|
|
||||||
)
|
|
||||||
|
|
||||||
for i, tool_name in enumerate(toolgroup_selection):
|
|
||||||
if tool_name == "builtin::rag":
|
|
||||||
tool_dict = dict(
|
|
||||||
name="builtin::rag",
|
|
||||||
args={
|
|
||||||
"vector_store_ids": list(selected_vector_stores),
|
|
||||||
},
|
|
||||||
)
|
|
||||||
toolgroup_selection[i] = tool_dict
|
|
||||||
|
|
||||||
@st.cache_resource
|
|
||||||
def create_agent():
|
|
||||||
if "agent_type" in st.session_state and st.session_state.agent_type == AgentType.REACT:
|
|
||||||
return ReActAgent(
|
|
||||||
client=client,
|
|
||||||
model=model,
|
|
||||||
tools=toolgroup_selection,
|
|
||||||
response_format={
|
|
||||||
"type": "json_schema",
|
|
||||||
"json_schema": ReActOutput.model_json_schema(),
|
|
||||||
},
|
|
||||||
sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens},
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
return Agent(
|
|
||||||
client,
|
|
||||||
model=model,
|
|
||||||
instructions="You are a helpful assistant. When you use a tool always respond with a summary of the result.",
|
|
||||||
tools=toolgroup_selection,
|
|
||||||
sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens},
|
|
||||||
)
|
|
||||||
|
|
||||||
st.session_state.agent_type = agent_type
|
|
||||||
|
|
||||||
agent = create_agent()
|
|
||||||
|
|
||||||
if "agent_session_id" not in st.session_state:
|
|
||||||
st.session_state["agent_session_id"] = agent.create_session(session_name=f"tool_demo_{uuid.uuid4()}")
|
|
||||||
|
|
||||||
session_id = st.session_state["agent_session_id"]
|
|
||||||
|
|
||||||
if "messages" not in st.session_state:
|
|
||||||
st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]
|
|
||||||
|
|
||||||
for msg in st.session_state.messages:
|
|
||||||
with st.chat_message(msg["role"]):
|
|
||||||
st.markdown(msg["content"])
|
|
||||||
|
|
||||||
if prompt := st.chat_input(placeholder=""):
|
|
||||||
with st.chat_message("user"):
|
|
||||||
st.markdown(prompt)
|
|
||||||
|
|
||||||
st.session_state.messages.append({"role": "user", "content": prompt})
|
|
||||||
|
|
||||||
turn_response = agent.create_turn(
|
|
||||||
session_id=session_id,
|
|
||||||
messages=[{"role": "user", "content": prompt}],
|
|
||||||
stream=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
def response_generator(turn_response):
|
|
||||||
if st.session_state.get("agent_type") == AgentType.REACT:
|
|
||||||
return _handle_react_response(turn_response)
|
|
||||||
else:
|
|
||||||
return _handle_regular_response(turn_response)
|
|
||||||
|
|
||||||
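        # ReAct turns stream structured JSON steps: accumulate step text, render
        # thought/action/observation expanders, and emit the final answer (or a
        # summary of tool results when no explicit answer was produced).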
        def _handle_react_response(turn_response):
            current_step_content = ""
            final_answer = None
            tool_results = []

            for response in turn_response:
                if not hasattr(response.event, "payload"):
                    yield (
                        "\n\n🚨 :red[_Llama Stack server Error:_]\n"
                        "The response received is missing an expected `payload` attribute.\n"
                        "This could indicate a malformed response or an internal issue within the server.\n\n"
                        f"Error details: {response}"
                    )
                    return

                payload = response.event.payload

                if payload.event_type == "step_progress" and hasattr(payload.delta, "text"):
                    current_step_content += payload.delta.text
                    continue

                if payload.event_type == "step_complete":
                    step_details = payload.step_details

                    if step_details.step_type == "inference":
                        # Capture the sub-generator's return value so a parsed final
                        # answer suppresses the fallback tool-results summary below.
                        final_answer = yield from _process_inference_step(current_step_content, tool_results, final_answer)
                        current_step_content = ""
                    elif step_details.step_type == "tool_execution":
                        tool_results = _process_tool_execution(step_details, tool_results)
                        current_step_content = ""
                    else:
                        current_step_content = ""

            if not final_answer and tool_results:
                yield from _format_tool_results_summary(tool_results)
        def _process_inference_step(current_step_content, tool_results, final_answer):
            try:
                react_output_data = json.loads(current_step_content)
                thought = react_output_data.get("thought")
                action = react_output_data.get("action")
                answer = react_output_data.get("answer")

                if answer and answer != "null" and answer is not None:
                    final_answer = answer

                if thought:
                    with st.expander("🤔 Thinking...", expanded=False):
                        st.markdown(f":grey[__{thought}__]")

                if action and isinstance(action, dict):
                    tool_name = action.get("tool_name")
                    tool_params = action.get("tool_params")
                    with st.expander(f'🛠 Action: Using tool "{tool_name}"', expanded=False):
                        st.json(tool_params)

                if answer and answer != "null" and answer is not None:
                    yield f"\n\n✅ **Final Answer:**\n{answer}"

            except json.JSONDecodeError:
                yield f"\n\nFailed to parse ReAct step content:\n```json\n{current_step_content}\n```"
            except Exception as e:
                yield f"\n\nFailed to process ReAct step: {e}\n```json\n{current_step_content}\n```"

            return final_answer
        def _process_tool_execution(step_details, tool_results):
            try:
                if hasattr(step_details, "tool_responses") and step_details.tool_responses:
                    for tool_response in step_details.tool_responses:
                        tool_name = tool_response.tool_name
                        content = tool_response.content
                        tool_results.append((tool_name, content))
                        with st.expander(f'⚙️ Observation (Result from "{tool_name}")', expanded=False):
                            try:
                                parsed_content = json.loads(content)
                                st.json(parsed_content)
                            except json.JSONDecodeError:
                                st.code(content, language=None)
                else:
                    with st.expander("⚙️ Observation", expanded=False):
                        st.markdown(":grey[_Tool execution step completed, but no response data found._]")
            except Exception as e:
                with st.expander("⚙️ Error in Tool Execution", expanded=False):
                    st.markdown(f":red[_Error processing tool execution: {str(e)}_]")

            return tool_results
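        # The helpers below turn raw tool results into a short markdown summary, with
        # specialised formatting for web search results, result lists, dicts, and plain lists.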
        def _format_tool_results_summary(tool_results):
            yield "\n\n**Here's what I found:**\n"
            for tool_name, content in tool_results:
                try:
                    parsed_content = json.loads(content)

                    if tool_name == "web_search" and "top_k" in parsed_content:
                        yield from _format_web_search_results(parsed_content)
                    elif "results" in parsed_content and isinstance(parsed_content["results"], list):
                        yield from _format_results_list(parsed_content["results"])
                    elif isinstance(parsed_content, dict) and len(parsed_content) > 0:
                        yield from _format_dict_results(parsed_content)
                    elif isinstance(parsed_content, list) and len(parsed_content) > 0:
                        yield from _format_list_results(parsed_content)
                except json.JSONDecodeError:
                    yield f"\n**{tool_name}** was used but returned complex data. Check the observation for details.\n"
                except (TypeError, AttributeError, KeyError, IndexError) as e:
                    print(f"Error processing {tool_name} result: {type(e).__name__}: {e}")
        def _format_web_search_results(parsed_content):
            for i, result in enumerate(parsed_content["top_k"], 1):
                if i <= 3:
                    title = result.get("title", "Untitled")
                    url = result.get("url", "")
                    content_text = result.get("content", "").strip()
                    yield f"\n- **{title}**\n {content_text}\n [Source]({url})\n"
        def _format_results_list(results):
            for i, result in enumerate(results, 1):
                if i <= 3:
                    if isinstance(result, dict):
                        name = result.get("name", result.get("title", "Result " + str(i)))
                        description = result.get("description", result.get("content", result.get("summary", "")))
                        yield f"\n- **{name}**\n {description}\n"
                    else:
                        yield f"\n- {result}\n"
        def _format_dict_results(parsed_content):
            yield "\n```\n"
            for key, value in list(parsed_content.items())[:5]:
                if isinstance(value, str) and len(value) < 100:
                    yield f"{key}: {value}\n"
                else:
                    yield f"{key}: [Complex data]\n"
            yield "```\n"
        def _format_list_results(parsed_content):
            yield "\n"
            for _, item in enumerate(parsed_content[:3], 1):
                if isinstance(item, str):
                    yield f"- {item}\n"
                elif isinstance(item, dict) and "text" in item:
                    yield f"- {item['text']}\n"
                elif isinstance(item, dict) and len(item) > 0:
                    first_value = next(iter(item.values()))
                    if isinstance(first_value, str) and len(first_value) < 100:
                        yield f"- {first_value}\n"
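        # Regular (non-ReAct) agents stream plain text deltas; tool invocations are
        # surfaced inline as they complete.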
        def _handle_regular_response(turn_response):
            for response in turn_response:
                if hasattr(response.event, "payload"):
                    print(response.event.payload)
                    if response.event.payload.event_type == "step_progress":
                        if hasattr(response.event.payload.delta, "text"):
                            yield response.event.payload.delta.text
                    if response.event.payload.event_type == "step_complete":
                        if response.event.payload.step_details.step_type == "tool_execution":
                            if response.event.payload.step_details.tool_calls:
                                tool_name = str(response.event.payload.step_details.tool_calls[0].tool_name)
                                yield f'\n\n🛠 :grey[_Using "{tool_name}" tool:_]\n\n'
                            else:
                                yield "No tool_calls present in step_details"
                else:
                    yield f"Error occurred in the Llama Stack Cluster: {response}"
with st.chat_message("assistant"):
|
|
||||||
response_content = st.write_stream(response_generator(turn_response))
|
|
||||||
|
|
||||||
st.session_state.messages.append({"role": "assistant", "content": response_content})
|
|
||||||
|
|
||||||
|
|
||||||
tool_chat_page()
|
|
||||||
|
|
@ -1,5 +0,0 @@
|
||||||
llama-stack>=0.2.1
|
|
||||||
llama-stack-client>=0.2.1
|
|
||||||
pandas
|
|
||||||
streamlit
|
|
||||||
streamlit-option-menu
|
|
||||||
uv.lock (generated): 13 deletions
@@ -1963,14 +1963,6 @@ dependencies = [
     { name = "uvicorn" },
 ]
-
-[package.optional-dependencies]
-ui = [
-    { name = "llama-stack-client" },
-    { name = "pandas" },
-    { name = "streamlit" },
-    { name = "streamlit-option-menu" },
-]
 
 [package.dev-dependencies]
 benchmark = [
     { name = "locust" },
@@ -2097,11 +2089,9 @@ requires-dist = [
     { name = "jinja2", specifier = ">=3.1.6" },
     { name = "jsonschema" },
     { name = "llama-stack-client", specifier = ">=0.3.0" },
-    { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.3.0" },
     { name = "openai", specifier = ">=2.5.0" },
     { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
     { name = "opentelemetry-sdk", specifier = ">=1.30.0" },
-    { name = "pandas", marker = "extra == 'ui'" },
     { name = "pillow" },
     { name = "prompt-toolkit" },
     { name = "pydantic", specifier = ">=2.11.9" },
@@ -2111,13 +2101,10 @@ requires-dist = [
     { name = "rich" },
     { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" },
     { name = "starlette" },
-    { name = "streamlit", marker = "extra == 'ui'" },
-    { name = "streamlit-option-menu", marker = "extra == 'ui'" },
     { name = "termcolor" },
     { name = "tiktoken" },
     { name = "uvicorn", specifier = ">=0.34.0" },
 ]
-provides-extras = ["ui"]
 
 [package.metadata.requires-dev]
 benchmark = [{ name = "locust", specifier = ">=2.39.1" }]