mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 09:53:45 +00:00

chore(ui): remove the Streamlit UI (#4097)

parent 939a2db58f
commit a2c4c12384

27 changed files with 0 additions and 1597 deletions
@ -35,9 +35,6 @@ Here are the key topics that will help you build effective AI applications:
- **[Telemetry](./telemetry.mdx)** - Monitor and analyze your agents' performance and behavior
- **[Safety](./safety.mdx)** - Implement guardrails and safety measures to ensure responsible AI behavior

### 🎮 **Interactive Development**
- **[Playground](./playground.mdx)** - Interactive environment for testing and developing applications

## Application Patterns

### 🤖 **Conversational Agents**
@ -1,298 +0,0 @@
---
title: Llama Stack Playground
description: Interactive interface to explore and experiment with Llama Stack capabilities
sidebar_label: Playground
sidebar_position: 10
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Llama Stack Playground

:::note[Experimental Feature]
The Llama Stack Playground is currently experimental and subject to change. We welcome feedback and contributions to help improve it.
:::

The Llama Stack Playground is a simple interface that aims to:
- **Showcase capabilities and concepts** of Llama Stack in an interactive environment
- **Demo end-to-end application code** to help users get started building their own applications
- **Provide a UI** to help users inspect and understand Llama Stack API providers and resources

## Key Features

### Interactive Playground Pages

The playground provides interactive pages for users to explore Llama Stack API capabilities:

#### Chatbot Interface

<video
  controls
  autoPlay
  playsInline
  muted
  loop
  style={{width: '100%'}}
>
  <source src="https://github.com/user-attachments/assets/8d2ef802-5812-4a28-96e1-316038c84cbf" type="video/mp4" />
  Your browser does not support the video tag.
</video>

<Tabs>
<TabItem value="chat" label="Chat">

**Simple Chat Interface**
- Chat directly with Llama models through an intuitive interface
- Uses the `/chat/completions` streaming API under the hood
- Real-time message streaming for responsive interactions
- Perfect for testing model capabilities and prompt engineering

</TabItem>
<TabItem value="rag" label="RAG Chat">

**Document-Aware Conversations**
- Upload documents to create memory banks
- Chat with a RAG-enabled agent that can query your documents
- Uses Llama Stack's `/agents` API to create and manage RAG sessions
- Ideal for exploring knowledge-enhanced AI applications

</TabItem>
</Tabs>
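
For readers who want the same flow outside the UI, here is a minimal Python sketch of what the Chat page does under the hood. It assumes a Llama Stack server reachable at `http://localhost:8321`; the model id is a placeholder to be replaced with one returned by `client.models.list()`.

```python
import os

from llama_stack_client import LlamaStackClient

# Assumes a running Llama Stack server; endpoint and model id are placeholders.
client = LlamaStackClient(base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:8321"))

response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.3-70B-Instruct",  # placeholder; pick one from client.models.list()
    messages=[
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": "What is Llama Stack?"},
    ],
    sampling_params={"strategy": {"type": "greedy"}, "max_tokens": 512},
    stream=True,
)

# Streamed responses arrive as progress events carrying text deltas.
for chunk in response:
    if chunk.event.event_type == "progress":
        print(chunk.event.delta.text, end="", flush=True)
```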

#### Evaluation Interface

<video
  controls
  autoPlay
  playsInline
  muted
  loop
  style={{width: '100%'}}
>
  <source src="https://github.com/user-attachments/assets/6cc1659f-eba4-49ca-a0a5-7c243557b4f5" type="video/mp4" />
  Your browser does not support the video tag.
</video>

<Tabs>
<TabItem value="scoring" label="Scoring Evaluations">

**Custom Dataset Evaluation**
- Upload your own evaluation datasets
- Run evaluations using available scoring functions
- Uses Llama Stack's `/scoring` API for flexible evaluation workflows
- Great for testing application performance on custom metrics

</TabItem>
<TabItem value="benchmarks" label="Benchmark Evaluations">

<video
  controls
  autoPlay
  playsInline
  muted
  loop
  style={{width: '100%', marginBottom: '1rem'}}
>
  <source src="https://github.com/user-attachments/assets/345845c7-2a2b-4095-960a-9ae40f6a93cf" type="video/mp4" />
  Your browser does not support the video tag.
</video>

**Pre-registered Evaluation Tasks**
- Evaluate models or agents on pre-defined tasks
- Uses Llama Stack's `/eval` API for comprehensive evaluation
- Combines datasets and scoring functions for standardized testing

**Setup Requirements:**

Register evaluation datasets and benchmarks first:

```bash
# Register evaluation dataset
llama-stack-client datasets register \
  --dataset-id "mmlu" \
  --provider-id "huggingface" \
  --url "https://huggingface.co/datasets/llamastack/evals" \
  --metadata '{"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"}' \
  --schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string"}, "chat_completion_input": {"type": "string"}}'

# Register benchmark task
llama-stack-client benchmarks register \
  --eval-task-id meta-reference-mmlu \
  --provider-id meta-reference \
  --dataset-id mmlu \
  --scoring-functions basic::regex_parser_multiple_choice_answer
```

</TabItem>
</Tabs>
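
The benchmark flow can also be scripted. The sketch below mirrors the calls the Benchmark Evaluations page makes (`datasets.iterrows`, `eval.evaluate_rows`), assuming the `mmlu` dataset and `meta-reference-mmlu` benchmark registered above; the model id is a placeholder.

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

benchmark_id = "meta-reference-mmlu"  # registered above
benchmark = next(b for b in client.benchmarks.list() if b.identifier == benchmark_id)

# Pull a few rows from the benchmark's dataset and evaluate them.
rows = client.datasets.iterrows(dataset_id=benchmark.dataset_id).data[:5]

benchmark_config = {
    "type": "benchmark",
    "eval_candidate": {
        "type": "model",
        "model": "meta-llama/Llama-3.3-70B-Instruct",  # placeholder; pick one from client.models.list()
        "sampling_params": {"strategy": {"type": "greedy"}, "max_tokens": 512},
    },
    "scoring_params": {},
}

result = client.eval.evaluate_rows(
    benchmark_id=benchmark_id,
    input_rows=rows,
    scoring_functions=benchmark.scoring_functions,
    benchmark_config=benchmark_config,
)
print(result.scores)
```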

#### Inspection Interface

<video
  controls
  autoPlay
  playsInline
  muted
  loop
  style={{width: '100%'}}
>
  <source src="https://github.com/user-attachments/assets/01d52b2d-92af-4e3a-b623-a9b8ba22ba99" type="video/mp4" />
  Your browser does not support the video tag.
</video>

<Tabs>
<TabItem value="providers" label="API Providers">

**Provider Management**
- Inspect available Llama Stack API providers
- View provider configurations and capabilities
- Uses the `/providers` API for real-time provider information
- Essential for understanding your deployment's capabilities

</TabItem>
<TabItem value="resources" label="API Resources">

**Resource Exploration**
- Inspect Llama Stack API resources including:
  - **Models**: Available language models
  - **Datasets**: Registered evaluation datasets
  - **Memory Banks**: Vector databases and knowledge stores
  - **Benchmarks**: Evaluation tasks and scoring functions
  - **Shields**: Safety and content moderation tools
- Uses `/<resources>/list` APIs for comprehensive resource visibility
- For detailed information about resources, see [Core Concepts](/docs/concepts)

</TabItem>
</Tabs>
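
The inspection pages are thin wrappers over the list APIs. A short sketch of the equivalent calls from Python, assuming a local server (attribute names follow the removed UI pages):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# Providers, grouped by the API they implement (what the API Providers page shows)
for p in client.providers.list():
    print(p.api, p.to_dict())

# Resources (what the API Resources page shows)
print([m.identifier for m in client.models.list()])
print([s.identifier for s in client.shields.list()])
print([s.identifier for s in client.scoring_functions.list()])
print([d.identifier for d in client.datasets.list()])
print([b.identifier for b in client.benchmarks.list()])
```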

## Getting Started

### Quick Start Guide

<Tabs>
<TabItem value="setup" label="Setup">

**1. Start the Llama Stack API Server**

```bash
llama stack list-deps together | xargs -L1 uv pip install
llama stack run together
```

**2. Start the Streamlit UI**

```bash
# Launch the playground interface
uv run --with ".[ui]" streamlit run llama_stack/core/ui/app.py
```

</TabItem>
<TabItem value="usage" label="Usage Tips">

**Making the Most of the Playground:**

- **Start with Chat**: Test basic model interactions and prompt engineering
- **Explore RAG**: Upload sample documents to see knowledge-enhanced responses
- **Try Evaluations**: Use the scoring interface to understand evaluation metrics
- **Inspect Resources**: Check what providers and resources are available
- **Experiment with Settings**: Adjust parameters to see how they affect results

</TabItem>
</Tabs>
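
Before starting the Streamlit UI it can be worth confirming the server is reachable from Python; a quick sanity check, assuming the default endpoint:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

models = client.models.list()
print(f"Server is up, {len(models)} model(s) registered:")
for m in models:
    print(" -", m.identifier)
```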

### Available Distributions

The playground works with any Llama Stack distribution. Popular options include:

<Tabs>
<TabItem value="together" label="Together AI">

```bash
llama stack list-deps together | xargs -L1 uv pip install
llama stack run together
```

**Features:**
- Cloud-hosted models
- Fast inference
- Multiple model options

</TabItem>
<TabItem value="ollama" label="Ollama (Local)">

```bash
llama stack list-deps ollama | xargs -L1 uv pip install
llama stack run ollama
```

**Features:**
- Local model execution
- Privacy-focused
- No internet required

</TabItem>
<TabItem value="meta-reference" label="Meta Reference">

```bash
llama stack list-deps meta-reference | xargs -L1 uv pip install
llama stack run meta-reference
```

**Features:**
- Reference implementation
- All API features available
- Best for development

</TabItem>
</Tabs>

## Use Cases & Examples

### Educational Use Cases
- **Learning Llama Stack**: Hands-on exploration of API capabilities
- **Prompt Engineering**: Interactive testing of different prompting strategies
- **RAG Experimentation**: Understanding how document retrieval affects responses
- **Evaluation Understanding**: See how different metrics evaluate model performance

### Development Use Cases
- **Prototype Testing**: Quick validation of application concepts
- **API Exploration**: Understanding available endpoints and parameters
- **Integration Planning**: Seeing how different components work together
- **Demo Creation**: Showcasing Llama Stack capabilities to stakeholders

### Research Use Cases
- **Model Comparison**: Side-by-side testing of different models
- **Evaluation Design**: Understanding how scoring functions work
- **Safety Testing**: Exploring shield effectiveness with different inputs
- **Performance Analysis**: Measuring model behavior across different scenarios

## Best Practices

### 🚀 **Getting Started**
- Begin with simple chat interactions to understand basic functionality
- Gradually explore more advanced features like RAG and evaluations
- Use the inspection tools to understand your deployment's capabilities

### 🔧 **Development Workflow**
- Use the playground to prototype before writing application code
- Test different parameter settings interactively
- Validate evaluation approaches before implementing them programmatically

### 📊 **Evaluation & Testing**
- Start with simple scoring functions before trying complex evaluations
- Use the playground to understand evaluation results before automation
- Test safety features with various input types

### 🎯 **Production Preparation**
- Use playground insights to inform your production API usage
- Test edge cases and error conditions interactively
- Validate resource configurations before deployment

## Related Resources

- **[Getting Started Guide](../getting_started/quickstart)** - Complete setup and introduction
- **[Core Concepts](/docs/concepts)** - Understanding Llama Stack fundamentals
- **[Agents](./agent)** - Building intelligent agents
- **[RAG (Retrieval Augmented Generation)](./rag)** - Knowledge-enhanced applications
- **[Evaluations](./evals)** - Comprehensive evaluation framework
- **[API Reference](/docs/api/llama-stack-specification)** - Complete API documentation
@ -51,14 +51,6 @@ dependencies = [
    "sqlalchemy[asyncio]>=2.0.41", # server - for conversations
]

[project.optional-dependencies]
ui = [
    "streamlit",
    "pandas",
    "llama-stack-client>=0.3.0",
    "streamlit-option-menu",
]

[dependency-groups]
dev = [
    "pytest>=8.4",
@ -1,11 +0,0 @@
# More info on playground configuration can be found here:
# https://llama-stack.readthedocs.io/en/latest/playground

FROM python:3.12-slim
WORKDIR /app
COPY . /app/
RUN /usr/local/bin/python -m pip install --upgrade pip && \
    /usr/local/bin/pip3 install -r requirements.txt
EXPOSE 8501

ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
@ -1,50 +0,0 @@
# (Experimental) Llama Stack UI

## Docker Setup

:warning: This is a work in progress.

## Developer Setup

1. Start up the Llama Stack API server. More details [here](https://llamastack.github.io/latest/getting_started/index.html).

```
llama stack list-deps together | xargs -L1 uv pip install

llama stack run together
```

2. (Optional) Register datasets and eval tasks as resources if you want to run pre-configured evaluation flows (e.g. the Evaluations (Generation + Scoring) page).

```bash
llama-stack-client datasets register \
  --dataset-id "mmlu" \
  --provider-id "huggingface" \
  --url "https://huggingface.co/datasets/llamastack/evals" \
  --metadata '{"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"}' \
  --schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string"}, "chat_completion_input": {"type": "string"}}'
```

```bash
llama-stack-client benchmarks register \
  --eval-task-id meta-reference-mmlu \
  --provider-id meta-reference \
  --dataset-id mmlu \
  --scoring-functions basic::regex_parser_multiple_choice_answer
```

3. Start the Streamlit UI

```bash
uv run --with ".[ui]" streamlit run llama_stack/core/ui/app.py
```

## Environment Variables

| Environment Variable | Description                       | Default Value         |
|----------------------|-----------------------------------|-----------------------|
| LLAMA_STACK_ENDPOINT | The endpoint for the Llama Stack  | http://localhost:8321 |
| FIREWORKS_API_KEY    | API key for Fireworks provider    | (empty string)        |
| TOGETHER_API_KEY     | API key for Together provider     | (empty string)        |
| SAMBANOVA_API_KEY    | API key for SambaNova provider    | (empty string)        |
| OPENAI_API_KEY       | API key for OpenAI provider       | (empty string)        |
@ -1,5 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
@ -1,55 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st


def main():
    # Evaluation pages
    application_evaluation_page = st.Page(
        "page/evaluations/app_eval.py",
        title="Evaluations (Scoring)",
        icon="📊",
        default=False,
    )
    native_evaluation_page = st.Page(
        "page/evaluations/native_eval.py",
        title="Evaluations (Generation + Scoring)",
        icon="📊",
        default=False,
    )

    # Playground pages
    chat_page = st.Page("page/playground/chat.py", title="Chat", icon="💬", default=True)
    rag_page = st.Page("page/playground/rag.py", title="RAG", icon="💬", default=False)
    tool_page = st.Page("page/playground/tools.py", title="Tools", icon="🛠", default=False)

    # Distribution pages
    resources_page = st.Page("page/distribution/resources.py", title="Resources", icon="🔍", default=False)
    provider_page = st.Page(
        "page/distribution/providers.py",
        title="API Providers",
        icon="🔍",
        default=False,
    )

    pg = st.navigation(
        {
            "Playground": [
                chat_page,
                rag_page,
                tool_page,
                application_evaluation_page,
                native_evaluation_page,
            ],
            "Inspect": [provider_page, resources_page],
        },
        expanded=False,
    )
    pg.run()


if __name__ == "__main__":
    main()
@ -1,5 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
@ -1,32 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import os

from llama_stack_client import LlamaStackClient


class LlamaStackApi:
    def __init__(self):
        self.client = LlamaStackClient(
            base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:8321"),
            provider_data={
                "fireworks_api_key": os.environ.get("FIREWORKS_API_KEY", ""),
                "together_api_key": os.environ.get("TOGETHER_API_KEY", ""),
                "sambanova_api_key": os.environ.get("SAMBANOVA_API_KEY", ""),
                "openai_api_key": os.environ.get("OPENAI_API_KEY", ""),
                "tavily_search_api_key": os.environ.get("TAVILY_SEARCH_API_KEY", ""),
            },
        )

    def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: dict | None):
        """Run scoring on a single row"""
        if not scoring_params:
            scoring_params = dict.fromkeys(scoring_function_ids)
        return self.client.scoring.score(input_rows=[row], scoring_functions=scoring_params)


llama_stack_api = LlamaStackApi()
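
A hypothetical usage sketch for the `LlamaStackApi` wrapper removed above: score a single row with one scoring function. The row keys and the `basic::subset_of` id are examples; any identifiers returned by `client.scoring_functions.list()` work.

```python
# Hypothetical usage of the wrapper above (module removed in this commit).
from llama_stack.core.ui.modules.api import llama_stack_api

row = {
    "input_query": "What is the capital of France?",
    "generated_answer": "Paris",
    "expected_answer": "Paris",
}

result = llama_stack_api.run_scoring(
    row,
    scoring_function_ids=["basic::subset_of"],  # example id; see client.scoring_functions.list()
    scoring_params=None,
)
print(result.results["basic::subset_of"].score_rows[0])
```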
@ -1,42 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import base64
import os

import pandas as pd
import streamlit as st


def process_dataset(file):
    if file is None:
        return "No file uploaded", None

    try:
        # Determine file type and read accordingly
        file_ext = os.path.splitext(file.name)[1].lower()
        if file_ext == ".csv":
            df = pd.read_csv(file)
        elif file_ext in [".xlsx", ".xls"]:
            df = pd.read_excel(file)
        else:
            return "Unsupported file format. Please upload a CSV or Excel file.", None

        return df

    except Exception as e:
        st.error(f"Error processing file: {str(e)}")
        return None


def data_url_from_file(file) -> str:
    file_content = file.getvalue()
    base64_content = base64.b64encode(file_content).decode("utf-8")
    mime_type = file.type

    data_url = f"data:{mime_type};base64,{base64_content}"

    return data_url
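
A small illustration of `data_url_from_file` above. Streamlit's `UploadedFile` exposes `type` and `getvalue()`, so the stand-in class below (purely hypothetical) mimics that interface:

```python
from llama_stack.core.ui.modules.utils import data_url_from_file  # module removed in this commit


class FakeUpload:
    """Minimal stand-in for a Streamlit UploadedFile (name, type, getvalue())."""

    def __init__(self, name: str, mime_type: str, content: bytes):
        self.name = name
        self.type = mime_type
        self._content = content

    def getvalue(self) -> bytes:
        return self._content


doc = FakeUpload("notes.txt", "text/plain", b"Llama Stack playground notes")
print(data_url_from_file(doc))  # prints a data:text/plain;base64,... URL
```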
@ -1,5 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
@ -1,5 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
@ -1,18 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st

from llama_stack.core.ui.modules.api import llama_stack_api


def datasets():
    st.header("Datasets")

    datasets_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.datasets.list()}
    if len(datasets_info) > 0:
        selected_dataset = st.selectbox("Select a dataset", list(datasets_info.keys()))
        st.json(datasets_info[selected_dataset], expanded=True)
@ -1,20 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st

from llama_stack.core.ui.modules.api import llama_stack_api


def benchmarks():
    # Benchmarks Section
    st.header("Benchmarks")

    benchmarks_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.benchmarks.list()}

    if len(benchmarks_info) > 0:
        selected_benchmark = st.selectbox("Select an eval task", list(benchmarks_info.keys()), key="benchmark_inspect")
        st.json(benchmarks_info[selected_benchmark], expanded=True)
@ -1,18 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st

from llama_stack.core.ui.modules.api import llama_stack_api


def models():
    # Models Section
    st.header("Models")

    models_info = {m.id: m.model_dump() for m in llama_stack_api.client.models.list()}

    selected_model = st.selectbox("Select a model", list(models_info.keys()))
    st.json(models_info[selected_model])
@ -1,27 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st

from llama_stack.core.ui.modules.api import llama_stack_api


def providers():
    st.header("🔍 API Providers")
    apis_providers_lst = llama_stack_api.client.providers.list()
    api_to_providers = {}
    for api_provider in apis_providers_lst:
        if api_provider.api in api_to_providers:
            api_to_providers[api_provider.api].append(api_provider)
        else:
            api_to_providers[api_provider.api] = [api_provider]

    for api in api_to_providers.keys():
        st.markdown(f"###### {api}")
        st.dataframe([x.to_dict() for x in api_to_providers[api]], width=500)


providers()
@ -1,48 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from streamlit_option_menu import option_menu

from llama_stack.core.ui.page.distribution.datasets import datasets
from llama_stack.core.ui.page.distribution.eval_tasks import benchmarks
from llama_stack.core.ui.page.distribution.models import models
from llama_stack.core.ui.page.distribution.scoring_functions import scoring_functions
from llama_stack.core.ui.page.distribution.shields import shields


def resources_page():
    options = [
        "Models",
        "Shields",
        "Scoring Functions",
        "Datasets",
        "Benchmarks",
    ]
    icons = ["magic", "shield", "file-bar-graph", "database", "list-task"]
    selected_resource = option_menu(
        None,
        options,
        icons=icons,
        orientation="horizontal",
        styles={
            "nav-link": {
                "font-size": "12px",
            },
        },
    )
    if selected_resource == "Benchmarks":
        benchmarks()
    elif selected_resource == "Datasets":
        datasets()
    elif selected_resource == "Models":
        models()
    elif selected_resource == "Scoring Functions":
        scoring_functions()
    elif selected_resource == "Shields":
        shields()


resources_page()
@ -1,18 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st

from llama_stack.core.ui.modules.api import llama_stack_api


def scoring_functions():
    st.header("Scoring Functions")

    scoring_functions_info = {s.identifier: s.to_dict() for s in llama_stack_api.client.scoring_functions.list()}

    selected_scoring_function = st.selectbox("Select a scoring function", list(scoring_functions_info.keys()))
    st.json(scoring_functions_info[selected_scoring_function], expanded=True)
@ -1,19 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st

from llama_stack.core.ui.modules.api import llama_stack_api


def shields():
    # Shields Section
    st.header("Shields")

    shields_info = {s.identifier: s.to_dict() for s in llama_stack_api.client.shields.list()}

    selected_shield = st.selectbox("Select a shield", list(shields_info.keys()))
    st.json(shields_info[selected_shield])
@ -1,5 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
@ -1,143 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import json

import pandas as pd
import streamlit as st

from llama_stack.core.ui.modules.api import llama_stack_api
from llama_stack.core.ui.modules.utils import process_dataset


def application_evaluation_page():
    st.set_page_config(page_title="Evaluations (Scoring)", page_icon="🦙")
    st.title("📊 Evaluations (Scoring)")

    # File uploader
    uploaded_file = st.file_uploader("Upload Dataset", type=["csv", "xlsx", "xls"])

    if uploaded_file is None:
        st.error("No file uploaded")
        return

    # Process uploaded file
    df = process_dataset(uploaded_file)
    if df is None:
        st.error("Error processing file")
        return

    # Display dataset information
    st.success("Dataset loaded successfully!")

    # Display dataframe preview
    st.subheader("Dataset Preview")
    st.dataframe(df)

    # Select Scoring Functions to Run Evaluation On
    st.subheader("Select Scoring Functions")
    scoring_functions = llama_stack_api.client.scoring_functions.list()
    scoring_functions = {sf.identifier: sf for sf in scoring_functions}
    scoring_functions_names = list(scoring_functions.keys())
    selected_scoring_functions = st.multiselect(
        "Choose one or more scoring functions",
        options=scoring_functions_names,
        help="Choose one or more scoring functions.",
    )

    available_models = llama_stack_api.client.models.list()
    available_models = [m.identifier for m in available_models]

    scoring_params = {}
    if selected_scoring_functions:
        st.write("Selected:")
        for scoring_fn_id in selected_scoring_functions:
            scoring_fn = scoring_functions[scoring_fn_id]
            st.write(f"- **{scoring_fn_id}**: {scoring_fn.description}")
            new_params = None
            if scoring_fn.params:
                new_params = {}
                for param_name, param_value in scoring_fn.params.to_dict().items():
                    if param_name == "type":
                        new_params[param_name] = param_value
                        continue

                    if param_name == "judge_model":
                        value = st.selectbox(
                            f"Select **{param_name}** for {scoring_fn_id}",
                            options=available_models,
                            index=0,
                            key=f"{scoring_fn_id}_{param_name}",
                        )
                        new_params[param_name] = value
                    else:
                        value = st.text_area(
                            f"Enter value for **{param_name}** in {scoring_fn_id} in valid JSON format",
                            value=json.dumps(param_value, indent=2),
                            height=80,
                        )
                        try:
                            new_params[param_name] = json.loads(value)
                        except json.JSONDecodeError:
                            st.error(f"Invalid JSON for **{param_name}** in {scoring_fn_id}")

                st.json(new_params)
            scoring_params[scoring_fn_id] = new_params

    # Add run evaluation button & slider
    total_rows = len(df)
    num_rows = st.slider("Number of rows to evaluate", 1, total_rows, total_rows)

    if st.button("Run Evaluation"):
        progress_text = "Running evaluation..."
        progress_bar = st.progress(0, text=progress_text)
        rows = df.to_dict(orient="records")
        if num_rows < total_rows:
            rows = rows[:num_rows]

        # Create separate containers for progress text and results
        progress_text_container = st.empty()
        results_container = st.empty()
        output_res = {}
        for i, r in enumerate(rows):
            # Update progress
            progress = i / len(rows)
            progress_bar.progress(progress, text=progress_text)

            # Run evaluation for current row
            score_res = llama_stack_api.run_scoring(
                r,
                scoring_function_ids=selected_scoring_functions,
                scoring_params=scoring_params,
            )

            for k in r.keys():
                if k not in output_res:
                    output_res[k] = []
                output_res[k].append(r[k])

            for fn_id in selected_scoring_functions:
                if fn_id not in output_res:
                    output_res[fn_id] = []
                output_res[fn_id].append(score_res.results[fn_id].score_rows[0])

            # Display current row results using separate containers
            progress_text_container.write(f"Expand to see current processed result ({i + 1} / {len(rows)})")
            results_container.json(
                score_res.to_json(),
                expanded=2,
            )

        progress_bar.progress(1.0, text="Evaluation complete!")

        # Display results in dataframe
        if output_res:
            output_df = pd.DataFrame(output_res)
            st.subheader("Evaluation Results")
            st.dataframe(output_df)


application_evaluation_page()
@ -1,253 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import json

import pandas as pd
import streamlit as st

from llama_stack.core.ui.modules.api import llama_stack_api


def select_benchmark_1():
    # Select Benchmarks
    st.subheader("1. Choose An Eval Task")
    benchmarks = llama_stack_api.client.benchmarks.list()
    benchmarks = {et.identifier: et for et in benchmarks}
    benchmarks_names = list(benchmarks.keys())
    selected_benchmark = st.selectbox(
        "Choose an eval task.",
        options=benchmarks_names,
        help="Choose an eval task. Each eval task is parameterized by a dataset, and list of scoring functions.",
    )
    with st.expander("View Eval Task"):
        st.json(benchmarks[selected_benchmark], expanded=True)

    st.session_state["selected_benchmark"] = selected_benchmark
    st.session_state["benchmarks"] = benchmarks
    if st.button("Confirm", key="confirm_1"):
        st.session_state["selected_benchmark_1_next"] = True


def define_eval_candidate_2():
    if not st.session_state.get("selected_benchmark_1_next", None):
        return

    st.subheader("2. Define Eval Candidate")
    st.info(
        """
        Define the configurations for the evaluation candidate model or agent used for generation.
        Select "model" if you want to run generation with inference API, or "agent" if you want to run generation with agent API through specifying AgentConfig.
        """
    )
    with st.expander("Define Eval Candidate", expanded=True):
        # Define Eval Candidate
        candidate_type = st.radio("Candidate Type", ["model", "agent"])

        available_models = llama_stack_api.client.models.list()
        available_models = [model.identifier for model in available_models]
        selected_model = st.selectbox(
            "Choose a model",
            available_models,
            index=0,
        )

        # Sampling Parameters
        st.markdown("##### Sampling Parameters")
        temperature = st.slider(
            "Temperature",
            min_value=0.0,
            max_value=1.0,
            value=0.0,
            step=0.1,
            help="Controls the randomness of the response. Higher values make the output more creative and unexpected, lower values make it more conservative and predictable",
        )
        top_p = st.slider(
            "Top P",
            min_value=0.0,
            max_value=1.0,
            value=0.95,
            step=0.1,
        )
        max_tokens = st.slider(
            "Max Tokens",
            min_value=0,
            max_value=4096,
            value=512,
            step=1,
            help="The maximum number of tokens to generate",
        )
        repetition_penalty = st.slider(
            "Repetition Penalty",
            min_value=1.0,
            max_value=2.0,
            value=1.0,
            step=0.1,
            help="Controls the likelihood for generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty, 2 will strongly discourage model to repeat words or phrases.",
        )
        if candidate_type == "model":
            if temperature > 0.0:
                strategy = {
                    "type": "top_p",
                    "temperature": temperature,
                    "top_p": top_p,
                }
            else:
                strategy = {"type": "greedy"}

            eval_candidate = {
                "type": "model",
                "model": selected_model,
                "sampling_params": {
                    "strategy": strategy,
                    "max_tokens": max_tokens,
                    "repetition_penalty": repetition_penalty,
                },
            }
        elif candidate_type == "agent":
            system_prompt = st.text_area(
                "System Prompt",
                value="You are a helpful AI assistant.",
                help="Initial instructions given to the AI to set its behavior and context",
            )
            tools_json = st.text_area(
                "Tools Configuration (JSON)",
                value=json.dumps(
                    [
                        {
                            "type": "brave_search",
                            "engine": "brave",
                            "api_key": "ENTER_BRAVE_API_KEY_HERE",
                        }
                    ]
                ),
                help="Enter tool configurations in JSON format. Each tool should have a name, description, and parameters.",
                height=200,
            )
            try:
                tools = json.loads(tools_json)
            except json.JSONDecodeError:
                st.error("Invalid JSON format for tools configuration")
                tools = []
            eval_candidate = {
                "type": "agent",
                "config": {
                    "model": selected_model,
                    "instructions": system_prompt,
                    "tools": tools,
                    "tool_choice": "auto",
                    "tool_prompt_format": "json",
                    "input_shields": [],
                    "output_shields": [],
                    "enable_session_persistence": False,
                },
            }
        st.session_state["eval_candidate"] = eval_candidate

    if st.button("Confirm", key="confirm_2"):
        st.session_state["selected_eval_candidate_2_next"] = True


def run_evaluation_3():
    if not st.session_state.get("selected_eval_candidate_2_next", None):
        return

    st.subheader("3. Run Evaluation")
    # Add info box to explain configurations being used
    st.info(
        """
        Review the configurations that will be used for this evaluation run, make any necessary changes, and then click the "Run Evaluation" button.
        """
    )
    selected_benchmark = st.session_state["selected_benchmark"]
    benchmarks = st.session_state["benchmarks"]
    eval_candidate = st.session_state["eval_candidate"]

    dataset_id = benchmarks[selected_benchmark].dataset_id
    rows = llama_stack_api.client.datasets.iterrows(
        dataset_id=dataset_id,
    )
    total_rows = len(rows.data)
    # Add number of examples control
    num_rows = st.number_input(
        "Number of Examples to Evaluate",
        min_value=1,
        max_value=total_rows,
        value=5,
        help="Number of examples from the dataset to evaluate. ",
    )

    benchmark_config = {
        "type": "benchmark",
        "eval_candidate": eval_candidate,
        "scoring_params": {},
    }

    with st.expander("View Evaluation Task", expanded=True):
        st.json(benchmarks[selected_benchmark], expanded=True)
    with st.expander("View Evaluation Task Configuration", expanded=True):
        st.json(benchmark_config, expanded=True)

    # Add run button and handle evaluation
    if st.button("Run Evaluation"):
        progress_text = "Running evaluation..."
        progress_bar = st.progress(0, text=progress_text)
        rows = rows.data
        if num_rows < total_rows:
            rows = rows[:num_rows]

        # Create separate containers for progress text and results
        progress_text_container = st.empty()
        results_container = st.empty()
        output_res = {}
        for i, r in enumerate(rows):
            # Update progress
            progress = i / len(rows)
            progress_bar.progress(progress, text=progress_text)
            # Run evaluation for current row
            eval_res = llama_stack_api.client.eval.evaluate_rows(
                benchmark_id=selected_benchmark,
                input_rows=[r],
                scoring_functions=benchmarks[selected_benchmark].scoring_functions,
                benchmark_config=benchmark_config,
            )

            for k in r.keys():
                if k not in output_res:
                    output_res[k] = []
                output_res[k].append(r[k])

            for k in eval_res.generations[0].keys():
                if k not in output_res:
                    output_res[k] = []
                output_res[k].append(eval_res.generations[0][k])

            for scoring_fn in benchmarks[selected_benchmark].scoring_functions:
                if scoring_fn not in output_res:
                    output_res[scoring_fn] = []
                output_res[scoring_fn].append(eval_res.scores[scoring_fn].score_rows[0])

            progress_text_container.write(f"Expand to see current processed result ({i + 1} / {len(rows)})")
            results_container.json(eval_res, expanded=2)

        progress_bar.progress(1.0, text="Evaluation complete!")
        # Display results in dataframe
        if output_res:
            output_df = pd.DataFrame(output_res)
            st.subheader("Evaluation Results")
            st.dataframe(output_df)


def native_evaluation_page():
    st.set_page_config(page_title="Evaluations (Generation + Scoring)", page_icon="🦙")
    st.title("📊 Evaluations (Generation + Scoring)")

    select_benchmark_1()
    define_eval_candidate_2()
    run_evaluation_3()


native_evaluation_page()
@ -1,5 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
@ -1,134 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st

from llama_stack.core.ui.modules.api import llama_stack_api

# Sidebar configurations
with st.sidebar:
    st.header("Configuration")
    available_models = llama_stack_api.client.models.list()
    available_models = [
        model.id
        for model in available_models
        if model.custom_metadata and model.custom_metadata.get("model_type") == "llm"
    ]
    selected_model = st.selectbox(
        "Choose a model",
        available_models,
        index=0,
    )

    temperature = st.slider(
        "Temperature",
        min_value=0.0,
        max_value=1.0,
        value=0.0,
        step=0.1,
        help="Controls the randomness of the response. Higher values make the output more creative and unexpected, lower values make it more conservative and predictable",
    )

    top_p = st.slider(
        "Top P",
        min_value=0.0,
        max_value=1.0,
        value=0.95,
        step=0.1,
    )

    max_tokens = st.slider(
        "Max Tokens",
        min_value=0,
        max_value=4096,
        value=512,
        step=1,
        help="The maximum number of tokens to generate",
    )

    repetition_penalty = st.slider(
        "Repetition Penalty",
        min_value=1.0,
        max_value=2.0,
        value=1.0,
        step=0.1,
        help="Controls the likelihood for generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty, 2 will strongly discourage model to repeat words or phrases.",
    )

    stream = st.checkbox("Stream", value=True)
    system_prompt = st.text_area(
        "System Prompt",
        value="You are a helpful AI assistant.",
        help="Initial instructions given to the AI to set its behavior and context",
    )

    # Add clear chat button to sidebar
    if st.button("Clear Chat", use_container_width=True):
        st.session_state.messages = []
        st.rerun()


# Main chat interface
st.title("🦙 Chat")


# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Chat input
if prompt := st.chat_input("Example: What is Llama Stack?"):
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Display user message
    with st.chat_message("user"):
        st.markdown(prompt)

    # Display assistant response
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""

        if temperature > 0.0:
            strategy = {
                "type": "top_p",
                "temperature": temperature,
                "top_p": top_p,
            }
        else:
            strategy = {"type": "greedy"}

        response = llama_stack_api.client.inference.chat_completion(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt},
            ],
            model_id=selected_model,
            stream=stream,
            sampling_params={
                "strategy": strategy,
                "max_tokens": max_tokens,
                "repetition_penalty": repetition_penalty,
            },
        )

        if stream:
            for chunk in response:
                if chunk.event.event_type == "progress":
                    full_response += chunk.event.delta.text
                    message_placeholder.markdown(full_response + "▌")
            message_placeholder.markdown(full_response)
        else:
            full_response = response.completion_message.content
            message_placeholder.markdown(full_response)

        st.session_state.messages.append({"role": "assistant", "content": full_response})
@ -1,352 +0,0 @@
|
||||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
|
||||||
# the root directory of this source tree.
|
|
||||||
|
|
||||||
import enum
|
|
||||||
import json
|
|
||||||
import uuid
|
|
||||||
|
|
||||||
import streamlit as st
|
|
||||||
from llama_stack_client import Agent
|
|
||||||
from llama_stack_client.lib.agents.react.agent import ReActAgent
|
|
||||||
from llama_stack_client.lib.agents.react.tool_parser import ReActOutput
|
|
||||||
|
|
||||||
from llama_stack.core.ui.modules.api import llama_stack_api
|
|
||||||
|
|
||||||
|
|
||||||
class AgentType(enum.Enum):
|
|
||||||
REGULAR = "Regular"
|
|
||||||
REACT = "ReAct"
|
|
||||||
|
|
||||||
|
|
||||||
def tool_chat_page():
|
|
||||||
st.title("🛠 Tools")
|
|
||||||
|
|
||||||
client = llama_stack_api.client
|
|
||||||
models = client.models.list()
|
|
||||||
model_list = [model.identifier for model in models if model.api_model_type == "llm"]
|
|
||||||
|
|
||||||
tool_groups = client.toolgroups.list()
|
|
||||||
tool_groups_list = [tool_group.identifier for tool_group in tool_groups]
|
|
||||||
mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")]
|
|
||||||
builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")]
|
|
||||||
selected_vector_stores = []
|
|
||||||
|
|
||||||
def reset_agent():
|
|
||||||
st.session_state.clear()
|
|
||||||
st.cache_resource.clear()
|
|
||||||
|
|
||||||
with st.sidebar:
|
|
||||||
st.title("Configuration")
|
|
||||||
st.subheader("Model")
|
|
||||||
model = st.selectbox(label="Model", options=model_list, on_change=reset_agent, label_visibility="collapsed")
|
|
||||||
|
|
||||||
st.subheader("Available ToolGroups")
|
|
||||||
|
|
||||||
toolgroup_selection = st.pills(
|
|
||||||
label="Built-in tools",
|
|
||||||
options=builtin_tools_list,
|
|
||||||
selection_mode="multi",
|
|
||||||
on_change=reset_agent,
|
|
||||||
format_func=lambda tool: "".join(tool.split("::")[1:]),
|
|
||||||
help="List of built-in tools from your llama stack server.",
|
|
||||||
)
|
|
||||||
|
|
||||||
if "builtin::rag" in toolgroup_selection:
|
|
||||||
vector_stores = llama_stack_api.client.vector_stores.list() or []
|
|
||||||
if not vector_stores:
|
|
||||||
st.info("No vector databases available for selection.")
|
|
||||||
vector_stores = [vector_store.identifier for vector_store in vector_stores]
|
|
||||||
selected_vector_stores = st.multiselect(
|
|
||||||
label="Select Document Collections to use in RAG queries",
|
|
||||||
options=vector_stores,
|
|
||||||
on_change=reset_agent,
|
|
||||||
)
|
|
||||||
|
|
||||||
mcp_selection = st.pills(
|
|
||||||
label="MCP Servers",
|
|
||||||
options=mcp_tools_list,
|
|
||||||
selection_mode="multi",
|
|
||||||
on_change=reset_agent,
|
|
||||||
format_func=lambda tool: "".join(tool.split("::")[1:]),
|
|
||||||
help="List of MCP servers registered to your llama stack server.",
|
|
||||||
)
|
|
||||||
|
|
||||||
toolgroup_selection.extend(mcp_selection)
|
|
||||||
|
|
||||||
grouped_tools = {}
|
|
||||||
total_tools = 0
|
|
||||||
|
|
||||||
for toolgroup_id in toolgroup_selection:
|
|
||||||
tools = client.tools.list(toolgroup_id=toolgroup_id)
|
|
||||||
grouped_tools[toolgroup_id] = [tool.name for tool in tools]
|
|
||||||
total_tools += len(tools)
|
|
||||||
|
|
||||||
st.markdown(f"Active Tools: 🛠 {total_tools}")
|
|
||||||
|
|
||||||
for group_id, tools in grouped_tools.items():
|
|
||||||
with st.expander(f"🔧 Tools from `{group_id}`"):
|
|
||||||
for idx, tool in enumerate(tools, start=1):
|
|
||||||
st.markdown(f"{idx}. `{tool.split(':')[-1]}`")
|
|
||||||
|
|
||||||
st.subheader("Agent Configurations")
|
|
||||||
st.subheader("Agent Type")
|
|
||||||
agent_type = st.radio(
|
|
||||||
label="Select Agent Type",
|
|
||||||
options=["Regular", "ReAct"],
|
|
||||||
on_change=reset_agent,
|
|
||||||
)
|
|
||||||
|
|
||||||
if agent_type == "ReAct":
|
|
||||||
agent_type = AgentType.REACT
|
|
||||||
else:
|
|
||||||
agent_type = AgentType.REGULAR
|
|
||||||
|
|
||||||
max_tokens = st.slider(
|
|
||||||
"Max Tokens",
|
|
||||||
min_value=0,
|
|
||||||
max_value=4096,
|
|
||||||
value=512,
|
|
||||||
step=64,
|
|
||||||
help="The maximum number of tokens to generate",
|
|
||||||
on_change=reset_agent,
|
|
||||||
)
|
|
||||||
|
|
||||||
for i, tool_name in enumerate(toolgroup_selection):
|
|
||||||
if tool_name == "builtin::rag":
|
|
||||||
tool_dict = dict(
|
|
||||||
name="builtin::rag",
|
|
||||||
args={
|
|
||||||
"vector_store_ids": list(selected_vector_stores),
|
|
||||||
},
|
|
||||||
)
|
|
||||||
toolgroup_selection[i] = tool_dict
|
|
||||||
|
|
||||||
@st.cache_resource
|
|
||||||
def create_agent():
|
|
||||||
if "agent_type" in st.session_state and st.session_state.agent_type == AgentType.REACT:
|
|
||||||
return ReActAgent(
|
|
||||||
client=client,
|
|
||||||
model=model,
|
|
||||||
tools=toolgroup_selection,
|
|
||||||
response_format={
|
|
||||||
"type": "json_schema",
|
|
||||||
"json_schema": ReActOutput.model_json_schema(),
|
|
||||||
},
|
|
||||||
sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens},
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
return Agent(
|
|
||||||
client,
|
|
||||||
model=model,
|
|
||||||
instructions="You are a helpful assistant. When you use a tool always respond with a summary of the result.",
|
|
||||||
tools=toolgroup_selection,
|
|
||||||
sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens},
|
|
||||||
)
|
|
||||||
|
|
||||||
st.session_state.agent_type = agent_type
|
|
||||||
|
|
||||||
agent = create_agent()
|
|
||||||
|
|
||||||
if "agent_session_id" not in st.session_state:
|
|
||||||
st.session_state["agent_session_id"] = agent.create_session(session_name=f"tool_demo_{uuid.uuid4()}")
|
|
||||||
|
|
||||||
session_id = st.session_state["agent_session_id"]
|
|
||||||
|
|
||||||
if "messages" not in st.session_state:
|
|
||||||
st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]
|
|
||||||
|
|
||||||
for msg in st.session_state.messages:
|
|
||||||
with st.chat_message(msg["role"]):
|
|
||||||
st.markdown(msg["content"])
|
|
||||||
|
|
||||||
if prompt := st.chat_input(placeholder=""):
|
|
||||||
with st.chat_message("user"):
|
|
||||||
st.markdown(prompt)
|
|
||||||
|
|
||||||
st.session_state.messages.append({"role": "user", "content": prompt})
|
|
||||||
|
|
||||||
turn_response = agent.create_turn(
|
|
||||||
session_id=session_id,
|
|
||||||
messages=[{"role": "user", "content": prompt}],
|
|
||||||
stream=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
def response_generator(turn_response):
|
|
||||||
if st.session_state.get("agent_type") == AgentType.REACT:
|
|
||||||
return _handle_react_response(turn_response)
|
|
||||||
else:
|
|
||||||
return _handle_regular_response(turn_response)
|
|
||||||
|
|
||||||
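        # ReAct turns stream structured JSON steps: accumulate step text, render
        # thought/action/observation expanders, and emit the final answer (or a
        # summary of tool results when no explicit answer was produced).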
        def _handle_react_response(turn_response):
            current_step_content = ""
            final_answer = None
            tool_results = []

            for response in turn_response:
                if not hasattr(response.event, "payload"):
                    yield (
                        "\n\n🚨 :red[_Llama Stack server Error:_]\n"
                        "The response received is missing an expected `payload` attribute.\n"
                        "This could indicate a malformed response or an internal issue within the server.\n\n"
                        f"Error details: {response}"
                    )
                    return

                payload = response.event.payload

                if payload.event_type == "step_progress" and hasattr(payload.delta, "text"):
                    current_step_content += payload.delta.text
                    continue

                if payload.event_type == "step_complete":
                    step_details = payload.step_details

                    if step_details.step_type == "inference":
                        # Capture the sub-generator's return value so a parsed final
                        # answer suppresses the fallback tool-results summary below.
                        final_answer = yield from _process_inference_step(current_step_content, tool_results, final_answer)
                        current_step_content = ""
                    elif step_details.step_type == "tool_execution":
                        tool_results = _process_tool_execution(step_details, tool_results)
                        current_step_content = ""
                    else:
                        current_step_content = ""

            if not final_answer and tool_results:
                yield from _format_tool_results_summary(tool_results)
        def _process_inference_step(current_step_content, tool_results, final_answer):
            try:
                react_output_data = json.loads(current_step_content)
                thought = react_output_data.get("thought")
                action = react_output_data.get("action")
                answer = react_output_data.get("answer")

                if answer and answer != "null" and answer is not None:
                    final_answer = answer

                if thought:
                    with st.expander("🤔 Thinking...", expanded=False):
                        st.markdown(f":grey[__{thought}__]")

                if action and isinstance(action, dict):
                    tool_name = action.get("tool_name")
                    tool_params = action.get("tool_params")
                    with st.expander(f'🛠 Action: Using tool "{tool_name}"', expanded=False):
                        st.json(tool_params)

                if answer and answer != "null" and answer is not None:
                    yield f"\n\n✅ **Final Answer:**\n{answer}"

            except json.JSONDecodeError:
                yield f"\n\nFailed to parse ReAct step content:\n```json\n{current_step_content}\n```"
            except Exception as e:
                yield f"\n\nFailed to process ReAct step: {e}\n```json\n{current_step_content}\n```"

            return final_answer
        def _process_tool_execution(step_details, tool_results):
            try:
                if hasattr(step_details, "tool_responses") and step_details.tool_responses:
                    for tool_response in step_details.tool_responses:
                        tool_name = tool_response.tool_name
                        content = tool_response.content
                        tool_results.append((tool_name, content))
                        with st.expander(f'⚙️ Observation (Result from "{tool_name}")', expanded=False):
                            try:
                                parsed_content = json.loads(content)
                                st.json(parsed_content)
                            except json.JSONDecodeError:
                                st.code(content, language=None)
                else:
                    with st.expander("⚙️ Observation", expanded=False):
                        st.markdown(":grey[_Tool execution step completed, but no response data found._]")
            except Exception as e:
                with st.expander("⚙️ Error in Tool Execution", expanded=False):
                    st.markdown(f":red[_Error processing tool execution: {str(e)}_]")

            return tool_results
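        # The helpers below turn raw tool results into a short markdown summary, with
        # specialised formatting for web search results, result lists, dicts, and plain lists.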
        def _format_tool_results_summary(tool_results):
            yield "\n\n**Here's what I found:**\n"
            for tool_name, content in tool_results:
                try:
                    parsed_content = json.loads(content)

                    if tool_name == "web_search" and "top_k" in parsed_content:
                        yield from _format_web_search_results(parsed_content)
                    elif "results" in parsed_content and isinstance(parsed_content["results"], list):
                        yield from _format_results_list(parsed_content["results"])
                    elif isinstance(parsed_content, dict) and len(parsed_content) > 0:
                        yield from _format_dict_results(parsed_content)
                    elif isinstance(parsed_content, list) and len(parsed_content) > 0:
                        yield from _format_list_results(parsed_content)
                except json.JSONDecodeError:
                    yield f"\n**{tool_name}** was used but returned complex data. Check the observation for details.\n"
                except (TypeError, AttributeError, KeyError, IndexError) as e:
                    print(f"Error processing {tool_name} result: {type(e).__name__}: {e}")
        def _format_web_search_results(parsed_content):
            for i, result in enumerate(parsed_content["top_k"], 1):
                if i <= 3:
                    title = result.get("title", "Untitled")
                    url = result.get("url", "")
                    content_text = result.get("content", "").strip()
                    yield f"\n- **{title}**\n {content_text}\n [Source]({url})\n"
        def _format_results_list(results):
            for i, result in enumerate(results, 1):
                if i <= 3:
                    if isinstance(result, dict):
                        name = result.get("name", result.get("title", "Result " + str(i)))
                        description = result.get("description", result.get("content", result.get("summary", "")))
                        yield f"\n- **{name}**\n {description}\n"
                    else:
                        yield f"\n- {result}\n"
        def _format_dict_results(parsed_content):
            yield "\n```\n"
            for key, value in list(parsed_content.items())[:5]:
                if isinstance(value, str) and len(value) < 100:
                    yield f"{key}: {value}\n"
                else:
                    yield f"{key}: [Complex data]\n"
            yield "```\n"
        def _format_list_results(parsed_content):
            yield "\n"
            for _, item in enumerate(parsed_content[:3], 1):
                if isinstance(item, str):
                    yield f"- {item}\n"
                elif isinstance(item, dict) and "text" in item:
                    yield f"- {item['text']}\n"
                elif isinstance(item, dict) and len(item) > 0:
                    first_value = next(iter(item.values()))
                    if isinstance(first_value, str) and len(first_value) < 100:
                        yield f"- {first_value}\n"
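        # Regular (non-ReAct) agents stream plain text deltas; tool invocations are
        # surfaced inline as they complete.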
        def _handle_regular_response(turn_response):
            for response in turn_response:
                if hasattr(response.event, "payload"):
                    print(response.event.payload)
                    if response.event.payload.event_type == "step_progress":
                        if hasattr(response.event.payload.delta, "text"):
                            yield response.event.payload.delta.text
                    if response.event.payload.event_type == "step_complete":
                        if response.event.payload.step_details.step_type == "tool_execution":
                            if response.event.payload.step_details.tool_calls:
                                tool_name = str(response.event.payload.step_details.tool_calls[0].tool_name)
                                yield f'\n\n🛠 :grey[_Using "{tool_name}" tool:_]\n\n'
                            else:
                                yield "No tool_calls present in step_details"
                else:
                    yield f"Error occurred in the Llama Stack Cluster: {response}"
with st.chat_message("assistant"):
|
|
||||||
response_content = st.write_stream(response_generator(turn_response))
|
|
||||||
|
|
||||||
st.session_state.messages.append({"role": "assistant", "content": response_content})
|
|
||||||
|
|
||||||
|
|
||||||
tool_chat_page()
|
|
||||||
|
|
@ -1,5 +0,0 @@
|
||||||
llama-stack>=0.2.1
|
|
||||||
llama-stack-client>=0.2.1
|
|
||||||
pandas
|
|
||||||
streamlit
|
|
||||||
streamlit-option-menu
|
|
||||||
uv.lock (generated): 13 deletions
@@ -1963,14 +1963,6 @@ dependencies = [
     { name = "uvicorn" },
 ]
-
-[package.optional-dependencies]
-ui = [
-    { name = "llama-stack-client" },
-    { name = "pandas" },
-    { name = "streamlit" },
-    { name = "streamlit-option-menu" },
-]
 
 [package.dev-dependencies]
 benchmark = [
     { name = "locust" },
@@ -2097,11 +2089,9 @@ requires-dist = [
     { name = "jinja2", specifier = ">=3.1.6" },
     { name = "jsonschema" },
     { name = "llama-stack-client", specifier = ">=0.3.0" },
-    { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.3.0" },
     { name = "openai", specifier = ">=2.5.0" },
     { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
     { name = "opentelemetry-sdk", specifier = ">=1.30.0" },
-    { name = "pandas", marker = "extra == 'ui'" },
     { name = "pillow" },
     { name = "prompt-toolkit" },
     { name = "pydantic", specifier = ">=2.11.9" },
@@ -2111,13 +2101,10 @@ requires-dist = [
     { name = "rich" },
     { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" },
     { name = "starlette" },
-    { name = "streamlit", marker = "extra == 'ui'" },
-    { name = "streamlit-option-menu", marker = "extra == 'ui'" },
     { name = "termcolor" },
     { name = "tiktoken" },
     { name = "uvicorn", specifier = ">=0.34.0" },
 ]
-provides-extras = ["ui"]
 
 [package.metadata.requires-dev]
 benchmark = [{ name = "locust", specifier = ">=2.39.1" }]