diff --git a/llama_stack/distribution/ui/README.md b/llama_stack/distribution/ui/README.md
new file mode 100644
index 000000000..a91883067
--- /dev/null
+++ b/llama_stack/distribution/ui/README.md
@@ -0,0 +1,19 @@
+# Llama Stack UI
+
+> [!NOTE]
+> This is a work in progress.
+
+## Running Streamlit App
+
+```bash
+cd llama_stack/distribution/ui
+pip install -r requirements.txt
+streamlit run app.py
+```
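+
+## Environment Variables
+
+The app reads the following environment variables (see `modules/api.py`):
+
+- `LLAMA_STACK_ENDPOINT`: the Llama Stack API endpoint (default: `http://localhost:5000`)
+- `FIREWORKS_API_KEY`, `TOGETHER_API_KEY`, `OPENAI_API_KEY`: optional provider API keys, forwarded to the client as provider data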
diff --git a/llama_stack/distribution/ui/__init__.py b/llama_stack/distribution/ui/__init__.py
new file mode 100644
index 000000000..756f351d8
--- /dev/null
+++ b/llama_stack/distribution/ui/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
diff --git a/llama_stack/distribution/ui/app.py b/llama_stack/distribution/ui/app.py
new file mode 100644
index 000000000..763b126a7
--- /dev/null
+++ b/llama_stack/distribution/ui/app.py
@@ -0,0 +1,173 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+
+import pandas as pd
+
+import streamlit as st
+
+from modules.api import LlamaStackEvaluation
+
+from modules.utils import process_dataset
+
+EVALUATION_API = LlamaStackEvaluation()
+
+
+def main():
+    # Add collapsible sidebar
+    with st.sidebar:
+        # Add collapse button
+        if "sidebar_state" not in st.session_state:
+            st.session_state.sidebar_state = True
+
+        if st.session_state.sidebar_state:
+            st.title("Navigation")
+            page = st.radio(
+                "Select a Page",
+                ["Application Evaluation"],
+                index=0,
+            )
+        else:
+            page = "Application Evaluation"  # Default page when sidebar is collapsed
+
+    # Main content area
+    st.title("🦙 Llama Stack Evaluations")
+
+    if page == "Application Evaluation":
+        application_evaluation_page()
+
+
+def application_evaluation_page():
+    # File uploader
+    uploaded_file = st.file_uploader("Upload Dataset", type=["csv", "xlsx", "xls"])
+
+    if uploaded_file is None:
+        st.error("No file uploaded")
+        return
+
+    # Process uploaded file
+    df = process_dataset(uploaded_file)
+    if df is None:
+        st.error("Error processing file")
+        return
+
+    # Display dataset information
+    st.success("Dataset loaded successfully!")
+
+    # Display dataframe preview
+    st.subheader("Dataset Preview")
+    st.dataframe(df)
+
+    # Select Scoring Functions to Run Evaluation On
+    st.subheader("Select Scoring Functions")
+    scoring_functions = EVALUATION_API.list_scoring_functions()
+    scoring_functions = {sf.identifier: sf for sf in scoring_functions}
+    scoring_functions_names = list(scoring_functions.keys())
+    selected_scoring_functions = st.multiselect(
+        "Choose one or more scoring functions",
+        options=scoring_functions_names,
+        help="Choose one or more scoring functions.",
+    )
+
+    available_models = EVALUATION_API.list_models()
+    available_models = [m.identifier for m in available_models]
+
+    scoring_params = {}
+    if selected_scoring_functions:
+        st.write("Selected:")
+        for scoring_fn_id in selected_scoring_functions:
+            scoring_fn = scoring_functions[scoring_fn_id]
+            st.write(f"- **{scoring_fn_id}**: {scoring_fn.description}")
+            new_params = None
+            if scoring_fn.params:
+                new_params = {}
+                for param_name, param_value in scoring_fn.params.to_dict().items():
+                    if param_name == "type":
+                        new_params[param_name] = param_value
+                        continue
+
+                    if param_name == "judge_model":
+                        value = st.selectbox(
+                            f"Select **{param_name}** for {scoring_fn_id}",
+                            options=available_models,
+                            index=0,
+                            key=f"{scoring_fn_id}_{param_name}",
+                        )
+                        new_params[param_name] = value
+                    else:
+                        value = st.text_area(
+                            f"Enter value for **{param_name}** in {scoring_fn_id} in valid JSON format",
+                            value=json.dumps(param_value, indent=2),
+                            height=80,
+                        )
+                        try:
+                            new_params[param_name] = json.loads(value)
+                        except json.JSONDecodeError:
+                            st.error(
+                                f"Invalid JSON for **{param_name}** in {scoring_fn_id}"
+                            )
+
+            st.json(new_params)
+            scoring_params[scoring_fn_id] = new_params
+
+    # Add run evaluation button & slider
+    total_rows = len(df)
+    num_rows = st.slider("Number of rows to evaluate", 1, total_rows, total_rows)
+
+    if st.button("Run Evaluation"):
+        progress_text = "Running evaluation..."
+        progress_bar = st.progress(0, text=progress_text)
+        rows = df.to_dict(orient="records")
+        if num_rows < total_rows:
+            rows = rows[:num_rows]
+
+        # Create separate containers for progress text and results
+        progress_text_container = st.empty()
+        results_container = st.empty()
+        output_res = {}
+        for i, r in enumerate(rows):
+            # Update progress
+            progress = i / len(rows)
+            progress_bar.progress(progress, text=progress_text)
+
+            # Run evaluation for current row
+            score_res = EVALUATION_API.run_scoring(
+                r,
+                scoring_function_ids=selected_scoring_functions,
+                scoring_params=scoring_params,
+            )
+
+            for k in r.keys():
+                if k not in output_res:
+                    output_res[k] = []
+                output_res[k].append(r[k])
+
+            for fn_id in selected_scoring_functions:
+                if fn_id not in output_res:
+                    output_res[fn_id] = []
+                output_res[fn_id].append(score_res.results[fn_id].score_rows[0])
+
+            # Display current row results using separate containers
+            progress_text_container.write(
+                f"Expand to see current processed result ({i+1}/{len(rows)})"
+            )
+            results_container.json(
+                score_res.to_json(),
+                expanded=2,
+            )
+
+        progress_bar.progress(1.0, text="Evaluation complete!")
+
+        # Display results in dataframe
+        if output_res:
+            output_df = pd.DataFrame(output_res)
+            st.subheader("Evaluation Results")
+            st.dataframe(output_df)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/llama_stack/distribution/ui/modules/api.py b/llama_stack/distribution/ui/modules/api.py
new file mode 100644
index 000000000..a8d8bf37d
--- /dev/null
+++ b/llama_stack/distribution/ui/modules/api.py
@@ -0,0 +1,41 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import os
+
+from typing import Optional
+
+from llama_stack_client import LlamaStackClient
+
+
+class LlamaStackEvaluation:
+    def __init__(self):
+        self.client = LlamaStackClient(
+            base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:5000"),
+            provider_data={
+                "fireworks_api_key": os.environ.get("FIREWORKS_API_KEY", ""),
+                "together_api_key": os.environ.get("TOGETHER_API_KEY", ""),
+                "openai_api_key": os.environ.get("OPENAI_API_KEY", ""),
+            },
+        )
+
+    def list_scoring_functions(self):
+        """List all available scoring functions"""
+        return self.client.scoring_functions.list()
+
+    def list_models(self):
+        """List all available judge models"""
+        return self.client.models.list()
+
+    def run_scoring(
+        self, row, scoring_function_ids: list[str], scoring_params: Optional[dict] = None
+    ):
+        """Run scoring on a single row"""
+        if not scoring_params:
+            scoring_params = {fn_id: None for fn_id in scoring_function_ids}
+        return self.client.scoring.score(
+            input_rows=[row], scoring_functions=scoring_params
+        )
diff --git a/llama_stack/distribution/ui/modules/utils.py b/llama_stack/distribution/ui/modules/utils.py
new file mode 100644
index 000000000..f8da2e54e
--- /dev/null
+++ b/llama_stack/distribution/ui/modules/utils.py
@@ -0,0 +1,33 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import os
+
+import pandas as pd
+import streamlit as st
+
+
+def process_dataset(file):
+    if file is None:
+        st.error("No file uploaded")
+        return None
+
+    try:
+        # Determine file type and read accordingly
+        file_ext = os.path.splitext(file.name)[1].lower()
+        if file_ext == ".csv":
+            df = pd.read_csv(file)
+        elif file_ext in [".xlsx", ".xls"]:
+            df = pd.read_excel(file)
+        else:
+            st.error("Unsupported file format. Please upload a CSV or Excel file.")
+            return None
+
+        return df
+
+    except Exception as e:
+        st.error(f"Error processing file: {str(e)}")
+        return None
diff --git a/llama_stack/distribution/ui/requirements.txt b/llama_stack/distribution/ui/requirements.txt
new file mode 100644
index 000000000..c03959444
--- /dev/null
+++ b/llama_stack/distribution/ui/requirements.txt
@@ -0,0 +1,3 @@
+streamlit
+pandas
+llama-stack-client>=0.0.55
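Taken together, the scoring round-trip the UI performs for each dataframe row boils down to a few `llama-stack-client` calls. A minimal sketch, assuming a Llama Stack server at `http://localhost:5000`; the `basic::equality` scoring function id and the row keys are illustrative assumptions, not part of this diff:

```python
# Minimal sketch of the per-row scoring flow in app.py / modules/api.py.
# Assumes a running Llama Stack server; the scoring function id and row
# keys below are illustrative.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")

# Discover available scoring functions, as the UI does for its multiselect.
print([fn.identifier for fn in client.scoring_functions.list()])

row = {"input_query": "What is 2+2?", "generated_answer": "4", "expected_answer": "4"}
response = client.scoring.score(
    input_rows=[row],
    scoring_functions={"basic::equality": None},  # None -> server-side default params
)
print(response.results["basic::equality"].score_rows[0])
```

Mapping a scoring function id to `None`, as `run_scoring` does when no params are supplied, leaves parameter selection to the server-side defaults.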