From c544e4b0158fb367e71821d3eeed3426553d8877 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Wed, 27 Nov 2024 15:11:27 -0800 Subject: [PATCH] chat playground --- llama_stack/distribution/ui/app.py | 171 ++---------------- .../distribution/ui/modules/__init__.py | 5 + llama_stack/distribution/ui/modules/api.py | 5 +- llama_stack/distribution/ui/page/__init__.py | 5 + .../ui/page/evaluations/__init__.py | 5 + .../ui/page/evaluations/app_eval.py | 148 +++++++++++++++ .../ui/page/playground/__init__.py | 5 + .../distribution/ui/page/playground/chat.py | 109 +++++++++++ 8 files changed, 295 insertions(+), 158 deletions(-) create mode 100644 llama_stack/distribution/ui/modules/__init__.py create mode 100644 llama_stack/distribution/ui/page/__init__.py create mode 100644 llama_stack/distribution/ui/page/evaluations/__init__.py create mode 100644 llama_stack/distribution/ui/page/evaluations/app_eval.py create mode 100644 llama_stack/distribution/ui/page/playground/__init__.py create mode 100644 llama_stack/distribution/ui/page/playground/chat.py diff --git a/llama_stack/distribution/ui/app.py b/llama_stack/distribution/ui/app.py index 763b126a7..2943e709c 100644 --- a/llama_stack/distribution/ui/app.py +++ b/llama_stack/distribution/ui/app.py @@ -3,170 +3,27 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
- -import json - -import pandas as pd - import streamlit as st -from modules.api import LlamaStackEvaluation - -from modules.utils import process_dataset - -EVALUATION_API = LlamaStackEvaluation() - def main(): - # Add collapsible sidebar - with st.sidebar: - # Add collapse button - if "sidebar_state" not in st.session_state: - st.session_state.sidebar_state = True - - if st.session_state.sidebar_state: - st.title("Navigation") - page = st.radio( - "Select a Page", - ["Application Evaluation"], - index=0, - ) - else: - page = "Application Evaluation" # Default page when sidebar is collapsed - - # Main content area - st.title("🦙 Llama Stack Evaluations") - - if page == "Application Evaluation": - application_evaluation_page() - - -def application_evaluation_page(): - # File uploader - uploaded_file = st.file_uploader("Upload Dataset", type=["csv", "xlsx", "xls"]) - - if uploaded_file is None: - st.error("No file uploaded") - return - - # Process uploaded file - df = process_dataset(uploaded_file) - if df is None: - st.error("Error processing file") - return - - # Display dataset information - st.success("Dataset loaded successfully!") - - # Display dataframe preview - st.subheader("Dataset Preview") - st.dataframe(df) - - # Select Scoring Functions to Run Evaluation On - st.subheader("Select Scoring Functions") - scoring_functions = EVALUATION_API.list_scoring_functions() - scoring_functions = {sf.identifier: sf for sf in scoring_functions} - scoring_functions_names = list(scoring_functions.keys()) - selected_scoring_functions = st.multiselect( - "Choose one or more scoring functions", - options=scoring_functions_names, - help="Choose one or more scoring functions.", + # Evaluation pages + application_evaluation_page = st.Page( + "page/evaluations/app_eval.py", + title="Application Evaluation", + icon="🦙", + default=False, ) - available_models = EVALUATION_API.list_models() - available_models = [m.identifier for m in available_models] + # Playground pages + 
chat_page = st.Page( + "page/playground/chat.py", title="Chat", icon="💬", default=True + ) - scoring_params = {} - if selected_scoring_functions: - st.write("Selected:") - for scoring_fn_id in selected_scoring_functions: - scoring_fn = scoring_functions[scoring_fn_id] - st.write(f"- **{scoring_fn_id}**: {scoring_fn.description}") - new_params = None - if scoring_fn.params: - new_params = {} - for param_name, param_value in scoring_fn.params.to_dict().items(): - if param_name == "type": - new_params[param_name] = param_value - continue - - if param_name == "judge_model": - value = st.selectbox( - f"Select **{param_name}** for {scoring_fn_id}", - options=available_models, - index=0, - key=f"{scoring_fn_id}_{param_name}", - ) - new_params[param_name] = value - else: - value = st.text_area( - f"Enter value for **{param_name}** in {scoring_fn_id} in valid JSON format", - value=json.dumps(param_value, indent=2), - height=80, - ) - try: - new_params[param_name] = json.loads(value) - except json.JSONDecodeError: - st.error( - f"Invalid JSON for **{param_name}** in {scoring_fn_id}" - ) - - st.json(new_params) - scoring_params[scoring_fn_id] = new_params - - # Add run evaluation button & slider - total_rows = len(df) - num_rows = st.slider("Number of rows to evaluate", 1, total_rows, total_rows) - - if st.button("Run Evaluation"): - progress_text = "Running evaluation..." 
- progress_bar = st.progress(0, text=progress_text) - rows = df.to_dict(orient="records") - if num_rows < total_rows: - rows = rows[:num_rows] - - # Create separate containers for progress text and results - progress_text_container = st.empty() - results_container = st.empty() - output_res = {} - for i, r in enumerate(rows): - # Update progress - progress = i / len(rows) - progress_bar.progress(progress, text=progress_text) - - # Run evaluation for current row - score_res = EVALUATION_API.run_scoring( - r, - scoring_function_ids=selected_scoring_functions, - scoring_params=scoring_params, - ) - - for k in r.keys(): - if k not in output_res: - output_res[k] = [] - output_res[k].append(r[k]) - - for fn_id in selected_scoring_functions: - if fn_id not in output_res: - output_res[fn_id] = [] - output_res[fn_id].append(score_res.results[fn_id].score_rows[0]) - - # Display current row results using separate containers - progress_text_container.write( - f"Expand to see current processed result ({i+1}/{len(rows)})" - ) - results_container.json( - score_res.to_json(), - expanded=2, - ) - - progress_bar.progress(1.0, text="Evaluation complete!") - - # Display results in dataframe - if output_res: - output_df = pd.DataFrame(output_res) - st.subheader("Evaluation Results") - st.dataframe(output_df) + pg = st.navigation( + {"Evaluations": [application_evaluation_page], "Playground": [chat_page]} + ) + pg.run() if __name__ == "__main__": diff --git a/llama_stack/distribution/ui/modules/__init__.py b/llama_stack/distribution/ui/modules/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/llama_stack/distribution/ui/modules/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
diff --git a/llama_stack/distribution/ui/modules/api.py b/llama_stack/distribution/ui/modules/api.py index a8d8bf37d..797480a92 100644 --- a/llama_stack/distribution/ui/modules/api.py +++ b/llama_stack/distribution/ui/modules/api.py @@ -11,7 +11,7 @@ from typing import Optional from llama_stack_client import LlamaStackClient -class LlamaStackEvaluation: +class LlamaStackApi: def __init__(self): self.client = LlamaStackClient( base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:5000"), @@ -39,3 +39,6 @@ class LlamaStackEvaluation: return self.client.scoring.score( input_rows=[row], scoring_functions=scoring_params ) + + +llama_stack_api = LlamaStackApi() diff --git a/llama_stack/distribution/ui/page/__init__.py b/llama_stack/distribution/ui/page/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/llama_stack/distribution/ui/page/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/llama_stack/distribution/ui/page/evaluations/__init__.py b/llama_stack/distribution/ui/page/evaluations/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/llama_stack/distribution/ui/page/evaluations/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/llama_stack/distribution/ui/page/evaluations/app_eval.py b/llama_stack/distribution/ui/page/evaluations/app_eval.py new file mode 100644 index 000000000..7c093784d --- /dev/null +++ b/llama_stack/distribution/ui/page/evaluations/app_eval.py @@ -0,0 +1,148 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+
+import pandas as pd
+import streamlit as st
+
+from modules.api import llama_stack_api
+from modules.utils import process_dataset
+
+
+def application_evaluation_page():
+
+    st.set_page_config(page_title="Application Evaluation", page_icon="🦙")
+    st.title("🦙 Llama Stack Evaluations")
+
+    # File uploader
+    uploaded_file = st.file_uploader("Upload Dataset", type=["csv", "xlsx", "xls"])
+
+    if uploaded_file is None:
+        st.error("No file uploaded")
+        return
+
+    # Process uploaded file
+    df = process_dataset(uploaded_file)
+    if df is None:
+        st.error("Error processing file")
+        return
+
+    # Display dataset information
+    st.success("Dataset loaded successfully!")
+
+    # Display dataframe preview
+    st.subheader("Dataset Preview")
+    st.dataframe(df)
+
+    # Select Scoring Functions to Run Evaluation On
+    st.subheader("Select Scoring Functions")
+    scoring_functions = llama_stack_api.list_scoring_functions()
+    scoring_functions = {sf.identifier: sf for sf in scoring_functions}
+    scoring_functions_names = list(scoring_functions.keys())
+    selected_scoring_functions = st.multiselect(
+        "Choose one or more scoring functions",
+        options=scoring_functions_names,
+        help="Choose one or more scoring functions.",
+    )
+
+    available_models = llama_stack_api.list_models()
+    available_models = [m.identifier for m in available_models]
+
+    scoring_params = {}
+    if selected_scoring_functions:
+        st.write("Selected:")
+        for scoring_fn_id in selected_scoring_functions:
+            scoring_fn = scoring_functions[scoring_fn_id]
+            st.write(f"- **{scoring_fn_id}**: {scoring_fn.description}")
+            new_params = None
+            if scoring_fn.params:
+                new_params = {}
+                for param_name, param_value in scoring_fn.params.to_dict().items():
+                    if param_name == "type":
+                        new_params[param_name] = param_value
+                        continue
+
+                    if param_name == "judge_model":
+                        value = st.selectbox(
+                            f"Select **{param_name}** for {scoring_fn_id}",
+                            options=available_models,
+                            index=0,
+                            key=f"{scoring_fn_id}_{param_name}",
+                        )
+                        new_params[param_name] = value
+                    else:
+                        value = st.text_area(
+                            f"Enter value for **{param_name}** in {scoring_fn_id} in valid JSON format",
+                            value=json.dumps(param_value, indent=2),
+                            height=80,
+                        )
+                        try:
+                            new_params[param_name] = json.loads(value)
+                        except json.JSONDecodeError:
+                            st.error(
+                                f"Invalid JSON for **{param_name}** in {scoring_fn_id}"
+                            )
+
+                st.json(new_params)
+            scoring_params[scoring_fn_id] = new_params
+
+    # Add run evaluation button & slider
+    total_rows = len(df)
+    num_rows = st.slider("Number of rows to evaluate", 1, total_rows, total_rows)
+
+    if st.button("Run Evaluation"):
+        progress_text = "Running evaluation..."
+        progress_bar = st.progress(0, text=progress_text)
+        rows = df.to_dict(orient="records")
+        if num_rows < total_rows:
+            rows = rows[:num_rows]
+
+        # Create separate containers for progress text and results
+        progress_text_container = st.empty()
+        results_container = st.empty()
+        output_res = {}
+        for i, r in enumerate(rows):
+            # Update progress
+            progress = i / len(rows)
+            progress_bar.progress(progress, text=progress_text)
+
+            # Run evaluation for current row
+            score_res = llama_stack_api.run_scoring(
+                r,
+                scoring_function_ids=selected_scoring_functions,
+                scoring_params=scoring_params,
+            )
+
+            for k in r.keys():
+                if k not in output_res:
+                    output_res[k] = []
+                output_res[k].append(r[k])
+
+            for fn_id in selected_scoring_functions:
+                if fn_id not in output_res:
+                    output_res[fn_id] = []
+                output_res[fn_id].append(score_res.results[fn_id].score_rows[0])
+
+            # Display current row results using separate containers
+            progress_text_container.write(
+                f"Expand to see current processed result ({i+1}/{len(rows)})"
+            )
+            results_container.json(
+                score_res.to_json(),
+                expanded=2,
+            )
+
+        progress_bar.progress(1.0, text="Evaluation complete!")
+
+        # Display results in dataframe
+        if output_res:
+            output_df = pd.DataFrame(output_res)
+            st.subheader("Evaluation Results")
+            st.dataframe(output_df)
+
+
+application_evaluation_page()
diff --git a/llama_stack/distribution/ui/page/playground/__init__.py b/llama_stack/distribution/ui/page/playground/__init__.py
new file mode 100644
index 000000000..756f351d8
--- /dev/null
+++ b/llama_stack/distribution/ui/page/playground/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
diff --git a/llama_stack/distribution/ui/page/playground/chat.py b/llama_stack/distribution/ui/page/playground/chat.py
new file mode 100644
index 000000000..6e96125ca
--- /dev/null
+++ b/llama_stack/distribution/ui/page/playground/chat.py
@@ -0,0 +1,109 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import streamlit as st
+from modules.api import llama_stack_api
+
+# Sidebar: model choice and sampling parameters passed to chat_completion
+with st.sidebar:
+    st.header("Configuration")
+    available_models = llama_stack_api.list_models()
+    available_models = [model.identifier for model in available_models]
+    selected_model = st.selectbox(
+        "Choose a model",
+        available_models,
+        index=0,
+    )
+
+    temperature = st.slider(
+        "Temperature",
+        min_value=0.0,
+        max_value=1.0,
+        value=0.0,
+        step=0.1,
+        help="Controls the randomness of the response. Higher values make the output more creative and unexpected, lower values make it more conservative and predictable",
+    )
+
+    top_p = st.slider(
+        "Top P",
+        min_value=0.0,
+        max_value=1.0,
+        value=0.95,
+        step=0.1,
+    )
+
+    max_tokens = st.slider(
+        "Max Tokens",
+        min_value=0,
+        max_value=4096,
+        value=512,
+        step=1,
+        help="The maximum number of tokens to generate",
+    )
+
+    repetition_penalty = st.slider(
+        "Repetition Penalty",
+        min_value=1.0,
+        max_value=2.0,
+        value=1.0,
+        step=0.1,
+        help="Controls the likelihood for generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty, 2 will strongly discourage model to repeat words or phrases.",
+    )
+
+    stream = st.checkbox("Stream", value=True)
+
+
+# Main chat interface
+st.title("🦙 Chat")
+
+# Initialize chat history (kept in st.session_state)
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+
+# Display chat messages
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+
+# Chat input
+if prompt := st.chat_input("Example: What is Llama Stack?"):
+    # Add user message to chat history
+    st.session_state.messages.append({"role": "user", "content": prompt})
+
+    # Display user message
+    with st.chat_message("user"):
+        st.markdown(prompt)
+
+    # Display assistant response (only the latest prompt is sent, not the history)
+    with st.chat_message("assistant"):
+        message_placeholder = st.empty()
+        full_response = ""
+
+        response = llama_stack_api.client.inference.chat_completion(
+            messages=[{"role": "user", "content": prompt}],
+            model_id=selected_model,
+            stream=stream,
+            sampling_params={
+                "temperature": temperature,
+                "top_p": top_p,
+                "max_tokens": max_tokens,
+                "repetition_penalty": repetition_penalty,
+            },
+        )
+
+        if stream:
+            for chunk in response:
+                if chunk.event.event_type == "progress":
+                    full_response += chunk.event.delta
+                message_placeholder.markdown(full_response + "▌")
+            message_placeholder.markdown(full_response)
+        else:
+            # Keep only the text: history is re-rendered with st.markdown on rerun.
+            full_response = response.completion_message.content
+            message_placeholder.markdown(full_response)
+
+        assistant_message = {"role": "assistant", "content": full_response}
+        st.session_state.messages.append(assistant_message)