expander refactor

This commit is contained in:
Xi Yan 2024-12-03 16:20:31 -08:00
parent e245f459bb
commit 92f79d4dfb

View file

@@ -13,23 +13,9 @@ import streamlit as st
from modules.api import llama_stack_api
def native_evaluation_page():
st.set_page_config(page_title="Evaluations (Generation + Scoring)", page_icon="🦙")
st.title("📊 Evaluations (Generation + Scoring)")
# Create tabs
task_tab, candidate_tab, params_tab, run_tab = st.tabs(
[
"(1) Select Eval Task",
"(2) Define Eval Candidate",
"(3) Define Scoring Parameters",
"(4) Run Evaluation",
]
)
with task_tab:
def select_eval_task_1():
# Select Eval Tasks
st.subheader("1. Choose An Eval Task")
eval_tasks = llama_stack_api.client.eval_tasks.list()
eval_tasks = {et.identifier: et for et in eval_tasks}
eval_tasks_names = list(eval_tasks.keys())
@@ -38,9 +24,16 @@ def native_evaluation_page():
options=eval_tasks_names,
help="Choose an eval task. Each eval task is parameterized by a dataset, and list of scoring functions.",
)
with st.expander("View Eval Task"):
st.json(eval_tasks[selected_eval_task], expanded=True)
with candidate_tab:
st.session_state["selected_eval_task"] = selected_eval_task
st.session_state["eval_tasks"] = eval_tasks
def define_eval_candidate_2():
st.subheader("2. Define Eval Candidate")
with st.expander("Define Eval Candidate"):
# Define Eval Candidate
candidate_type = st.radio("Candidate Type", ["model", "agent"])
@@ -140,17 +133,26 @@ def native_evaluation_page():
"enable_session_persistence": False,
},
}
st.session_state["eval_candidate"] = eval_candidate
with params_tab:
def define_scoring_params_3():
if not st.session_state.get("selected_eval_candidate_2_next", None):
return
st.write("(Optional) Define scoring function parameters here")
with run_tab:
def run_evaluation_4():
st.subheader("3. Run Evaluation")
# Add info box to explain configurations being used
st.info(
"""
Review the configurations that will be used for this evaluation run, make any necessary changes, and then click the "Run Evaluation" button.
"""
)
selected_eval_task = st.session_state["selected_eval_task"]
eval_tasks = st.session_state["eval_tasks"]
eval_candidate = st.session_state["eval_candidate"]
dataset_id = eval_tasks[selected_eval_task].dataset_id
rows = llama_stack_api.client.datasetio.get_rows_paginated(
@@ -172,13 +174,10 @@ def native_evaluation_page():
"eval_candidate": eval_candidate,
"scoring_params": {},
}
st.markdown("##### Evaluation Task")
st.write("Go back to (1) Select Eval Task to make changes to the eval task. ")
with st.expander("View Evaluation Task"):
st.json(eval_tasks[selected_eval_task], expanded=True)
st.markdown("##### Evaluation Task Configuration")
st.write(
"Go back to (2) Define Eval Candidate and (3) Define Scoring Parameters to make changes to the configuration. "
)
with st.expander("View Evaluation Task Configuration"):
st.json(eval_task_config, expanded=True)
# Add run button and handle evaluation
@@ -219,9 +218,7 @@ def native_evaluation_page():
for scoring_fn in eval_tasks[selected_eval_task].scoring_functions:
if scoring_fn not in output_res:
output_res[scoring_fn] = []
output_res[scoring_fn].append(
eval_res.scores[scoring_fn].score_rows[0]
)
output_res[scoring_fn].append(eval_res.scores[scoring_fn].score_rows[0])
progress_text_container.write(
f"Expand to see current processed result ({i+1}/{len(rows)})"
@@ -236,4 +233,24 @@ def native_evaluation_page():
st.dataframe(output_df)
def native_evaluation_page():
    """Render the "Evaluations (Generation + Scoring)" page.

    Configures the Streamlit page chrome, then runs the four evaluation
    steps in order by delegating to the step helpers defined above. The
    helpers communicate through ``st.session_state`` (e.g.
    ``selected_eval_task``, ``eval_tasks``, ``eval_candidate``), so the
    call order here matters: each step reads state written by the
    previous one.
    """
    st.set_page_config(page_title="Evaluations (Generation + Scoring)", page_icon="🦙")
    st.title("📊 Evaluations (Generation + Scoring)")

    # The page was refactored from st.tabs to a sequential expander-based
    # flow; the old tab scaffolding (dead, commented-out code) is removed.
    select_eval_task_1()
    define_eval_candidate_2()
    define_scoring_params_3()
    run_evaluation_4()


native_evaluation_page()