forked from phoenix-oss/llama-stack-mirror
fix!: update eval-tasks -> benchmarks (#1032)
# What does this PR do? - Update `/eval-tasks` to `/benchmarks` - ⚠️ Remove differentiation between `app` v.s. `benchmark` eval task config. Now we only have `BenchmarkConfig`. The overloaded `benchmark` is confusing and do not add any value. Backward compatibility is being kept as the "type" is not being used anywhere. [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan - This change is backward compatible - Run notebook test with ``` pytest -v -s --nbval-lax ./docs/getting_started.ipynb pytest -v -s --nbval-lax ./docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb ``` <img width="846" alt="image" src="https://github.com/user-attachments/assets/d2fc06a7-593a-444f-bc1f-10ab9b0c843d" /> [//]: # (## Documentation) [//]: # (- [ ] Added a Changelog entry if the change is significant) --------- Signed-off-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com> Signed-off-by: Ben Browning <bbrownin@redhat.com> Signed-off-by: Sébastien Han <seb@redhat.com> Signed-off-by: reidliu <reid201711@gmail.com> Co-authored-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com> Co-authored-by: Ben Browning <ben324@gmail.com> Co-authored-by: Sébastien Han <seb@redhat.com> Co-authored-by: Reid <61492567+reidliu41@users.noreply.github.com> Co-authored-by: reidliu <reid201711@gmail.com> Co-authored-by: Yuan Tang <terrytangyuan@gmail.com>
This commit is contained in:
parent
225dd38e5c
commit
8b655e3cd2
60 changed files with 2622 additions and 1910 deletions
|
@ -8,12 +8,12 @@ import streamlit as st
|
|||
from modules.api import llama_stack_api
|
||||
|
||||
|
||||
def eval_tasks():
|
||||
# Eval Tasks Section
|
||||
st.header("Eval Tasks")
|
||||
def benchmarks():
|
||||
# Benchmarks Section
|
||||
st.header("Benchmarks")
|
||||
|
||||
eval_tasks_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.eval_tasks.list()}
|
||||
benchmarks_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.benchmarks.list()}
|
||||
|
||||
if len(eval_tasks_info) > 0:
|
||||
selected_eval_task = st.selectbox("Select an eval task", list(eval_tasks_info.keys()), key="eval_task_inspect")
|
||||
st.json(eval_tasks_info[selected_eval_task], expanded=True)
|
||||
if len(benchmarks_info) > 0:
|
||||
selected_benchmark = st.selectbox("Select an eval task", list(benchmarks_info.keys()), key="benchmark_inspect")
|
||||
st.json(benchmarks_info[selected_benchmark], expanded=True)
|
||||
|
|
|
@ -4,8 +4,8 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from page.distribution.benchmarks import benchmarks
|
||||
from page.distribution.datasets import datasets
|
||||
from page.distribution.eval_tasks import eval_tasks
|
||||
from page.distribution.models import models
|
||||
from page.distribution.scoring_functions import scoring_functions
|
||||
from page.distribution.shields import shields
|
||||
|
@ -20,7 +20,7 @@ def resources_page():
|
|||
"Shields",
|
||||
"Scoring Functions",
|
||||
"Datasets",
|
||||
"Eval Tasks",
|
||||
"Benchmarks",
|
||||
]
|
||||
icons = ["magic", "memory", "shield", "file-bar-graph", "database", "list-task"]
|
||||
selected_resource = option_menu(
|
||||
|
@ -34,8 +34,8 @@ def resources_page():
|
|||
},
|
||||
},
|
||||
)
|
||||
if selected_resource == "Eval Tasks":
|
||||
eval_tasks()
|
||||
if selected_resource == "Benchmarks":
|
||||
benchmarks()
|
||||
elif selected_resource == "Vector Databases":
|
||||
vector_dbs()
|
||||
elif selected_resource == "Datasets":
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue