mirror of
https://github.com/meta-llama/llama-stack.git
synced 2026-01-01 16:24:31 +00:00
eval
This commit is contained in:
parent
b561cfd902
commit
659f5e86ee
5 changed files with 1094 additions and 1108 deletions
|
|
@@ -128,9 +128,9 @@ _ = client.datasets.register(
|
|||
},
|
||||
)
|
||||
|
||||
-eval_rows = client.datasetio.get_rows_paginated(
|
||||
+eval_rows = client.datasets.iterrows(
|
||||
dataset_id=simpleqa_dataset_id,
|
||||
-rows_in_page=5,
|
||||
+limit=5,
|
||||
)
|
||||
```
|
||||
|
||||
|
|
@@ -143,7 +143,7 @@ client.benchmarks.register(
|
|||
|
||||
response = client.eval.evaluate_rows(
|
||||
benchmark_id="meta-reference::simpleqa",
|
||||
-input_rows=eval_rows.rows,
|
||||
+input_rows=eval_rows.data,
|
||||
scoring_functions=["llm-as-judge::405b-simpleqa"],
|
||||
benchmark_config={
|
||||
"eval_candidate": {
|
||||
|
|
@@ -191,7 +191,7 @@ agent_config = {
|
|||
|
||||
response = client.eval.evaluate_rows(
|
||||
benchmark_id="meta-reference::simpleqa",
|
||||
-input_rows=eval_rows.rows,
|
||||
+input_rows=eval_rows.data,
|
||||
scoring_functions=["llm-as-judge::405b-simpleqa"],
|
||||
benchmark_config={
|
||||
"eval_candidate": {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue