This commit is contained in:
Xi Yan 2025-03-15 17:16:38 -07:00
parent b561cfd902
commit 659f5e86ee
5 changed files with 1094 additions and 1108 deletions

File diff suppressed because one or more lines are too long

View file

@@ -847,10 +847,10 @@
 },
 "outputs": [],
 "source": [
-"eval_rows = client.datasetio.get_rows_paginated(\n",
+"eval_rows = client.datasets.iterrows(\n",
 " dataset_id=simpleqa_dataset_id,\n",
-" rows_in_page=5,\n",
-")\n"
+" limit=5,\n",
+")"
 ]
 },
 {
@@ -957,7 +957,7 @@
 "\n",
 "response = client.eval.evaluate_rows_alpha(\n",
 " benchmark_id=\"meta-reference::simpleqa\",\n",
-" input_rows=eval_rows.rows,\n",
+" input_rows=eval_rows.data,\n",
 " scoring_functions=[\"llm-as-judge::405b-simpleqa\"],\n",
 " benchmark_config={\n",
 " \"type\": \"benchmark\",\n",
@@ -1106,7 +1106,7 @@
 "\n",
 "response = client.eval.evaluate_rows_alpha(\n",
 " benchmark_id=\"meta-reference::simpleqa\",\n",
-" input_rows=eval_rows.rows,\n",
+" input_rows=eval_rows.data,\n",
 " scoring_functions=[\"llm-as-judge::405b-simpleqa\"],\n",
 " benchmark_config={\n",
 " \"type\": \"benchmark\",\n",

View file

@@ -128,9 +128,9 @@ _ = client.datasets.register(
 },
 )
-eval_rows = client.datasetio.get_rows_paginated(
+eval_rows = client.datasets.iterrows(
 dataset_id=simpleqa_dataset_id,
-rows_in_page=5,
+limit=5,
 )
 ```
@@ -143,7 +143,7 @@ client.benchmarks.register(
 response = client.eval.evaluate_rows(
 benchmark_id="meta-reference::simpleqa",
-input_rows=eval_rows.rows,
+input_rows=eval_rows.data,
 scoring_functions=["llm-as-judge::405b-simpleqa"],
 benchmark_config={
 "eval_candidate": {
@@ -191,7 +191,7 @@ agent_config = {
 response = client.eval.evaluate_rows(
 benchmark_id="meta-reference::simpleqa",
-input_rows=eval_rows.rows,
+input_rows=eval_rows.data,
 scoring_functions=["llm-as-judge::405b-simpleqa"],
 benchmark_config={
 "eval_candidate": {