mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-31 10:43:52 +00:00
pre-commit fixes
This commit is contained in:
parent
967dd0aa08
commit
7e211f8553
314 changed files with 5574 additions and 11369 deletions
|
|
@ -3675,7 +3675,7 @@
|
|||
" benchmark_id=\"llama3.2-3B-instruct:tax_eval\",\n",
|
||||
" input_rows=eval_rows.rows,\n",
|
||||
" scoring_functions=[\"braintrust::answer-similarity\"],\n",
|
||||
" task_config={\n",
|
||||
" benchmark_config={\n",
|
||||
" \"type\": \"benchmark\",\n",
|
||||
" \"eval_candidate\": {\n",
|
||||
" \"type\": \"model\",\n",
|
||||
|
|
@ -6383,7 +6383,7 @@
|
|||
" benchmark_id=\"Llama-3.2-3B-Instruct-sft-0:tax_eval\",\n",
|
||||
" input_rows=eval_rows.rows,\n",
|
||||
" scoring_functions=[\"braintrust::answer-similarity\"],\n",
|
||||
" task_config={\n",
|
||||
" benchmark_config={\n",
|
||||
" \"type\": \"benchmark\",\n",
|
||||
" \"eval_candidate\": {\n",
|
||||
" \"type\": \"model\",\n",
|
||||
|
|
|
|||
|
|
@ -781,7 +781,7 @@
|
|||
" benchmark_id=\"meta-reference::mmmu\",\n",
|
||||
" input_rows=eval_rows,\n",
|
||||
" scoring_functions=[\"basic::regex_parser_multiple_choice_answer\"],\n",
|
||||
" task_config={\n",
|
||||
" benchmark_config={\n",
|
||||
" \"type\": \"benchmark\",\n",
|
||||
" \"eval_candidate\": {\n",
|
||||
" \"type\": \"model\",\n",
|
||||
|
|
@ -826,10 +826,9 @@
|
|||
"_ = client.datasets.register(\n",
|
||||
" dataset_id=simpleqa_dataset_id,\n",
|
||||
" provider_id=\"huggingface\",\n",
|
||||
" url={\"uri\": \"https://huggingface.co/datasets/llamastack/evals\"},\n",
|
||||
" url={\"uri\": \"https://huggingface.co/datasets/llamastack/simpleqa\"},\n",
|
||||
" metadata={\n",
|
||||
" \"path\": \"llamastack/evals\",\n",
|
||||
" \"name\": \"evals__simpleqa\",\n",
|
||||
" \"path\": \"llamastack/simpleqa\",\n",
|
||||
" \"split\": \"train\",\n",
|
||||
" },\n",
|
||||
" dataset_schema={\n",
|
||||
|
|
@ -960,7 +959,7 @@
|
|||
" benchmark_id=\"meta-reference::simpleqa\",\n",
|
||||
" input_rows=eval_rows.rows,\n",
|
||||
" scoring_functions=[\"llm-as-judge::405b-simpleqa\"],\n",
|
||||
" task_config={\n",
|
||||
" benchmark_config={\n",
|
||||
" \"type\": \"benchmark\",\n",
|
||||
" \"eval_candidate\": {\n",
|
||||
" \"type\": \"model\",\n",
|
||||
|
|
@ -1109,7 +1108,7 @@
|
|||
" benchmark_id=\"meta-reference::simpleqa\",\n",
|
||||
" input_rows=eval_rows.rows,\n",
|
||||
" scoring_functions=[\"llm-as-judge::405b-simpleqa\"],\n",
|
||||
" task_config={\n",
|
||||
" benchmark_config={\n",
|
||||
" \"type\": \"benchmark\",\n",
|
||||
" \"eval_candidate\": {\n",
|
||||
" \"type\": \"agent\",\n",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue