benchmark

This commit is contained in:
Xi Yan 2025-03-06 12:43:25 -08:00
parent 47fea967a7
commit 000569b003

View file

@@ -86,7 +86,6 @@ response = client.eval.evaluate_rows_alpha(
input_rows=eval_rows, input_rows=eval_rows,
scoring_functions=["basic::regex_parser_multiple_choice_answer"], scoring_functions=["basic::regex_parser_multiple_choice_answer"],
benchmark_config={ benchmark_config={
"type": "benchmark",
"eval_candidate": { "eval_candidate": {
"type": "model", "type": "model",
"model": "meta-llama/Llama-3.2-90B-Vision-Instruct", "model": "meta-llama/Llama-3.2-90B-Vision-Instruct",
@@ -153,7 +152,6 @@ response = client.eval.evaluate_rows(
input_rows=eval_rows.rows, input_rows=eval_rows.rows,
scoring_functions=["llm-as-judge::405b-simpleqa"], scoring_functions=["llm-as-judge::405b-simpleqa"],
benchmark_config={ benchmark_config={
"type": "benchmark",
"eval_candidate": { "eval_candidate": {
"type": "model", "type": "model",
"model": "meta-llama/Llama-3.2-90B-Vision-Instruct", "model": "meta-llama/Llama-3.2-90B-Vision-Instruct",
@@ -202,7 +200,6 @@ response = client.eval.evaluate_rows(
input_rows=eval_rows.rows, input_rows=eval_rows.rows,
scoring_functions=["llm-as-judge::405b-simpleqa"], scoring_functions=["llm-as-judge::405b-simpleqa"],
benchmark_config={ benchmark_config={
"type": "benchmark",
"eval_candidate": { "eval_candidate": {
"type": "agent", "type": "agent",
"config": agent_config, "config": agent_config,
@@ -324,7 +321,6 @@ The `BenchmarkConfig` are user specified config to define:
**Example BenchmarkConfig** **Example BenchmarkConfig**
```json ```json
{ {
"type": "app",
"eval_candidate": { "eval_candidate": {
"type": "model", "type": "model",
"model": "Llama3.1-405B-Instruct", "model": "Llama3.1-405B-Instruct",