chore: rename task_config to benchmark_config (#1397)

# What does this PR do?

- This rename was missed in the previous deprecation (see the sketch below):
https://github.com/meta-llama/llama-stack/pull/1186
- Part of https://github.com/meta-llama/llama-stack/issues/1396
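
For context, a minimal sketch of what an eval call looks like after the rename, with `benchmark_config` replacing `task_config`. The call shape mirrors the diffs below; the client setup, model id, and sampling parameters are illustrative placeholders, not taken from this PR:

```python
from llama_stack_client import LlamaStackClient

# Illustrative client setup; base_url/port depend on your running distribution.
client = LlamaStackClient(base_url="http://localhost:8321")

eval_rows = []  # placeholder; in the notebook these come from the benchmark's dataset

# The keyword argument is now `benchmark_config` (previously `task_config`).
response = client.eval.evaluate_rows(
    benchmark_id="meta-reference::mmmu",
    input_rows=eval_rows,
    scoring_functions=["basic::regex_parser_multiple_choice_answer"],
    benchmark_config={
        "type": "benchmark",
        "eval_candidate": {
            "type": "model",
            "model": "meta-llama/Llama-3.3-70B-Instruct",  # illustrative model id
            "sampling_params": {"strategy": {"type": "greedy"}},  # illustrative sampling params
        },
    },
)
```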


## Test Plan
```
pytest -v -s --nbval-lax ./llama-stack/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb 
```


```diff
@@ -51,7 +51,7 @@ response = client.eval.evaluate_rows(
     benchmark_id="meta-reference::mmmu",
     input_rows=eval_rows,
     scoring_functions=["basic::regex_parser_multiple_choice_answer"],
-    task_config={
+    benchmark_config={
         "type": "benchmark",
         "eval_candidate": {
             "type": "model",
@@ -109,7 +109,7 @@ response = client.eval.evaluate_rows(
     benchmark_id="meta-reference::simpleqa",
     input_rows=eval_rows.rows,
     scoring_functions=["llm-as-judge::405b-simpleqa"],
-    task_config={
+    benchmark_config={
         "type": "benchmark",
         "eval_candidate": {
             "type": "model",
@@ -158,7 +158,7 @@ response = client.eval.evaluate_rows(
     benchmark_id="meta-reference::simpleqa",
     input_rows=eval_rows.rows,
     scoring_functions=["llm-as-judge::405b-simpleqa"],
-    task_config={
+    benchmark_config={
         "type": "benchmark",
         "eval_candidate": {
             "type": "agent",
```

```diff
@@ -19,7 +19,7 @@ response = client.benchmarks.register(
 # Run evaluation
 job = client.eval.run_eval(
     benchmark_id="my_eval",
-    task_config={
+    benchmark_config={
         "type": "app",
         "eval_candidate": {"type": "agent", "config": agent_config},
     },
```

```diff
@@ -87,7 +87,7 @@ response = client.eval.evaluate_rows(
     benchmark_id="meta-reference::mmmu",
     input_rows=eval_rows,
     scoring_functions=["basic::regex_parser_multiple_choice_answer"],
-    task_config={
+    benchmark_config={
         "type": "benchmark",
         "eval_candidate": {
             "type": "model",
@@ -145,7 +145,7 @@ response = client.eval.evaluate_rows(
     benchmark_id="meta-reference::simpleqa",
     input_rows=eval_rows.rows,
     scoring_functions=["llm-as-judge::405b-simpleqa"],
-    task_config={
+    benchmark_config={
         "type": "benchmark",
         "eval_candidate": {
             "type": "model",
@@ -195,7 +195,7 @@ response = client.eval.evaluate_rows(
     benchmark_id="meta-reference::simpleqa",
     input_rows=eval_rows.rows,
     scoring_functions=["llm-as-judge::405b-simpleqa"],
-    task_config={
+    benchmark_config={
         "type": "benchmark",
         "eval_candidate": {
             "type": "agent",
```