From 73de235ef1162fbba1f55cbef361654a4376f0f7 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Tue, 30 Sep 2025 13:02:33 -0700
Subject: [PATCH] fix(eval): use client.alpha for eval tests

---
 tests/integration/eval/test_eval.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/integration/eval/test_eval.py b/tests/integration/eval/test_eval.py
index d1c3de519..01581e829 100644
--- a/tests/integration/eval/test_eval.py
+++ b/tests/integration/eval/test_eval.py
@@ -45,7 +45,7 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
     list_benchmarks = llama_stack_client.benchmarks.list()
     assert any(x.identifier == benchmark_id for x in list_benchmarks)
 
-    response = llama_stack_client.eval.evaluate_rows(
+    response = llama_stack_client.alpha.eval.evaluate_rows(
         benchmark_id=benchmark_id,
         input_rows=rows.data,
         scoring_functions=scoring_functions,
@@ -80,7 +80,7 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
         scoring_functions=[scoring_fn_id],
     )
 
-    response = llama_stack_client.eval.run_eval(
+    response = llama_stack_client.alpha.eval.run_eval(
         benchmark_id=benchmark_id,
         benchmark_config={
             "eval_candidate": {
@@ -93,10 +93,10 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
         },
     )
     assert response.job_id == "0"
-    job_status = llama_stack_client.eval.jobs.status(job_id=response.job_id, benchmark_id=benchmark_id)
+    job_status = llama_stack_client.alpha.eval.jobs.status(job_id=response.job_id, benchmark_id=benchmark_id)
     assert job_status and job_status.status == "completed"
 
-    eval_response = llama_stack_client.eval.jobs.retrieve(job_id=response.job_id, benchmark_id=benchmark_id)
+    eval_response = llama_stack_client.alpha.eval.jobs.retrieve(job_id=response.job_id, benchmark_id=benchmark_id)
     assert eval_response is not None
     assert len(eval_response.generations) == 5
     assert scoring_fn_id in eval_response.scores