From ae43044a57d56eee725d88bbc514d4e37bfad2c3 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Wed, 9 Oct 2024 11:30:35 -0700 Subject: [PATCH] rename tasks --- llama_stack/apis/evals/client.py | 2 +- .../providers/impls/third_party/evals/eleuther/eleuther.py | 3 +-- .../eleuther/tasks/{ifeval => meta_ifeval}/ifeval.yaml | 0 .../evals/eleuther/tasks/{ifeval => meta_ifeval}/utils.py | 0 .../mmlu_pro_5shot_cot_instruct.yaml | 0 .../eleuther/tasks/{mmlu_pro => meta_mmlu_pro}/utils.py | 0 tests/examples/local-run.yaml | 6 +++--- 7 files changed, 5 insertions(+), 6 deletions(-) rename llama_stack/providers/impls/third_party/evals/eleuther/tasks/{ifeval => meta_ifeval}/ifeval.yaml (100%) rename llama_stack/providers/impls/third_party/evals/eleuther/tasks/{ifeval => meta_ifeval}/utils.py (100%) rename llama_stack/providers/impls/third_party/evals/eleuther/tasks/{mmlu_pro => meta_mmlu_pro}/mmlu_pro_5shot_cot_instruct.yaml (100%) rename llama_stack/providers/impls/third_party/evals/eleuther/tasks/{mmlu_pro => meta_mmlu_pro}/utils.py (100%) diff --git a/llama_stack/apis/evals/client.py b/llama_stack/apis/evals/client.py index aa617a5f1..4acbff5f6 100644 --- a/llama_stack/apis/evals/client.py +++ b/llama_stack/apis/evals/client.py @@ -44,7 +44,7 @@ async def run_main(host: str, port: int): # CustomDataset response = await client.run_evals( - "Llama3.2-1B-Instruct", + "Llama3.1-8B-Instruct", "mmlu-simple-eval-en", "mmlu", ) diff --git a/llama_stack/providers/impls/third_party/evals/eleuther/eleuther.py b/llama_stack/providers/impls/third_party/evals/eleuther/eleuther.py index 7f307a9d3..ab27fcaee 100644 --- a/llama_stack/providers/impls/third_party/evals/eleuther/eleuther.py +++ b/llama_stack/providers/impls/third_party/evals/eleuther/eleuther.py @@ -90,7 +90,6 @@ class EleutherEvalsWrapper(LM): def generate_until(self, requests, disable_tqdm: bool = False) -> List[str]: res = [] for req in requests: - print("generation for msg: ", req.args[0]) response = self.inference_api.chat_completion( model=self.model, messages=[ @@ -144,7 +143,7 @@ class EleutherEvalsAdapter(Evals): output = evaluate( eluther_wrapper, task_dict, - limit=1, + limit=10, ) formatted_output = lm_eval.utils.make_table(output) diff --git a/llama_stack/providers/impls/third_party/evals/eleuther/tasks/ifeval/ifeval.yaml b/llama_stack/providers/impls/third_party/evals/eleuther/tasks/meta_ifeval/ifeval.yaml similarity index 100% rename from llama_stack/providers/impls/third_party/evals/eleuther/tasks/ifeval/ifeval.yaml rename to llama_stack/providers/impls/third_party/evals/eleuther/tasks/meta_ifeval/ifeval.yaml diff --git a/llama_stack/providers/impls/third_party/evals/eleuther/tasks/ifeval/utils.py b/llama_stack/providers/impls/third_party/evals/eleuther/tasks/meta_ifeval/utils.py similarity index 100% rename from llama_stack/providers/impls/third_party/evals/eleuther/tasks/ifeval/utils.py rename to llama_stack/providers/impls/third_party/evals/eleuther/tasks/meta_ifeval/utils.py diff --git a/llama_stack/providers/impls/third_party/evals/eleuther/tasks/mmlu_pro/mmlu_pro_5shot_cot_instruct.yaml b/llama_stack/providers/impls/third_party/evals/eleuther/tasks/meta_mmlu_pro/mmlu_pro_5shot_cot_instruct.yaml similarity index 100% rename from llama_stack/providers/impls/third_party/evals/eleuther/tasks/mmlu_pro/mmlu_pro_5shot_cot_instruct.yaml rename to llama_stack/providers/impls/third_party/evals/eleuther/tasks/meta_mmlu_pro/mmlu_pro_5shot_cot_instruct.yaml diff --git a/llama_stack/providers/impls/third_party/evals/eleuther/tasks/mmlu_pro/utils.py b/llama_stack/providers/impls/third_party/evals/eleuther/tasks/meta_mmlu_pro/utils.py similarity index 100% rename from llama_stack/providers/impls/third_party/evals/eleuther/tasks/mmlu_pro/utils.py rename to llama_stack/providers/impls/third_party/evals/eleuther/tasks/meta_mmlu_pro/utils.py diff --git a/tests/examples/local-run.yaml b/tests/examples/local-run.yaml index a09736cd4..71f745a0c 100644 --- a/tests/examples/local-run.yaml +++ b/tests/examples/local-run.yaml @@ -21,7 +21,7 @@ providers: - provider_id: meta-reference provider_type: meta-reference config: - model: Llama3.2-1B-Instruct + model: Llama3.1-8B-Instruct quantization: null torch_seed: null max_seq_len: 4096 @@ -54,8 +54,8 @@ providers: provider_type: meta-reference config: {} models: -- identifier: Llama3.2-1B-Instruct - llama_model: Llama3.2-1B-Instruct +- identifier: Llama3.1-8B-Instruct + llama_model: Llama3.1-8B-Instruct provider_id: meta-reference shields: - identifier: llama_guard