From ae43044a57d56eee725d88bbc514d4e37bfad2c3 Mon Sep 17 00:00:00 2001
From: Xi Yan <xiyan@meta.com>
Date: Wed, 9 Oct 2024 11:30:35 -0700
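Subject: [PATCH] rename eleuther eval task dirs to meta_ifeval and meta_mmlu_pro

Also remove the per-request debug print in generate_until, raise the
evaluate() limit from 1 to 10, and switch the example client and
local-run.yaml from Llama3.2-1B-Instruct to Llama3.1-8B-Instruct.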

---
 llama_stack/apis/evals/client.py                            | 2 +-
 .../providers/impls/third_party/evals/eleuther/eleuther.py  | 3 +--
 .../eleuther/tasks/{ifeval => meta_ifeval}/ifeval.yaml      | 0
 .../evals/eleuther/tasks/{ifeval => meta_ifeval}/utils.py   | 0
 .../mmlu_pro_5shot_cot_instruct.yaml                        | 0
 .../eleuther/tasks/{mmlu_pro => meta_mmlu_pro}/utils.py     | 0
 tests/examples/local-run.yaml                               | 6 +++---
 7 files changed, 5 insertions(+), 6 deletions(-)
 rename llama_stack/providers/impls/third_party/evals/eleuther/tasks/{ifeval => meta_ifeval}/ifeval.yaml (100%)
 rename llama_stack/providers/impls/third_party/evals/eleuther/tasks/{ifeval => meta_ifeval}/utils.py (100%)
 rename llama_stack/providers/impls/third_party/evals/eleuther/tasks/{mmlu_pro => meta_mmlu_pro}/mmlu_pro_5shot_cot_instruct.yaml (100%)
 rename llama_stack/providers/impls/third_party/evals/eleuther/tasks/{mmlu_pro => meta_mmlu_pro}/utils.py (100%)

diff --git a/llama_stack/apis/evals/client.py b/llama_stack/apis/evals/client.py
index aa617a5f1..4acbff5f6 100644
--- a/llama_stack/apis/evals/client.py
+++ b/llama_stack/apis/evals/client.py
@@ -44,7 +44,7 @@ async def run_main(host: str, port: int):
 
     # CustomDataset
     response = await client.run_evals(
-        "Llama3.2-1B-Instruct",
+        "Llama3.1-8B-Instruct",
         "mmlu-simple-eval-en",
         "mmlu",
     )
diff --git a/llama_stack/providers/impls/third_party/evals/eleuther/eleuther.py b/llama_stack/providers/impls/third_party/evals/eleuther/eleuther.py
index 7f307a9d3..ab27fcaee 100644
--- a/llama_stack/providers/impls/third_party/evals/eleuther/eleuther.py
+++ b/llama_stack/providers/impls/third_party/evals/eleuther/eleuther.py
@@ -90,7 +90,6 @@ class EleutherEvalsWrapper(LM):
     def generate_until(self, requests, disable_tqdm: bool = False) -> List[str]:
         res = []
         for req in requests:
-            print("generation for msg: ", req.args[0])
             response = self.inference_api.chat_completion(
                 model=self.model,
                 messages=[
@@ -144,7 +143,7 @@ class EleutherEvalsAdapter(Evals):
         output = evaluate(
             eluther_wrapper,
             task_dict,
-            limit=1,
+            limit=10,
         )
 
         formatted_output = lm_eval.utils.make_table(output)
diff --git a/llama_stack/providers/impls/third_party/evals/eleuther/tasks/ifeval/ifeval.yaml b/llama_stack/providers/impls/third_party/evals/eleuther/tasks/meta_ifeval/ifeval.yaml
similarity index 100%
rename from llama_stack/providers/impls/third_party/evals/eleuther/tasks/ifeval/ifeval.yaml
rename to llama_stack/providers/impls/third_party/evals/eleuther/tasks/meta_ifeval/ifeval.yaml
diff --git a/llama_stack/providers/impls/third_party/evals/eleuther/tasks/ifeval/utils.py b/llama_stack/providers/impls/third_party/evals/eleuther/tasks/meta_ifeval/utils.py
similarity index 100%
rename from llama_stack/providers/impls/third_party/evals/eleuther/tasks/ifeval/utils.py
rename to llama_stack/providers/impls/third_party/evals/eleuther/tasks/meta_ifeval/utils.py
diff --git a/llama_stack/providers/impls/third_party/evals/eleuther/tasks/mmlu_pro/mmlu_pro_5shot_cot_instruct.yaml b/llama_stack/providers/impls/third_party/evals/eleuther/tasks/meta_mmlu_pro/mmlu_pro_5shot_cot_instruct.yaml
similarity index 100%
rename from llama_stack/providers/impls/third_party/evals/eleuther/tasks/mmlu_pro/mmlu_pro_5shot_cot_instruct.yaml
rename to llama_stack/providers/impls/third_party/evals/eleuther/tasks/meta_mmlu_pro/mmlu_pro_5shot_cot_instruct.yaml
diff --git a/llama_stack/providers/impls/third_party/evals/eleuther/tasks/mmlu_pro/utils.py b/llama_stack/providers/impls/third_party/evals/eleuther/tasks/meta_mmlu_pro/utils.py
similarity index 100%
rename from llama_stack/providers/impls/third_party/evals/eleuther/tasks/mmlu_pro/utils.py
rename to llama_stack/providers/impls/third_party/evals/eleuther/tasks/meta_mmlu_pro/utils.py
diff --git a/tests/examples/local-run.yaml b/tests/examples/local-run.yaml
index a09736cd4..71f745a0c 100644
--- a/tests/examples/local-run.yaml
+++ b/tests/examples/local-run.yaml
@@ -21,7 +21,7 @@ providers:
   - provider_id: meta-reference
     provider_type: meta-reference
     config:
-      model: Llama3.2-1B-Instruct
+      model: Llama3.1-8B-Instruct
       quantization: null
       torch_seed: null
       max_seq_len: 4096
@@ -54,8 +54,8 @@ providers:
     provider_type: meta-reference
     config: {}
 models:
-- identifier: Llama3.2-1B-Instruct
-  llama_model: Llama3.2-1B-Instruct
+- identifier: Llama3.1-8B-Instruct
+  llama_model: Llama3.1-8B-Instruct
   provider_id: meta-reference
 shields:
 - identifier: llama_guard