From 041634192ab7abffd7f5c691bf2f113d999679c9 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Sat, 5 Oct 2024 11:57:21 -0700 Subject: [PATCH] move folder --- llama_stack/apis/evals/client.py | 13 +++++++++++-- llama_stack/apis/inference/client.py | 2 +- .../impls/meta_reference/evals/tasks/mmlu_task.py | 2 +- .../third_party}/evals/__init__.py | 0 .../third_party}/evals/eleuther/__init__.py | 0 .../third_party}/evals/eleuther/config.py | 0 .../third_party}/evals/eleuther/eleuther.py | 0 llama_stack/providers/registry/evals.py | 4 ++-- tests/examples/local-run.yaml | 3 +++ 9 files changed, 18 insertions(+), 6 deletions(-) rename llama_stack/providers/{adapters => impls/third_party}/evals/__init__.py (100%) rename llama_stack/providers/{adapters => impls/third_party}/evals/eleuther/__init__.py (100%) rename llama_stack/providers/{adapters => impls/third_party}/evals/eleuther/config.py (100%) rename llama_stack/providers/{adapters => impls/third_party}/evals/eleuther/eleuther.py (100%) diff --git a/llama_stack/apis/evals/client.py b/llama_stack/apis/evals/client.py index a1f696dff..2a6947b32 100644 --- a/llama_stack/apis/evals/client.py +++ b/llama_stack/apis/evals/client.py @@ -42,12 +42,21 @@ class EvaluationClient(Evals): async def run_main(host: str, port: int): client = EvaluationClient(f"http://{host}:{port}") + # CustomDataset + # response = await client.run_evals( + # "Llama3.1-8B-Instruct", + # "mmlu-simple-eval-en", + # "mmlu", + # ) + # cprint(f"evaluate response={response}", "green") + + # Eleuther Eval response = await client.run_evals( "Llama3.1-8B-Instruct", - "mmlu-simple-eval-en", + "PLACEHOLDER_DATASET_NAME", "mmlu", ) - cprint(f"evaluate response={response}", "green") + cprint(response.metrics["metrics_table"], "red") def main(host: str, port: int): diff --git a/llama_stack/apis/inference/client.py b/llama_stack/apis/inference/client.py index 2aae1cc55..92acc3e14 100644 --- a/llama_stack/apis/inference/client.py +++ b/llama_stack/apis/inference/client.py @@ -109,7 +109,7 @@ async def run_main(host: str, port: int, stream: bool): cprint(f"User>{message.content}", "green") iterator = client.chat_completion( model="Llama3.1-8B-Instruct", - messages=[message, UserMessage(content="write me 3 sentence about the sun.")], + messages=[message], stream=stream, ) async for log in EventLogger().log(iterator): diff --git a/llama_stack/providers/impls/meta_reference/evals/tasks/mmlu_task.py b/llama_stack/providers/impls/meta_reference/evals/tasks/mmlu_task.py index 4f2939db1..d74476628 100644 --- a/llama_stack/providers/impls/meta_reference/evals/tasks/mmlu_task.py +++ b/llama_stack/providers/impls/meta_reference/evals/tasks/mmlu_task.py @@ -146,5 +146,5 @@ class MMLUTask(BaseTask): def aggregate_results(self, eval_results): return EvaluateResponse( - metrics={"score": sum(eval_results) / len(eval_results)} + metrics={"score": str(sum(eval_results) / len(eval_results))} ) diff --git a/llama_stack/providers/adapters/evals/__init__.py b/llama_stack/providers/impls/third_party/evals/__init__.py similarity index 100% rename from llama_stack/providers/adapters/evals/__init__.py rename to llama_stack/providers/impls/third_party/evals/__init__.py diff --git a/llama_stack/providers/adapters/evals/eleuther/__init__.py b/llama_stack/providers/impls/third_party/evals/eleuther/__init__.py similarity index 100% rename from llama_stack/providers/adapters/evals/eleuther/__init__.py rename to llama_stack/providers/impls/third_party/evals/eleuther/__init__.py diff --git a/llama_stack/providers/adapters/evals/eleuther/config.py b/llama_stack/providers/impls/third_party/evals/eleuther/config.py similarity index 100% rename from llama_stack/providers/adapters/evals/eleuther/config.py rename to llama_stack/providers/impls/third_party/evals/eleuther/config.py diff --git a/llama_stack/providers/adapters/evals/eleuther/eleuther.py b/llama_stack/providers/impls/third_party/evals/eleuther/eleuther.py similarity index 100% rename from llama_stack/providers/adapters/evals/eleuther/eleuther.py rename to llama_stack/providers/impls/third_party/evals/eleuther/eleuther.py diff --git a/llama_stack/providers/registry/evals.py b/llama_stack/providers/registry/evals.py index c1630aa07..8693ec603 100644 --- a/llama_stack/providers/registry/evals.py +++ b/llama_stack/providers/registry/evals.py @@ -33,8 +33,8 @@ def available_providers() -> List[ProviderSpec]: pip_packages=[ "lm-eval", ], - module="llama_stack.providers.adapters.evals.eleuther", - config_class="llama_stack.providers.adapters.evals.eleuther.EleutherEvalsImplConfig", + module="llama_stack.providers.impls.third_party.evals.eleuther", + config_class="llama_stack.providers.impls.third_party.evals.eleuther.EleutherEvalsImplConfig", api_dependencies=[ Api.inference, ], diff --git a/tests/examples/local-run.yaml b/tests/examples/local-run.yaml index fa082a58c..4a616bc88 100644 --- a/tests/examples/local-run.yaml +++ b/tests/examples/local-run.yaml @@ -15,6 +15,9 @@ api_providers: evals: provider_type: eleuther config: {} + # evals: + # provider_type: meta-reference + # config: {} inference: providers: - meta-reference