wip tests

2025-12-17 05:32:36 +00:00 · 2024-11-06 10:03:49 -08:00 · 2024-11-06 10:03:49 -08:00 · 683a370d23
commit 683a370d23
parent be7b76ceac
4 changed files with 227 additions and 128 deletions
--- a/llama_stack/apis/eval/eval.py
+++ b/llama_stack/apis/eval/eval.py
@ -51,6 +51,11 @@ class AppEvalTaskConfig(BaseModel):
    # we could optionally add any specific dataset config here


+EvalTaskConfig = Annotated[
+    Union[BenchmarkEvalTaskConfig, AppEvalTaskConfig], Field(discriminator="type")
+]
+
+
@json_schema_type
 class EvaluateResponse(BaseModel):
    generations: List[Dict[str, Any]]
@ -70,7 +75,7 @@ class Eval(Protocol):
    async def run_eval(
        self,
        eval_task_def: EvalTaskDef,  # type: ignore
-        eval_task_config: AppEvalTaskConfig,  # type: ignore
+        eval_task_config: EvalTaskConfig,  # type: ignore
    ) -> Job: ...

    @webmethod(route="/eval/evaluate_rows", method="POST")
--- a/llama_stack/apis/eval_tasks/__init__.py
+++ b/llama_stack/apis/eval_tasks/__init__.py
@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .eval_tasks import *  # noqa: F401 F403