[Evals API][10/n] API updates for EvalTaskDef + new test migration (#379)

* wip * scoring fn api * eval api * eval task * evaluate api update * pre commit * unwrap context -> config * config field doc * typo * naming fix * separate benchmark / app eval * api name * rename * wip tests * wip * datasetio test * delete unused * fixture * scoring resolve * fix scoring register * scoring test pass * score batch * scoring fix * fix eval * test eval works * remove type ignore * api refactor * add default task_eval_id for routing * add eval_id for jobs * remove type ignore * only keep 1 run_eval * fix optional * register task required * register task required * delete old tests * delete old tests * fixture return impl
2024-11-07 21:24:12 -08:00 · 2024-11-07 21:24:12 -08:00 · 6192bf43a4
commit 6192bf43a4
parent 8350f2df4c
32 changed files with 916 additions and 389 deletions
--- a/llama_stack/providers/tests/eval/fixtures.py
+++ b/llama_stack/providers/tests/eval/fixtures.py
@ -0,0 +1,55 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import pytest
+import pytest_asyncio
+
+from llama_stack.distribution.datatypes import Api, Provider
+
+from llama_stack.providers.tests.resolver import resolve_impls_for_test_v2
+from ..conftest import ProviderFixture, remote_stack_fixture
+
+
+@pytest.fixture(scope="session")
+def eval_remote() -> ProviderFixture:
+    """Return the ``ProviderFixture`` produced by ``remote_stack_fixture()``.
+
+    All construction is delegated to the shared helper imported from
+    ``..conftest``; nothing eval-specific is configured here.
+    """
+    remote_fixture = remote_stack_fixture()
+    return remote_fixture
+
+
+@pytest.fixture(scope="session")
+def eval_meta_reference() -> ProviderFixture:
+    """Return a ``ProviderFixture`` holding a single meta-reference provider.
+
+    The provider uses ``"meta-reference"`` as both its id and its type and is
+    given an empty config dict.
+    """
+    meta_reference_provider = Provider(
+        provider_id="meta-reference",
+        provider_type="meta-reference",
+        config={},
+    )
+    return ProviderFixture(providers=[meta_reference_provider])
+
+
+# Name suffixes of the eval provider fixtures defined in this module
+# (each one corresponds to a fixture named ``eval_<suffix>``).
+EVAL_FIXTURES = ["meta_reference", "remote"]
+
+
+@pytest_asyncio.fixture(scope="session")
+async def eval_stack(request):
+    """Resolve the API implementations needed by the eval tests.
+
+    ``request.param`` is indexed by the API names ``"datasetio"``, ``"eval"``,
+    ``"scoring"`` and ``"inference"``; each value is the suffix of the provider
+    fixture to use for that API, so the fixture looked up is
+    ``f"{api}_{suffix}"``. (Presumably the mapping is supplied through
+    indirect parametrization -- confirm against the test module.)
+
+    Returns:
+        Whatever ``resolve_impls_for_test_v2`` returns for the eval,
+        datasetio, inference and scoring APIs.
+    """
+    selection = request.param
+    api_names = ("datasetio", "eval", "scoring", "inference")
+
+    providers_by_api = {}
+    merged_provider_data = {}
+    for api_name in api_names:
+        fixture_name = f"{api_name}_{selection[api_name]}"
+        provider_fixture = request.getfixturevalue(fixture_name)
+        providers_by_api[api_name] = provider_fixture.providers
+        # Only fixtures that carry provider data contribute to the merged dict.
+        extra_data = provider_fixture.provider_data
+        if extra_data:
+            merged_provider_data.update(extra_data)
+
+    return await resolve_impls_for_test_v2(
+        [Api.eval, Api.datasetio, Api.inference, Api.scoring],
+        providers_by_api,
+        merged_provider_data,
+    )