test_scoring

2025-12-12 04:00:42 +00:00 · 2024-10-23 13:01:49 -07:00 · 2024-10-23 13:01:49 -07:00 · 92e32f80ad
commit 92e32f80ad
parent 7c280e18fb
15 changed files with 240 additions and 5 deletions
--- a/llama_stack/providers/datatypes.py
+++ b/llama_stack/providers/datatypes.py
@ -11,10 +11,9 @@ from llama_models.schema_utils import json_schema_type
 from pydantic import BaseModel, Field

 from llama_stack.apis.datasets import DatasetDef
-
 from llama_stack.apis.memory_banks import MemoryBankDef
-
 from llama_stack.apis.models import ModelDef
+from llama_stack.apis.scoring_functions import ScoringFunctionDef
 from llama_stack.apis.shields import ShieldDef


@ -25,6 +24,7 @@ class Api(Enum):
    agents = "agents"
    memory = "memory"
    datasetio = "datasetio"
+    scoring = "scoring"

    telemetry = "telemetry"

@ -32,6 +32,7 @@ class Api(Enum):
    shields = "shields"
    memory_banks = "memory_banks"
    datasets = "datasets"
+    scoring_functions = "scoring_functions"

    # built-in API
    inspect = "inspect"
@ -61,6 +62,14 @@ class DatasetsProtocolPrivate(Protocol):
    async def register_datasets(self, dataset_def: DatasetDef) -> None: ...


+class ScoringFunctionsProtocolPrivate(Protocol):
+    async def list_scoring_functions(self) -> List[ScoringFunctionDef]: ...
+
+    async def register_scoring_function(
+        self, function_def: ScoringFunctionDef
+    ) -> None: ...
+
+
@json_schema_type
 class ProviderSpec(BaseModel):
    api: Api
--- a/llama_stack/providers/impls/meta_reference/scoring/init.py
+++ b/llama_stack/providers/impls/meta_reference/scoring/init.py
@ -0,0 +1,18 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .config import MetaReferenceScoringConfig
+
+
+async def get_provider_impl(
+    config: MetaReferenceScoringConfig,
+    _deps,
+):
+    from .scoring import MetaReferenceScoringImpl
+
+    impl = MetaReferenceScoringImpl(config)
+    await impl.initialize()
+    return impl
--- a/llama_stack/providers/impls/meta_reference/scoring/config.py
+++ b/llama_stack/providers/impls/meta_reference/scoring/config.py
@ -0,0 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from llama_stack.apis.scoring import *  # noqa: F401, F403
+
+
+class MetaReferenceScoringConfig(BaseModel): ...
--- a/llama_stack/providers/impls/meta_reference/scoring/scoring.py
+++ b/llama_stack/providers/impls/meta_reference/scoring/scoring.py
@ -0,0 +1,33 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import List
+
+from llama_models.llama3.api.datatypes import *  # noqa: F403
+from llama_stack.apis.scoring import *  # noqa: F403
+
+from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
+
+from .config import MetaReferenceScoringConfig
+
+
+class MetaReferenceScoringImpl(Scoring, ScoringFunctionsProtocolPrivate):
+    def __init__(self, config: MetaReferenceScoringConfig) -> None:
+        self.config = config
+        self.dataset_infos = {}
+
+    async def initialize(self) -> None: ...
+
+    async def shutdown(self) -> None: ...
+
+    async def score_batch(
+        self, dataset_id: str, scoring_functions: List[str]
+    ) -> ScoreBatchResponse:
+        print("score_batch")
+
+    async def score(
+        self, input_rows: List[Dict[str, Any]], scoring_functions: List[str]
+    ) -> ScoreResponse:
+        print("score")
--- a/llama_stack/providers/registry/scoring.py
+++ b/llama_stack/providers/registry/scoring.py
@ -0,0 +1,24 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import List
+
+from llama_stack.distribution.datatypes import *  # noqa: F403
+
+
+def available_providers() -> List[ProviderSpec]:
+    return [
+        InlineProviderSpec(
+            api=Api.scoring,
+            provider_type="meta-reference",
+            pip_packages=[],
+            module="llama_stack.providers.impls.meta_reference.scoring",
+            config_class="llama_stack.providers.impls.meta_reference.scoring.MetaReferenceScoringConfig",
+            api_dependencies=[
+                Api.datasetio,
+            ],
+        ),
+    ]
--- a/llama_stack/providers/tests/scoring/init.py
+++ b/llama_stack/providers/tests/scoring/init.py
@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
--- a/llama_stack/providers/tests/scoring/provider_config_example.yaml
+++ b/llama_stack/providers/tests/scoring/provider_config_example.yaml
@ -0,0 +1,9 @@
+providers:
+  datasetio:
+  - provider_id: test-meta
+    provider_type: meta-reference
+    config: {}
+  scoring:
+    - provider_id: test-meta
+      provider_type: meta-reference
+      config: {}
--- a/llama_stack/providers/tests/scoring/test_scoring.py
+++ b/llama_stack/providers/tests/scoring/test_scoring.py
@ -0,0 +1,49 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import pytest
+import pytest_asyncio
+
+from llama_stack.apis.common.type_system import *  # noqa: F403
+from llama_stack.apis.datasetio import *  # noqa: F403
+from llama_stack.distribution.datatypes import *  # noqa: F403
+
+from llama_stack.providers.tests.resolver import resolve_impls_for_test
+
+# How to run this test:
+#
+# 1. Ensure you have a conda with the right dependencies installed. This is a bit tricky
+#    since it depends on the provider you are testing. On top of that you need
+#    `pytest` and `pytest-asyncio` installed.
+#
+# 2. Copy and modify the provider_config_example.yaml depending on the provider you are testing.
+#
+# 3. Run:
+#
+# ```bash
+# PROVIDER_ID=<your_provider> \
+#   PROVIDER_CONFIG=provider_config.yaml \
+#   pytest -s llama_stack/providers/tests/scoring/test_scoring.py \
+#   --tb=short --disable-warnings
+# ```
+
+
+@pytest_asyncio.fixture(scope="session")
+async def scoring_settings():
+    impls = await resolve_impls_for_test(Api.scoring, deps=[Api.datasetio])
+    return {
+        "scoring_impl": impls[Api.scoring],
+        "scoring_functions_impl": impls[Api.scoring_functions],
+    }
+
+
+@pytest.mark.asyncio
+async def test_scoring_functions_list(scoring_settings):
+    # NOTE: this needs you to ensure that you are starting from a clean state
+    # but so far we don't have an unregister API unfortunately, so be careful
+    scoring_functions_impl = scoring_settings["scoring_functions_impl"]
+    response = await scoring_functions_impl.list_scoring_functions()
+    assert isinstance(response, list)
+    assert len(response) == 0