mirror of https://github.com/meta-llama/llama-stack.git
commit 92e32f80ad (parent 7c280e18fb): test_scoring

15 changed files with 240 additions and 5 deletions
@@ -84,5 +84,5 @@ class ScoringFunctions(Protocol):

     @webmethod(route="/scoring_functions/register", method="POST")
     async def register_scoring_function(
-        self, function: ScoringFunctionDefWithProvider
+        self, function_def: ScoringFunctionDefWithProvider
    ) -> None: ...

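The @webmethod decorator exposes this protocol method over HTTP, so a client can register a scoring function by POSTing a serialized ScoringFunctionDefWithProvider to the route above. The def's schema is not part of this hunk, so the payload fields and server address in this sketch are placeholders only:

```python
# Hedged client-side sketch; payload fields and the host/port are
# placeholders, not taken from this commit.
import requests

resp = requests.post(
    "http://localhost:5000/scoring_functions/register",  # assumed address
    json={
        "identifier": "test-equality",  # placeholder field
        "provider_id": "test-meta",     # provider id used in the test config below
    },
)
resp.raise_for_status()
```
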
@@ -15,10 +15,12 @@ from llama_stack.apis.models import * # noqa: F403
 from llama_stack.apis.shields import * # noqa: F403
 from llama_stack.apis.memory_banks import * # noqa: F403
 from llama_stack.apis.datasets import * # noqa: F403
+from llama_stack.apis.scoring_functions import * # noqa: F403
 from llama_stack.apis.datasetio import DatasetIO
 from llama_stack.apis.inference import Inference
 from llama_stack.apis.memory import Memory
 from llama_stack.apis.safety import Safety
+from llama_stack.apis.scoring import Scoring

 LLAMA_STACK_BUILD_CONFIG_VERSION = "2"
 LLAMA_STACK_RUN_CONFIG_VERSION = "2"

@@ -32,6 +34,7 @@ RoutableObject = Union[
     ShieldDef,
     MemoryBankDef,
     DatasetDef,
+    ScoringFunctionDef,
 ]

 RoutableObjectWithProvider = Union[

@@ -39,6 +42,7 @@ RoutableObjectWithProvider = Union[
     ShieldDefWithProvider,
     MemoryBankDefWithProvider,
     DatasetDefWithProvider,
+    ScoringFunctionDefWithProvider,
 ]

 RoutedProtocol = Union[

@@ -46,6 +50,7 @@ RoutedProtocol = Union[
     Safety,
     Memory,
     DatasetIO,
+    Scoring,
 ]

@@ -39,6 +39,10 @@ def builtin_automatically_routed_apis() -> List[AutoRoutedApiInfo]:
             routing_table_api=Api.datasets,
             router_api=Api.datasetio,
         ),
+        AutoRoutedApiInfo(
+            routing_table_api=Api.scoring_functions,
+            router_api=Api.scoring,
+        ),
     ]

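Each AutoRoutedApiInfo pairs a registry API (where objects get registered) with the runtime API whose calls are routed through those objects. A minimal sketch of how to see the new pairing, using only names from this commit:

```python
# Enumerate the registry -> runtime pairings, including the new scoring entry.
from llama_stack.distribution.distribution import (
    builtin_automatically_routed_apis,
)

for info in builtin_automatically_routed_apis():
    print(f"{info.routing_table_api.value} -> {info.router_api.value}")
# expected to include: scoring_functions -> scoring
```
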
@@ -20,6 +20,8 @@ from llama_stack.apis.memory import Memory
 from llama_stack.apis.memory_banks import MemoryBanks
 from llama_stack.apis.models import Models
 from llama_stack.apis.safety import Safety
+from llama_stack.apis.scoring import Scoring
+from llama_stack.apis.scoring_functions import ScoringFunctions
 from llama_stack.apis.shields import Shields
 from llama_stack.apis.telemetry import Telemetry
 from llama_stack.distribution.distribution import (

@@ -42,6 +44,8 @@ def api_protocol_map() -> Dict[Api, Any]:
         Api.telemetry: Telemetry,
         Api.datasets: Datasets,
         Api.datasetio: DatasetIO,
+        Api.scoring_functions: ScoringFunctions,
+        Api.scoring: Scoring,
     }

@@ -126,6 +130,12 @@ async def resolve_impls(run_config: StackRunConfig) -> Dict[Api, Any]:
             )
         }

+        if info.router_api.value == "scoring":
+            print("SCORING API")
+
+        # p = all_api_providers[api][provider.provider_type]
+        # p.deps__ = [a.value for a in p.api_dependencies]
+
         providers_with_specs[info.router_api.value] = {
             "__builtin__": ProviderWithSpec(
                 provider_id="__autorouted__",

@@ -11,6 +11,7 @@ from .routing_tables import (
     DatasetsRoutingTable,
     MemoryBanksRoutingTable,
     ModelsRoutingTable,
+    ScoringFunctionsRoutingTable,
     ShieldsRoutingTable,
 )

@@ -25,7 +26,9 @@ async def get_routing_table_impl(
         "models": ModelsRoutingTable,
         "shields": ShieldsRoutingTable,
         "datasets": DatasetsRoutingTable,
+        "scoring_functions": ScoringFunctionsRoutingTable,
     }

     if api.value not in api_to_tables:
         raise ValueError(f"API {api.value} not found in router map")

@@ -35,17 +38,30 @@ async def get_routing_table_impl(


 async def get_auto_router_impl(api: Api, routing_table: RoutingTable, _deps) -> Any:
-    from .routers import DatasetIORouter, InferenceRouter, MemoryRouter, SafetyRouter
+    from .routers import (
+        DatasetIORouter,
+        InferenceRouter,
+        MemoryRouter,
+        SafetyRouter,
+        ScoringRouter,
+    )

     api_to_routers = {
         "memory": MemoryRouter,
         "inference": InferenceRouter,
         "safety": SafetyRouter,
         "datasetio": DatasetIORouter,
+        "scoring": ScoringRouter,
     }
     if api.value not in api_to_routers:
         raise ValueError(f"API {api.value} not found in router map")

+    # api_with_deps = {"scoring"}
+    # if api.value in api_with_deps:
+    #     impl = api_to_routers[api.value](routing_table, _deps)
+    # else:
+    #     impl = api_to_routers[api.value](routing_table)
+
     impl = api_to_routers[api.value](routing_table)
     await impl.initialize()
     return impl

@@ -13,6 +13,7 @@ from llama_stack.apis.memory import * # noqa: F403
 from llama_stack.apis.inference import * # noqa: F403
 from llama_stack.apis.safety import * # noqa: F403
 from llama_stack.apis.datasetio import * # noqa: F403
+from llama_stack.apis.scoring import * # noqa: F403


 class MemoryRouter(Memory):

@@ -192,3 +193,28 @@ class DatasetIORouter(DatasetIO):
             page_token=page_token,
             filter_condition=filter_condition,
         )
+
+
+class ScoringRouter(Scoring):
+    def __init__(
+        self,
+        routing_table: RoutingTable,
+    ) -> None:
+        self.routing_table = routing_table
+
+    async def initialize(self) -> None:
+        pass
+
+    async def shutdown(self) -> None:
+        pass
+
+    async def score_batch(
+        self, dataset_id: str, scoring_functions: List[str]
+    ) -> ScoreBatchResponse:
+        # TODO
+        pass
+
+    async def score(
+        self, input_rows: List[Dict[str, Any]], scoring_functions: List[str]
+    ) -> ScoreResponse:
+        pass

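The score and score_batch bodies are left as TODO stubs in this commit. One plausible way they could be completed, assuming the routing table exposes a get_provider_impl(key) helper like the other routers in this file rely on (that helper is not visible in this hunk), and assuming ScoreResponse carries a results mapping (a hypothetical field):

```python
# A sketch of a possible completion, not the committed code.
class ScoringRouterSketch(Scoring):
    def __init__(self, routing_table: RoutingTable) -> None:
        self.routing_table = routing_table

    async def score(
        self, input_rows: List[Dict[str, Any]], scoring_functions: List[str]
    ) -> ScoreResponse:
        results = {}
        # Fan out: each scoring function may be served by a different provider.
        for fn_identifier in scoring_functions:
            provider = self.routing_table.get_provider_impl(fn_identifier)  # assumed helper
            response = await provider.score(
                input_rows=input_rows, scoring_functions=[fn_identifier]
            )
            results[fn_identifier] = response.results[fn_identifier]  # hypothetical field
        return ScoreResponse(results=results)  # hypothetical field
```
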
@@ -218,7 +218,25 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
     async def get_dataset(
         self, dataset_identifier: str
     ) -> Optional[DatasetDefWithProvider]:
-        return self.get_object_by_identifier(identifier)
+        return self.get_object_by_identifier(dataset_identifier)

     async def register_dataset(self, dataset_def: DatasetDefWithProvider) -> None:
         await self.register_object(dataset_def)
+
+
+class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, Scoring):
+    async def list_scoring_functions(self) -> List[ScoringFunctionDefWithProvider]:
+        objects = []
+        for objs in self.registry.values():
+            objects.extend(objs)
+        return objects
+
+    async def get_scoring_function(
+        self, name: str
+    ) -> Optional[ScoringFunctionDefWithProvider]:
+        return self.get_object_by_identifier(name)
+
+    async def register_scoring_function(
+        self, function_def: ScoringFunctionDefWithProvider
+    ) -> None:
+        await self.register_object(function_def)

@@ -11,10 +11,9 @@ from llama_models.schema_utils import json_schema_type
 from pydantic import BaseModel, Field

 from llama_stack.apis.datasets import DatasetDef
-
 from llama_stack.apis.memory_banks import MemoryBankDef
-
 from llama_stack.apis.models import ModelDef
+from llama_stack.apis.scoring_functions import ScoringFunctionDef
 from llama_stack.apis.shields import ShieldDef

@@ -25,6 +24,7 @@ class Api(Enum):
     agents = "agents"
     memory = "memory"
     datasetio = "datasetio"
+    scoring = "scoring"

     telemetry = "telemetry"

@@ -32,6 +32,7 @@ class Api(Enum):
     shields = "shields"
     memory_banks = "memory_banks"
     datasets = "datasets"
+    scoring_functions = "scoring_functions"

     # built-in API
     inspect = "inspect"

@@ -61,6 +62,14 @@ class DatasetsProtocolPrivate(Protocol):
     async def register_datasets(self, dataset_def: DatasetDef) -> None: ...


+class ScoringFunctionsProtocolPrivate(Protocol):
+    async def list_scoring_functions(self) -> List[ScoringFunctionDef]: ...
+
+    async def register_scoring_function(
+        self, function_def: ScoringFunctionDef
+    ) -> None: ...
+
+
 @json_schema_type
 class ProviderSpec(BaseModel):
     api: Api

@@ -0,0 +1,18 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .config import MetaReferenceScoringConfig
+
+
+async def get_provider_impl(
+    config: MetaReferenceScoringConfig,
+    _deps,
+):
+    from .scoring import MetaReferenceScoringImpl
+
+    impl = MetaReferenceScoringImpl(config)
+    await impl.initialize()
+    return impl

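This follows the stack's provider-entrypoint convention: the resolver imports the module named in the registry spec, builds its config, and calls get_provider_impl. A hedged sketch of that handshake, using the module path from the InlineProviderSpec added later in this commit; the empty _deps dict is illustrative (the real resolver would pass the Api.datasetio implementation declared as a dependency):

```python
import asyncio
import importlib


async def load_scoring_provider():
    # Module path taken from the registry entry in this commit.
    module = importlib.import_module(
        "llama_stack.providers.impls.meta_reference.scoring"
    )
    config = module.MetaReferenceScoringConfig()  # config defines no fields yet
    return await module.get_provider_impl(config, _deps={})  # _deps empty for illustration


impl = asyncio.run(load_scoring_provider())
```
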
@@ -0,0 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from llama_stack.apis.scoring import * # noqa: F401, F403
+
+
+class MetaReferenceScoringConfig(BaseModel): ...

@@ -0,0 +1,33 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import List
+
+from llama_models.llama3.api.datatypes import * # noqa: F403
+from llama_stack.apis.scoring import * # noqa: F403
+
+from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
+
+from .config import MetaReferenceScoringConfig
+
+
+class MetaReferenceScoringImpl(Scoring, ScoringFunctionsProtocolPrivate):
+    def __init__(self, config: MetaReferenceScoringConfig) -> None:
+        self.config = config
+        self.dataset_infos = {}
+
+    async def initialize(self) -> None: ...
+
+    async def shutdown(self) -> None: ...
+
+    async def score_batch(
+        self, dataset_id: str, scoring_functions: List[str]
+    ) -> ScoreBatchResponse:
+        print("score_batch")
+
+    async def score(
+        self, input_rows: List[Dict[str, Any]], scoring_functions: List[str]
+    ) -> ScoreResponse:
+        print("score")

llama_stack/providers/registry/scoring.py (new file, 24 lines)
@@ -0,0 +1,24 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import List
+
+from llama_stack.distribution.datatypes import * # noqa: F403
+
+
+def available_providers() -> List[ProviderSpec]:
+    return [
+        InlineProviderSpec(
+            api=Api.scoring,
+            provider_type="meta-reference",
+            pip_packages=[],
+            module="llama_stack.providers.impls.meta_reference.scoring",
+            config_class="llama_stack.providers.impls.meta_reference.scoring.MetaReferenceScoringConfig",
+            api_dependencies=[
+                Api.datasetio,
+            ],
+        ),
+    ]

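The api_dependencies entry is why the test fixture later in this commit resolves scoring with deps=[Api.datasetio]. A small sanity check of the spec, as a sketch (the Api import location is assumed from the datatypes hunk above):

```python
from llama_stack.providers.datatypes import Api  # assumed location of the Api enum
from llama_stack.providers.registry.scoring import available_providers

specs = available_providers()
assert specs[0].provider_type == "meta-reference"
assert Api.datasetio in specs[0].api_dependencies
```
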
llama_stack/providers/tests/scoring/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.

@@ -0,0 +1,9 @@
+providers:
+  datasetio:
+    - provider_id: test-meta
+      provider_type: meta-reference
+      config: {}
+  scoring:
+    - provider_id: test-meta
+      provider_type: meta-reference
+      config: {}

llama_stack/providers/tests/scoring/test_scoring.py (new file, 49 lines)
@@ -0,0 +1,49 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import pytest
+import pytest_asyncio
+
+from llama_stack.apis.common.type_system import * # noqa: F403
+from llama_stack.apis.datasetio import * # noqa: F403
+from llama_stack.distribution.datatypes import * # noqa: F403
+
+from llama_stack.providers.tests.resolver import resolve_impls_for_test
+
+# How to run this test:
+#
+# 1. Ensure you have a conda with the right dependencies installed. This is a bit tricky
+#    since it depends on the provider you are testing. On top of that you need
+#    `pytest` and `pytest-asyncio` installed.
+#
+# 2. Copy and modify the provider_config_example.yaml depending on the provider you are testing.
+#
+# 3. Run:
+#
+# ```bash
+# PROVIDER_ID=<your_provider> \
+#  PROVIDER_CONFIG=provider_config.yaml \
+#  pytest -s llama_stack/providers/tests/scoring/test_scoring.py \
+#  --tb=short --disable-warnings
+# ```
+
+
+@pytest_asyncio.fixture(scope="session")
+async def scoring_settings():
+    impls = await resolve_impls_for_test(Api.scoring, deps=[Api.datasetio])
+    return {
+        "scoring_impl": impls[Api.scoring],
+        "scoring_functions_impl": impls[Api.scoring_functions],
+    }
+
+
+@pytest.mark.asyncio
+async def test_scoring_functions_list(scoring_settings):
+    # NOTE: this needs you to ensure that you are starting from a clean state
+    # but so far we don't have an unregister API unfortunately, so be careful
+    scoring_functions_impl = scoring_settings["scoring_functions_impl"]
+    response = await scoring_functions_impl.list_scoring_functions()
+    assert isinstance(response, list)
+    assert len(response) == 0

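Since the new routing table keeps definitions in an in-memory registry, a natural follow-up test would register a function and list it back. The actual fields of ScoringFunctionDefWithProvider are not visible in this diff, so the constructor kwargs below are placeholders:

```python
# Hypothetical follow-up test; ScoringFunctionDefWithProvider's real
# fields are not shown in this commit, so the kwargs are illustrative.
@pytest.mark.asyncio
async def test_scoring_function_register(scoring_settings):
    scoring_functions_impl = scoring_settings["scoring_functions_impl"]
    before = await scoring_functions_impl.list_scoring_functions()

    function_def = ScoringFunctionDefWithProvider(
        identifier="test-equality",  # placeholder field
        provider_id="test-meta",     # matches the example provider config above
    )
    await scoring_functions_impl.register_scoring_function(function_def)

    after = await scoring_functions_impl.list_scoring_functions()
    assert len(after) == len(before) + 1
```
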