feat: create HTTP DELETE API endpoints to unregister ScoringFn and Benchmark resources in Llama Stack (#3371)

# What does this PR do?

This PR lets users unregister ScoringFn and Benchmark resources through the `scoring` and `eval` APIs.

Closes #3051

## Test Plan

Updated integration and unit tests, run via the CI workflow.
2025-12-03 09:53:45 +00:00 · 2025-09-15 20:43:38 +01:00 · 2025-09-15 20:43:38 +01:00 · ab321739f2
commit ab321739f2
parent 01bdcce4d2
13 changed files with 241 additions and 3 deletions
--- a/tests/integration/scoring/test_scoring.py
+++ b/tests/integration/scoring/test_scoring.py
@ -9,6 +9,7 @@ from pathlib import Path

 import pandas as pd
 import pytest
+import requests


@pytest.fixture
@ -77,7 +78,46 @@ def test_scoring_functions_register(
    assert len(list_response) > 0
    assert any(x.identifier == sample_scoring_fn_id for x in list_response)

-    # TODO: add unregister api for scoring functions
+
+def test_scoring_functions_unregister(
+    llama_stack_client,
+    sample_scoring_fn_id,
+    judge_model_id,
+    sample_judge_prompt_template,
+):
+    """Register a scoring function, DELETE it over HTTP, and check it is no longer listed."""
+    # Resolve every skip condition before registering anything, so a skipped
+    # run does not leave the scoring function registered for later tests.
+    try:
+        base_url = llama_stack_client.base_url
+    except AttributeError:
+        pytest.skip("No server base_url available; cannot test HTTP unregister in library mode")
+
+    llm_as_judge_provider = [
+        x
+        for x in llama_stack_client.providers.list()
+        if x.api == "scoring" and x.provider_type == "inline::llm-as-judge"
+    ]
+    if not llm_as_judge_provider:
+        pytest.skip("No llm-as-judge provider found, cannot test unregister")
+
+    register_scoring_function(
+        llama_stack_client,
+        llm_as_judge_provider[0].provider_id,
+        sample_scoring_fn_id,
+        judge_model_id,
+        sample_judge_prompt_template,
+    )
+
+    # Ensure it is present before exercising the DELETE endpoint
+    list_response = llama_stack_client.scoring_functions.list()
+    assert any(x.identifier == sample_scoring_fn_id for x in list_response)
+
+    # Unregister over HTTP; strip any trailing "/" from base_url so the path never starts with "//"
+    resp = requests.delete(f"{str(base_url).rstrip('/')}/v1/scoring-functions/{sample_scoring_fn_id}", timeout=30)
+    assert resp.status_code in (200, 204)
+    list_after = llama_stack_client.scoring_functions.list()
+    assert all(x.identifier != sample_scoring_fn_id for x in list_after)


@pytest.mark.parametrize("scoring_fn_id", ["basic::equality"])
--- a/tests/unit/distribution/routers/test_routing_tables.py
+++ b/tests/unit/distribution/routers/test_routing_tables.py
@ -105,6 +105,9 @@ class ScoringFunctionsImpl(Impl):
    async def register_scoring_function(self, scoring_fn):
        return scoring_fn

+    async def unregister_scoring_function(self, scoring_fn_id: str):
+        return scoring_fn_id  # test stub: does no provider-side work, just echoes the id
+

 class BenchmarksImpl(Impl):
    def __init__(self):
@ -113,6 +116,9 @@ class BenchmarksImpl(Impl):
    async def register_benchmark(self, benchmark):
        return benchmark

+    async def unregister_benchmark(self, benchmark_id: str):
+        return benchmark_id  # test stub: does no provider-side work, just echoes the id
+

 class ToolGroupsImpl(Impl):
    def __init__(self):
@ -330,6 +336,13 @@ async def test_scoring_functions_routing_table(cached_disk_dist_registry):
    assert "test-scoring-fn" in scoring_fn_ids
    assert "test-scoring-fn-2" in scoring_fn_ids

+    # Unregister every listed scoring function and verify the listing is empty
+    for scoring_fn in scoring_functions.data:
+        await table.unregister_scoring_function(scoring_fn.scoring_fn_id)
+
+    scoring_functions_list_after_deletion = await table.list_scoring_functions()
+    assert len(scoring_functions_list_after_deletion.data) == 0
+

 async def test_benchmarks_routing_table(cached_disk_dist_registry):
    table = BenchmarksRoutingTable({"test_provider": BenchmarksImpl()}, cached_disk_dist_registry, {})
@ -347,6 +360,15 @@ async def test_benchmarks_routing_table(cached_disk_dist_registry):
    benchmark_ids = {b.identifier for b in benchmarks.data}
    assert "test-benchmark" in benchmark_ids

+    # Unregister the benchmark and verify removal
+    await table.unregister_benchmark(benchmark_id="test-benchmark")
+    benchmarks_after = await table.list_benchmarks()
+    assert len(benchmarks_after.data) == 0
+
+    # Unregistering a non-existent benchmark should raise a clear error
+    with pytest.raises(ValueError, match="Benchmark 'dummy_benchmark' not found"):
+        await table.unregister_benchmark(benchmark_id="dummy_benchmark")
+

 async def test_tool_groups_routing_table(cached_disk_dist_registry):
    table = ToolGroupsRoutingTable({"test_provider": ToolGroupsImpl()}, cached_disk_dist_registry, {})
--- a/tests/unit/providers/nvidia/test_eval.py
+++ b/tests/unit/providers/nvidia/test_eval.py
@ -52,14 +52,19 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
        self.evaluator_post_patcher = patch(
            "llama_stack.providers.remote.eval.nvidia.eval.NVIDIAEvalImpl._evaluator_post"
        )
+        self.evaluator_delete_patcher = patch(
+            "llama_stack.providers.remote.eval.nvidia.eval.NVIDIAEvalImpl._evaluator_delete"
+        )

        self.mock_evaluator_get = self.evaluator_get_patcher.start()
        self.mock_evaluator_post = self.evaluator_post_patcher.start()
+        self.mock_evaluator_delete = self.evaluator_delete_patcher.start()

    def tearDown(self):
        """Clean up after each test."""
        self.evaluator_get_patcher.stop()
        self.evaluator_post_patcher.stop()
+        self.evaluator_delete_patcher.stop()

    def _assert_request_body(self, expected_json):
        """Helper method to verify request body in Evaluator POST request is correct"""
@ -115,6 +120,13 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
        self.mock_evaluator_post.assert_called_once()
        self._assert_request_body({"namespace": benchmark.provider_id, "name": benchmark.identifier, **eval_config})

+    def test_unregister_benchmark(self):
+        """Unregistering a benchmark calls `_evaluator_delete` once with the benchmark's config path."""
+        expected_path = f"/v1/evaluation/configs/nvidia/{MOCK_BENCHMARK_ID}"
+        self.run_async(self.eval_impl.unregister_benchmark(benchmark_id=MOCK_BENCHMARK_ID))
+
+        self.mock_evaluator_delete.assert_called_once_with(expected_path)
+
    def test_run_eval(self):
        benchmark_config = BenchmarkConfig(
            eval_candidate=ModelCandidate(