mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 04:04:14 +00:00
feat: create HTTP DELETE API endpoints to unregister ScoringFn and Benchmark resources in Llama Stack (#3371)
# What does this PR do?
This PR provides functionality for users to unregister ScoringFn and Benchmark resources for the `scoring` and `eval` APIs.

Closes #3051

## Test Plan
Updated integration and unit tests via CI workflow.
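For illustration only (not part of the diff), here is a minimal sketch of how a client could exercise the new unregister endpoints over plain HTTP. The scoring-function route matches the integration test below; the server URL, the resource IDs, and the benchmark route are assumptions, since they are not shown in this excerpt.

```python
import requests

# Placeholder values for illustration only.
base_url = "http://localhost:8321"        # assumed local Llama Stack server URL
scoring_fn_id = "llm-as-judge::my-judge"  # hypothetical scoring function identifier
benchmark_id = "my-benchmark"             # hypothetical benchmark identifier

# DELETE a scoring function -- this route appears in the integration test in this PR.
resp = requests.delete(f"{base_url}/v1/scoring-functions/{scoring_fn_id}", timeout=30)
assert resp.status_code in (200, 204)

# DELETE a benchmark -- route assumed by analogy; it is not shown in this excerpt.
resp = requests.delete(f"{base_url}/v1/eval/benchmarks/{benchmark_id}", timeout=30)
assert resp.status_code in (200, 204)
```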
This commit is contained in:
parent 01bdcce4d2
commit ab321739f2

13 changed files with 241 additions and 3 deletions
@@ -9,6 +9,7 @@ from pathlib import Path
import pandas as pd
import pytest
import requests


@pytest.fixture
@@ -77,7 +78,46 @@ def test_scoring_functions_register(
    assert len(list_response) > 0
    assert any(x.identifier == sample_scoring_fn_id for x in list_response)

# TODO: add unregister api for scoring functions

def test_scoring_functions_unregister(
    llama_stack_client,
    sample_scoring_fn_id,
    judge_model_id,
    sample_judge_prompt_template,
):
    llm_as_judge_provider = [
        x
        for x in llama_stack_client.providers.list()
        if x.api == "scoring" and x.provider_type == "inline::llm-as-judge"
    ]
    if len(llm_as_judge_provider) == 0:
        pytest.skip("No llm-as-judge provider found, cannot test unregister")

    llm_as_judge_provider_id = llm_as_judge_provider[0].provider_id

    # Register first
    register_scoring_function(
        llama_stack_client,
        llm_as_judge_provider_id,
        sample_scoring_fn_id,
        judge_model_id,
        sample_judge_prompt_template,
    )

    # Ensure it is present
    list_response = llama_stack_client.scoring_functions.list()
    assert any(x.identifier == sample_scoring_fn_id for x in list_response)

    # Unregister scoring fn
    try:
        base_url = llama_stack_client.base_url
    except AttributeError:
        pytest.skip("No server base_url available; cannot test HTTP unregister in library mode")

    resp = requests.delete(f"{base_url}/v1/scoring-functions/{sample_scoring_fn_id}", timeout=30)
    assert resp.status_code in (200, 204)
    list_after = llama_stack_client.scoring_functions.list()
    assert all(x.identifier != sample_scoring_fn_id for x in list_after)


@pytest.mark.parametrize("scoring_fn_id", ["basic::equality"])
@@ -105,6 +105,9 @@ class ScoringFunctionsImpl(Impl):
    async def register_scoring_function(self, scoring_fn):
        return scoring_fn

    async def unregister_scoring_function(self, scoring_fn_id: str):
        return scoring_fn_id


class BenchmarksImpl(Impl):
    def __init__(self):
@@ -113,6 +116,9 @@ class BenchmarksImpl(Impl):
    async def register_benchmark(self, benchmark):
        return benchmark

    async def unregister_benchmark(self, benchmark_id: str):
        return benchmark_id


class ToolGroupsImpl(Impl):
    def __init__(self):
@@ -330,6 +336,13 @@ async def test_scoring_functions_routing_table(cached_disk_dist_registry):
    assert "test-scoring-fn" in scoring_fn_ids
    assert "test-scoring-fn-2" in scoring_fn_ids

    # Unregister scoring functions and verify listing
    for i in range(len(scoring_functions.data)):
        await table.unregister_scoring_function(scoring_functions.data[i].scoring_fn_id)

    scoring_functions_list_after_deletion = await table.list_scoring_functions()
    assert len(scoring_functions_list_after_deletion.data) == 0


async def test_benchmarks_routing_table(cached_disk_dist_registry):
    table = BenchmarksRoutingTable({"test_provider": BenchmarksImpl()}, cached_disk_dist_registry, {})
@@ -347,6 +360,15 @@ async def test_benchmarks_routing_table(cached_disk_dist_registry):
    benchmark_ids = {b.identifier for b in benchmarks.data}
    assert "test-benchmark" in benchmark_ids

    # Unregister the benchmark and verify removal
    await table.unregister_benchmark(benchmark_id="test-benchmark")
    benchmarks_after = await table.list_benchmarks()
    assert len(benchmarks_after.data) == 0

    # Unregistering a non-existent benchmark should raise a clear error
    with pytest.raises(ValueError, match="Benchmark 'dummy_benchmark' not found"):
        await table.unregister_benchmark(benchmark_id="dummy_benchmark")


async def test_tool_groups_routing_table(cached_disk_dist_registry):
    table = ToolGroupsRoutingTable({"test_provider": ToolGroupsImpl()}, cached_disk_dist_registry, {})
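The routing-table tests above pin down only the observable contract: unknown IDs raise a `ValueError` with a specific message, and a successful unregister removes the resource from the listing and delegates to the provider. A toy, in-memory stand-in that mimics that contract is sketched below; it is not the llama-stack `BenchmarksRoutingTable` implementation, and all internal names here are invented for illustration.

```python
# Toy sketch: mimics the unregister behavior the tests above verify.
# It is NOT the real llama-stack BenchmarksRoutingTable.
class ToyBenchmarksTable:
    def __init__(self, provider_impl):
        self._benchmarks: dict[str, object] = {}
        self._provider = provider_impl  # e.g. a BenchmarksImpl-like object

    async def register_benchmark(self, benchmark_id: str, benchmark: object) -> None:
        self._benchmarks[benchmark_id] = benchmark
        await self._provider.register_benchmark(benchmark)

    async def unregister_benchmark(self, benchmark_id: str) -> None:
        if benchmark_id not in self._benchmarks:
            # The unit test expects this exact error message for unknown IDs.
            raise ValueError(f"Benchmark '{benchmark_id}' not found")
        # Remove from the local registry, then let the owning provider clean up.
        del self._benchmarks[benchmark_id]
        await self._provider.unregister_benchmark(benchmark_id)
```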
@@ -52,14 +52,19 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
        self.evaluator_post_patcher = patch(
            "llama_stack.providers.remote.eval.nvidia.eval.NVIDIAEvalImpl._evaluator_post"
        )
        self.evaluator_delete_patcher = patch(
            "llama_stack.providers.remote.eval.nvidia.eval.NVIDIAEvalImpl._evaluator_delete"
        )

        self.mock_evaluator_get = self.evaluator_get_patcher.start()
        self.mock_evaluator_post = self.evaluator_post_patcher.start()
        self.mock_evaluator_delete = self.evaluator_delete_patcher.start()

    def tearDown(self):
        """Clean up after each test."""
        self.evaluator_get_patcher.stop()
        self.evaluator_post_patcher.stop()
        self.evaluator_delete_patcher.stop()

    def _assert_request_body(self, expected_json):
        """Helper method to verify request body in Evaluator POST request is correct"""
@@ -115,6 +120,13 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
        self.mock_evaluator_post.assert_called_once()
        self._assert_request_body({"namespace": benchmark.provider_id, "name": benchmark.identifier, **eval_config})

    def test_unregister_benchmark(self):
        # Unregister the benchmark
        self.run_async(self.eval_impl.unregister_benchmark(benchmark_id=MOCK_BENCHMARK_ID))

        # Verify the Evaluator API was called correctly
        self.mock_evaluator_delete.assert_called_once_with(f"/v1/evaluation/configs/nvidia/{MOCK_BENCHMARK_ID}")

    def test_run_eval(self):
        benchmark_config = BenchmarkConfig(
            eval_candidate=ModelCandidate(
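The NVIDIA test above asserts only that unregistering a benchmark issues a DELETE against the evaluator config path via the patched `_evaluator_delete` helper. A minimal sketch consistent with that assertion follows; the class and constructor here are invented stand-ins, not the adapter's actual `NVIDIAEvalImpl` code.

```python
# Toy sketch consistent with the unit-test assertion above; NOT the real NVIDIAEvalImpl.
class NvidiaEvalSketch:
    def __init__(self, evaluator_delete):
        # evaluator_delete: async callable taking a path, standing in for the
        # _evaluator_delete helper the test patches.
        self._evaluator_delete = evaluator_delete

    async def unregister_benchmark(self, benchmark_id: str) -> None:
        # Delete the corresponding evaluator config, mirroring the path the test asserts on.
        await self._evaluator_delete(f"/v1/evaluation/configs/nvidia/{benchmark_id}")
```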