Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-04 12:07:34 +00:00)
feat: create HTTP DELETE API endpoints to unregister ScoringFn and Benchmark resources in Llama Stack (#3371)
# What does this PR do?
This PR provides functionality for users to unregister ScoringFn and Benchmark resources for the `scoring` and `eval` APIs.

Closes #3051

## Test Plan
Updated integration and unit tests via the CI workflow.
Parent: 01bdcce4d2
Commit: ab321739f2
13 changed files with 241 additions and 3 deletions
```diff
@@ -93,3 +93,11 @@ class Benchmarks(Protocol):
         :param metadata: The metadata to use for the benchmark.
         """
         ...
+
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE")
+    async def unregister_benchmark(self, benchmark_id: str) -> None:
+        """Unregister a benchmark.
+
+        :param benchmark_id: The ID of the benchmark to unregister.
+        """
+        ...
```
```diff
@@ -197,3 +197,11 @@ class ScoringFunctions(Protocol):
         :param params: The parameters for the scoring function for benchmark eval, these can be overridden for app eval.
         """
         ...
+
+    @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE")
+    async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
+        """Unregister a scoring function.
+
+        :param scoring_fn_id: The ID of the scoring function to unregister.
+        """
+        ...
```
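Taken together, the two hunks above expose `DELETE` routes for both resource types. Below is a minimal sketch of exercising them over plain HTTP against a locally running stack server; the base URL, any API-version prefix your deployment adds, and the resource IDs are illustrative assumptions, not part of this diff:

```python
import requests

# Assumed local server address; adjust host/port (and any API version prefix)
# to match your deployment.
BASE_URL = "http://localhost:8321"

# Remove a previously registered benchmark.
resp = requests.delete(f"{BASE_URL}/eval/benchmarks/my-benchmark-id")
resp.raise_for_status()

# Remove a previously registered scoring function. The `{scoring_fn_id:path}`
# converter in the route presumably allows identifiers containing characters
# that a plain path segment would reject.
resp = requests.delete(f"{BASE_URL}/scoring-functions/my-provider::my-scoring-fn")
resp.raise_for_status()
```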
```diff
@@ -56,3 +56,7 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
             provider_resource_id=provider_benchmark_id,
         )
         await self.register_object(benchmark)
+
+    async def unregister_benchmark(self, benchmark_id: str) -> None:
+        existing_benchmark = await self.get_benchmark(benchmark_id)
+        await self.unregister_object(existing_benchmark)
```
```diff
@@ -64,6 +64,10 @@ async def unregister_object_from_provider(obj: RoutableObject, p: Any) -> None:
         return await p.unregister_shield(obj.identifier)
     elif api == Api.datasetio:
         return await p.unregister_dataset(obj.identifier)
+    elif api == Api.eval:
+        return await p.unregister_benchmark(obj.identifier)
+    elif api == Api.scoring:
+        return await p.unregister_scoring_function(obj.identifier)
     elif api == Api.tool_runtime:
         return await p.unregister_toolgroup(obj.identifier)
     else:
```
```diff
@@ -60,3 +60,7 @@ class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions):
         )
         scoring_fn.provider_id = provider_id
         await self.register_object(scoring_fn)
+
+    async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
+        existing_scoring_fn = await self.get_scoring_function(scoring_fn_id)
+        await self.unregister_object(existing_scoring_fn)
```
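Both routing tables follow the existing shield/dataset pattern: look up the registered object by ID, then pass it to `unregister_object`, which reaches the provider through the dispatch shown above. A rough unit-test shape for that delegation is sketched below; the import path and the `AsyncMock` stand-in for the routing table are assumptions for illustration, not the tests added in this PR:

```python
from unittest.mock import AsyncMock

import pytest

# Assumed import path; adjust to wherever BenchmarksRoutingTable lives in your checkout.
from llama_stack.core.routing_tables.benchmarks import BenchmarksRoutingTable


@pytest.mark.asyncio
async def test_unregister_benchmark_delegates_to_unregister_object():
    # Stand-in routing table: only the new "look up, then unregister" logic is
    # exercised, so every collaborator is mocked.
    table = AsyncMock()

    # Call the real (unbound) coroutine with the stand-in as `self`.
    await BenchmarksRoutingTable.unregister_benchmark(table, "my-benchmark-id")

    table.get_benchmark.assert_awaited_once_with("my-benchmark-id")
    table.unregister_object.assert_awaited_once_with(table.get_benchmark.return_value)
```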
```diff
@@ -75,6 +75,13 @@ class MetaReferenceEvalImpl(
         )
         self.benchmarks[task_def.identifier] = task_def
 
+    async def unregister_benchmark(self, benchmark_id: str) -> None:
+        if benchmark_id in self.benchmarks:
+            del self.benchmarks[benchmark_id]
+
+        key = f"{EVAL_TASKS_PREFIX}{benchmark_id}"
+        await self.kvstore.delete(key)
+
     async def run_eval(
         self,
         benchmark_id: str,
```
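The meta-reference implementation removes both the in-memory entry and the persisted record, and the kvstore key mirrors the one written by `register_benchmark`. A rough shape for a unit test of that behaviour follows; the import path and the bare-bones stand-in object are illustrative assumptions:

```python
from unittest.mock import AsyncMock, MagicMock

import pytest

# Assumed import path for the inline meta-reference eval provider.
from llama_stack.providers.inline.eval.meta_reference.eval import (
    EVAL_TASKS_PREFIX,
    MetaReferenceEvalImpl,
)


@pytest.mark.asyncio
async def test_unregister_benchmark_clears_memory_and_kvstore():
    impl = MagicMock()
    impl.benchmarks = {"my-benchmark-id": object()}
    impl.kvstore = AsyncMock()

    # Invoke the real coroutine with the stand-in as `self`.
    await MetaReferenceEvalImpl.unregister_benchmark(impl, "my-benchmark-id")

    assert "my-benchmark-id" not in impl.benchmarks
    impl.kvstore.delete.assert_awaited_once_with(f"{EVAL_TASKS_PREFIX}my-benchmark-id")
```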
```diff
@@ -63,6 +63,9 @@ class LlmAsJudgeScoringImpl(
     async def register_scoring_function(self, function_def: ScoringFn) -> None:
         self.llm_as_judge_fn.register_scoring_fn_def(function_def)
 
+    async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
+        self.llm_as_judge_fn.unregister_scoring_fn_def(scoring_fn_id)
+
     async def score_batch(
         self,
         dataset_id: str,
```
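The llm-as-judge provider simply forwards the call to its registered scoring-fn object, so the interesting logic lives in `unregister_scoring_fn_def`, which is not shown in this hunk. A plausible shape for that counterpart, assuming the base class keeps a dict registry keyed by scoring-function identifier (class and attribute names below are illustrative, not taken from the diff):

```python
from typing import Any


class IllustrativeScoringFnRegistry:
    """Hypothetical stand-in for the base class the llm-as-judge wrapper delegates to."""

    def __init__(self) -> None:
        # Scoring-function definitions keyed by identifier.
        self.supported_fn_defs_registry: dict[str, Any] = {}

    def register_scoring_fn_def(self, scoring_fn: Any) -> None:
        self.supported_fn_defs_registry[scoring_fn.identifier] = scoring_fn

    def unregister_scoring_fn_def(self, scoring_fn_id: str) -> None:
        # Mirror image of register: drop the definition if it is present.
        self.supported_fn_defs_registry.pop(scoring_fn_id, None)
```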
```diff
@@ -51,18 +51,23 @@ class NVIDIAEvalImpl(
 
     async def shutdown(self) -> None: ...
 
-    async def _evaluator_get(self, path):
+    async def _evaluator_get(self, path: str):
         """Helper for making GET requests to the evaluator service."""
         response = requests.get(url=f"{self.config.evaluator_url}{path}")
         response.raise_for_status()
         return response.json()
 
-    async def _evaluator_post(self, path, data):
+    async def _evaluator_post(self, path: str, data: dict[str, Any]):
         """Helper for making POST requests to the evaluator service."""
         response = requests.post(url=f"{self.config.evaluator_url}{path}", json=data)
         response.raise_for_status()
         return response.json()
 
+    async def _evaluator_delete(self, path: str) -> None:
+        """Helper for making DELETE requests to the evaluator service."""
+        response = requests.delete(url=f"{self.config.evaluator_url}{path}")
+        response.raise_for_status()
+
     async def register_benchmark(self, task_def: Benchmark) -> None:
         """Register a benchmark as an evaluation configuration."""
         await self._evaluator_post(
@@ -75,6 +80,10 @@ class NVIDIAEvalImpl(
             },
         )
 
+    async def unregister_benchmark(self, benchmark_id: str) -> None:
+        """Unregister a benchmark evaluation configuration from NeMo Evaluator."""
+        await self._evaluator_delete(f"/v1/evaluation/configs/{DEFAULT_NAMESPACE}/{benchmark_id}")
+
     async def run_eval(
         self,
         benchmark_id: str,
```
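Because `_evaluator_delete` goes through the module-level `requests` import, the NeMo-side behaviour can be checked without a live Evaluator by patching `requests.delete`. A rough sketch follows; the `nvidia_eval_impl` fixture is hypothetical, and the real coverage for this provider runs in the CI workflow referenced in the test plan:

```python
from unittest.mock import MagicMock, patch

import pytest


@pytest.mark.asyncio
async def test_unregister_benchmark_sends_delete(nvidia_eval_impl):
    # `nvidia_eval_impl` is a hypothetical fixture yielding a configured NVIDIAEvalImpl.
    with patch("requests.delete") as mock_delete:
        mock_delete.return_value = MagicMock(status_code=200)
        await nvidia_eval_impl.unregister_benchmark("my-benchmark-id")

    # The provider composes the NeMo Evaluator config URL from the namespace and benchmark ID.
    called_url = mock_delete.call_args.kwargs["url"]
    assert "/v1/evaluation/configs/" in called_url
    assert called_url.endswith("/my-benchmark-id")
```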