mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 12:07:34 +00:00

feat: create HTTP DELETE API endpoints to allow users to free up ScoringFn and Benchmark resources in LS

commit 0591451ed6 (parent b6cb817897)
13 changed files with 241 additions and 3 deletions
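For orientation, here is a minimal client-side sketch of how the new DELETE endpoints might be exercised over HTTP. This is not code from the commit: the base URL and port are assumptions, the benchmark path prefix is inferred from the /eval/benchmarks/{benchmark_id} webmethod route added below, and the /v1/scoring-functions path matches the integration test added in this commit. The resource identifiers are hypothetical.

import requests

BASE_URL = "http://localhost:8321"  # assumed local Llama Stack server

# Unregister a scoring function ("my-scoring-fn" is a hypothetical identifier;
# the path matches the integration test added in this commit).
resp = requests.delete(f"{BASE_URL}/v1/scoring-functions/my-scoring-fn", timeout=30)
resp.raise_for_status()

# Unregister a benchmark (the /v1/eval/benchmarks prefix is an assumption based
# on the webmethod route and the server's /v1 prefix).
resp = requests.delete(f"{BASE_URL}/v1/eval/benchmarks/my-benchmark", timeout=30)
resp.raise_for_status()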
docs/_static/llama-stack-spec.html (vendored, 68 changes)

@@ -1380,6 +1380,40 @@
             }
           }
         ]
+      },
+      "delete": {
+        "responses": {
+          "200": {
+            "description": "OK"
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest400"
+          },
+          "429": {
+            "$ref": "#/components/responses/TooManyRequests429"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError500"
+          },
+          "default": {
+            "$ref": "#/components/responses/DefaultError"
+          }
+        },
+        "tags": [
+          "Benchmarks"
+        ],
+        "description": "Unregister a benchmark.",
+        "parameters": [
+          {
+            "name": "benchmark_id",
+            "in": "path",
+            "description": "The ID of the benchmark to unregister.",
+            "required": true,
+            "schema": {
+              "type": "string"
+            }
+          }
+        ]
       }
     },
     "/v1/openai/v1/chat/completions/{completion_id}": {
@@ -1620,6 +1654,40 @@
             }
           }
         ]
+      },
+      "delete": {
+        "responses": {
+          "200": {
+            "description": "OK"
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest400"
+          },
+          "429": {
+            "$ref": "#/components/responses/TooManyRequests429"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError500"
+          },
+          "default": {
+            "$ref": "#/components/responses/DefaultError"
+          }
+        },
+        "tags": [
+          "ScoringFunctions"
+        ],
+        "description": "Unregister a scoring function.",
+        "parameters": [
+          {
+            "name": "scoring_fn_id",
+            "in": "path",
+            "description": "The ID of the scoring function to unregister.",
+            "required": true,
+            "schema": {
+              "type": "string"
+            }
+          }
+        ]
       }
     },
     "/v1/shields/{identifier}": {
docs/_static/llama-stack-spec.yaml (vendored, 49 changes)

@@ -954,6 +954,30 @@ paths:
           required: true
           schema:
             type: string
+    delete:
+      responses:
+        '200':
+          description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Benchmarks
+      description: Unregister a benchmark.
+      parameters:
+        - name: benchmark_id
+          in: path
+          description: The ID of the benchmark to unregister.
+          required: true
+          schema:
+            type: string
   /v1/openai/v1/chat/completions/{completion_id}:
     get:
       responses:
@@ -1119,6 +1143,31 @@ paths:
           required: true
           schema:
             type: string
+    delete:
+      responses:
+        '200':
+          description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - ScoringFunctions
+      description: Unregister a scoring function.
+      parameters:
+        - name: scoring_fn_id
+          in: path
+          description: >-
+            The ID of the scoring function to unregister.
+          required: true
+          schema:
+            type: string
   /v1/shields/{identifier}:
     get:
       responses:
@@ -93,3 +93,11 @@ class Benchmarks(Protocol):
         :param metadata: The metadata to use for the benchmark.
         """
         ...
+
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE")
+    async def unregister_benchmark(self, benchmark_id: str) -> None:
+        """Unregister a benchmark.
+
+        :param benchmark_id: The ID of the benchmark to unregister.
+        """
+        ...
@@ -197,3 +197,11 @@ class ScoringFunctions(Protocol):
         :param params: The parameters for the scoring function for benchmark eval, these can be overridden for app eval.
         """
         ...
+
+    @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE")
+    async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
+        """Unregister a scoring function.
+
+        :param scoring_fn_id: The ID of the scoring function to unregister.
+        """
+        ...
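The two protocol additions above define the provider-facing contract for the new DELETE routes. As an illustration only (not code from this commit), a provider satisfying these hooks could be as simple as the following in-memory sketch; the class name and the backing dicts are hypothetical, and real providers in this commit (meta-reference eval, llm-as-judge scoring, NVIDIA eval) each use their own storage:

# Illustrative only: a hypothetical in-memory provider implementing the new
# unregister hooks declared by the Benchmarks/ScoringFunctions protocols above.
class InMemoryEvalProvider:
    def __init__(self) -> None:
        self.benchmarks: dict[str, object] = {}
        self.scoring_fns: dict[str, object] = {}

    async def unregister_benchmark(self, benchmark_id: str) -> None:
        # Drop the benchmark if present; a no-op otherwise.
        self.benchmarks.pop(benchmark_id, None)

    async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
        # Drop the scoring function if present; a no-op otherwise.
        self.scoring_fns.pop(scoring_fn_id, None)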
@@ -56,3 +56,7 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
             provider_resource_id=provider_benchmark_id,
         )
         await self.register_object(benchmark)
+
+    async def unregister_benchmark(self, benchmark_id: str) -> None:
+        existing_benchmark = await self.get_benchmark(benchmark_id)
+        await self.unregister_object(existing_benchmark)
@@ -64,6 +64,10 @@ async def unregister_object_from_provider(obj: RoutableObject, p: Any) -> None:
         return await p.unregister_shield(obj.identifier)
     elif api == Api.datasetio:
         return await p.unregister_dataset(obj.identifier)
+    elif api == Api.eval:
+        return await p.unregister_benchmark(obj.identifier)
+    elif api == Api.scoring:
+        return await p.unregister_scoring_function(obj.identifier)
     elif api == Api.tool_runtime:
         return await p.unregister_toolgroup(obj.identifier)
     else:
@@ -60,3 +60,7 @@ class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions):
         )
         scoring_fn.provider_id = provider_id
         await self.register_object(scoring_fn)
+
+    async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
+        existing_scoring_fn = await self.get_scoring_function(scoring_fn_id)
+        await self.unregister_object(existing_scoring_fn)
@@ -75,6 +75,13 @@ class MetaReferenceEvalImpl(
         )
         self.benchmarks[task_def.identifier] = task_def
 
+    async def unregister_benchmark(self, benchmark_id: str) -> None:
+        if benchmark_id in self.benchmarks:
+            del self.benchmarks[benchmark_id]
+
+        key = f"{EVAL_TASKS_PREFIX}{benchmark_id}"
+        await self.kvstore.delete(key)
+
     async def run_eval(
         self,
         benchmark_id: str,
@@ -63,6 +63,9 @@ class LlmAsJudgeScoringImpl(
     async def register_scoring_function(self, function_def: ScoringFn) -> None:
         self.llm_as_judge_fn.register_scoring_fn_def(function_def)
 
+    async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
+        self.llm_as_judge_fn.unregister_scoring_fn_def(scoring_fn_id)
+
     async def score_batch(
         self,
         dataset_id: str,
@@ -51,18 +51,23 @@ class NVIDIAEvalImpl(
 
     async def shutdown(self) -> None: ...
 
-    async def _evaluator_get(self, path):
+    async def _evaluator_get(self, path: str):
         """Helper for making GET requests to the evaluator service."""
         response = requests.get(url=f"{self.config.evaluator_url}{path}")
         response.raise_for_status()
         return response.json()
 
-    async def _evaluator_post(self, path, data):
+    async def _evaluator_post(self, path: str, data: dict[str, Any]):
         """Helper for making POST requests to the evaluator service."""
         response = requests.post(url=f"{self.config.evaluator_url}{path}", json=data)
         response.raise_for_status()
         return response.json()
 
+    async def _evaluator_delete(self, path: str) -> None:
+        """Helper for making DELETE requests to the evaluator service."""
+        response = requests.delete(url=f"{self.config.evaluator_url}{path}")
+        response.raise_for_status()
+
     async def register_benchmark(self, task_def: Benchmark) -> None:
         """Register a benchmark as an evaluation configuration."""
         await self._evaluator_post(
@@ -75,6 +80,10 @@ class NVIDIAEvalImpl(
             },
         )
 
+    async def unregister_benchmark(self, benchmark_id: str) -> None:
+        """Unregister a benchmark evaluation configuration from NeMo Evaluator."""
+        await self._evaluator_delete(f"/v1/evaluation/configs/{DEFAULT_NAMESPACE}/{benchmark_id}")
+
     async def run_eval(
         self,
         benchmark_id: str,
@@ -9,6 +9,7 @@ from pathlib import Path
 
 import pandas as pd
 import pytest
+import requests
 
 
 @pytest.fixture
@@ -77,7 +78,46 @@ def test_scoring_functions_register(
     assert len(list_response) > 0
     assert any(x.identifier == sample_scoring_fn_id for x in list_response)
 
-# TODO: add unregister api for scoring functions
+
+def test_scoring_functions_unregister(
+    llama_stack_client,
+    sample_scoring_fn_id,
+    judge_model_id,
+    sample_judge_prompt_template,
+):
+    llm_as_judge_provider = [
+        x
+        for x in llama_stack_client.providers.list()
+        if x.api == "scoring" and x.provider_type == "inline::llm-as-judge"
+    ]
+    if len(llm_as_judge_provider) == 0:
+        pytest.skip("No llm-as-judge provider found, cannot test unregister")
+
+    llm_as_judge_provider_id = llm_as_judge_provider[0].provider_id
+
+    # Register first
+    register_scoring_function(
+        llama_stack_client,
+        llm_as_judge_provider_id,
+        sample_scoring_fn_id,
+        judge_model_id,
+        sample_judge_prompt_template,
+    )
+
+    # Ensure it is present
+    list_response = llama_stack_client.scoring_functions.list()
+    assert any(x.identifier == sample_scoring_fn_id for x in list_response)
+
+    # Unregister scoring fn
+    try:
+        base_url = llama_stack_client.base_url
+    except AttributeError:
+        pytest.skip("No server base_url available; cannot test HTTP unregister in library mode")
+
+    resp = requests.delete(f"{base_url}/v1/scoring-functions/{sample_scoring_fn_id}", timeout=30)
+    assert resp.status_code in (200, 204)
+    list_after = llama_stack_client.scoring_functions.list()
+    assert all(x.identifier != sample_scoring_fn_id for x in list_after)
 
 
 @pytest.mark.parametrize("scoring_fn_id", ["basic::equality"])
@@ -105,6 +105,9 @@ class ScoringFunctionsImpl(Impl):
     async def register_scoring_function(self, scoring_fn):
         return scoring_fn
 
+    async def unregister_scoring_function(self, scoring_fn_id: str):
+        return scoring_fn_id
+
 
 class BenchmarksImpl(Impl):
     def __init__(self):
@@ -113,6 +116,9 @@ class BenchmarksImpl(Impl):
     async def register_benchmark(self, benchmark):
         return benchmark
 
+    async def unregister_benchmark(self, benchmark_id: str):
+        return benchmark_id
+
 
 class ToolGroupsImpl(Impl):
     def __init__(self):
@@ -330,6 +336,13 @@ async def test_scoring_functions_routing_table(cached_disk_dist_registry):
     assert "test-scoring-fn" in scoring_fn_ids
     assert "test-scoring-fn-2" in scoring_fn_ids
 
+    # Unregister scoring functions and verify listing
+    for i in range(len(scoring_functions.data)):
+        await table.unregister_scoring_function(scoring_functions.data[i].scoring_fn_id)
+
+    scoring_functions_list_after_deletion = await table.list_scoring_functions()
+    assert len(scoring_functions_list_after_deletion.data) == 0
+
 
 async def test_benchmarks_routing_table(cached_disk_dist_registry):
     table = BenchmarksRoutingTable({"test_provider": BenchmarksImpl()}, cached_disk_dist_registry, {})
@@ -347,6 +360,15 @@ async def test_benchmarks_routing_table(cached_disk_dist_registry):
     benchmark_ids = {b.identifier for b in benchmarks.data}
     assert "test-benchmark" in benchmark_ids
 
+    # Unregister the benchmark and verify removal
+    await table.unregister_benchmark(benchmark_id="test-benchmark")
+    benchmarks_after = await table.list_benchmarks()
+    assert len(benchmarks_after.data) == 0
+
+    # Unregistering a non-existent benchmark should raise a clear error
+    with pytest.raises(ValueError, match="Benchmark 'dummy_benchmark' not found"):
+        await table.unregister_benchmark(benchmark_id="dummy_benchmark")
+
 
 async def test_tool_groups_routing_table(cached_disk_dist_registry):
     table = ToolGroupsRoutingTable({"test_provider": ToolGroupsImpl()}, cached_disk_dist_registry, {})
@@ -52,14 +52,19 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
         self.evaluator_post_patcher = patch(
             "llama_stack.providers.remote.eval.nvidia.eval.NVIDIAEvalImpl._evaluator_post"
        )
+        self.evaluator_delete_patcher = patch(
+            "llama_stack.providers.remote.eval.nvidia.eval.NVIDIAEvalImpl._evaluator_delete"
+        )
 
         self.mock_evaluator_get = self.evaluator_get_patcher.start()
         self.mock_evaluator_post = self.evaluator_post_patcher.start()
+        self.mock_evaluator_delete = self.evaluator_delete_patcher.start()
 
     def tearDown(self):
         """Clean up after each test."""
         self.evaluator_get_patcher.stop()
         self.evaluator_post_patcher.stop()
+        self.evaluator_delete_patcher.stop()
 
     def _assert_request_body(self, expected_json):
         """Helper method to verify request body in Evaluator POST request is correct"""
@@ -115,6 +120,13 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
         self.mock_evaluator_post.assert_called_once()
         self._assert_request_body({"namespace": benchmark.provider_id, "name": benchmark.identifier, **eval_config})
 
+    def test_unregister_benchmark(self):
+        # Unregister the benchmark
+        self.run_async(self.eval_impl.unregister_benchmark(benchmark_id=MOCK_BENCHMARK_ID))
+
+        # Verify the Evaluator API was called correctly
+        self.mock_evaluator_delete.assert_called_once_with(f"/v1/evaluation/configs/nvidia/{MOCK_BENCHMARK_ID}")
+
     def test_run_eval(self):
         benchmark_config = BenchmarkConfig(
             eval_candidate=ModelCandidate(