feat: convert Benchmarks API to use FastAPI router (#4309)

# What does this PR do?

Convert the Benchmarks API from @webmethod decorators to the FastAPI router pattern, matching the Batches API structure. One notable change is the update of stack.py to handle request models in register_resources().

Closes: #4308

## Test Plan

CI and `curl http://localhost:8321/v1/inspect/routes | jq '.data[] | select(.route | contains("benchmark"))'`

---------

Signed-off-by: Sébastien Han <seb@redhat.com>

parent 661985e240
commit ff375f1abb
18 changed files with 862 additions and 195 deletions
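The stack.py change called out above (handling request models in register_resources()) is not included in this excerpt. As a rough, hypothetical sketch of what that conversion looks like, assuming the resource loop hands over a BenchmarkInput from the run config (helper name and signature are illustrative, not the actual stack.py code):

```python
# Hypothetical sketch only; the real register_resources() in stack.py is not shown in this excerpt.
from llama_stack_api.benchmarks import BenchmarkInput, Benchmarks, RegisterBenchmarkRequest


async def register_benchmark_resource(impl: Benchmarks, benchmark: BenchmarkInput) -> None:
    # Old decorator-based style passed flattened keyword arguments:
    #   await impl.register_benchmark(benchmark_id=..., dataset_id=..., scoring_functions=...)
    # With the router pattern, the arguments are wrapped in a single request model:
    request = RegisterBenchmarkRequest(
        benchmark_id=benchmark.benchmark_id,
        dataset_id=benchmark.dataset_id,
        scoring_functions=benchmark.scoring_functions,
        provider_benchmark_id=benchmark.provider_benchmark_id,
        provider_id=benchmark.provider_id,
        metadata=benchmark.metadata,
    )
    await impl.register_benchmark(request)
```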
@@ -40,7 +40,11 @@ from .benchmarks import (
    BenchmarkInput,
    Benchmarks,
    CommonBenchmarkFields,
    GetBenchmarkRequest,
    ListBenchmarksRequest,
    ListBenchmarksResponse,
    RegisterBenchmarkRequest,
    UnregisterBenchmarkRequest,
)

# Import commonly used types from common submodule
@@ -567,7 +571,11 @@ __all__ = [
    "LLMRAGQueryGeneratorConfig",
    "ListBatchesResponse",
    "RetrieveBatchRequest",
    "GetBenchmarkRequest",
    "ListBenchmarksRequest",
    "ListBenchmarksResponse",
    "RegisterBenchmarkRequest",
    "UnregisterBenchmarkRequest",
    "ListDatasetsResponse",
    "ListModelsResponse",
    "ListOpenAIChatCompletionResponse",
Deleted file (105 lines):

@@ -1,105 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any, Literal, Protocol, runtime_checkable

from pydantic import BaseModel, Field

from llama_stack_api.resource import Resource, ResourceType
from llama_stack_api.schema_utils import json_schema_type, webmethod
from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA


class CommonBenchmarkFields(BaseModel):
    dataset_id: str
    scoring_functions: list[str]
    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="Metadata for this evaluation task",
    )


@json_schema_type
class Benchmark(CommonBenchmarkFields, Resource):
    """A benchmark resource for evaluating model performance.

    :param dataset_id: Identifier of the dataset to use for the benchmark evaluation
    :param scoring_functions: List of scoring function identifiers to apply during evaluation
    :param metadata: Metadata for this evaluation task
    :param type: The resource type, always benchmark
    """

    type: Literal[ResourceType.benchmark] = ResourceType.benchmark

    @property
    def benchmark_id(self) -> str:
        return self.identifier

    @property
    def provider_benchmark_id(self) -> str | None:
        return self.provider_resource_id


class BenchmarkInput(CommonBenchmarkFields, BaseModel):
    benchmark_id: str
    provider_id: str | None = None
    provider_benchmark_id: str | None = None


@json_schema_type
class ListBenchmarksResponse(BaseModel):
    data: list[Benchmark]


@runtime_checkable
class Benchmarks(Protocol):
    @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
    async def list_benchmarks(self) -> ListBenchmarksResponse:
        """List all benchmarks.

        :returns: A ListBenchmarksResponse.
        """
        ...

    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
    async def get_benchmark(
        self,
        benchmark_id: str,
    ) -> Benchmark:
        """Get a benchmark by its ID.

        :param benchmark_id: The ID of the benchmark to get.
        :returns: A Benchmark.
        """
        ...

    @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
    async def register_benchmark(
        self,
        benchmark_id: str,
        dataset_id: str,
        scoring_functions: list[str],
        provider_benchmark_id: str | None = None,
        provider_id: str | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> None:
        """Register a benchmark.

        :param benchmark_id: The ID of the benchmark to register.
        :param dataset_id: The ID of the dataset to use for the benchmark.
        :param scoring_functions: The scoring functions to use for the benchmark.
        :param provider_benchmark_id: The ID of the provider benchmark to use for the benchmark.
        :param provider_id: The ID of the provider to use for the benchmark.
        :param metadata: The metadata to use for the benchmark.
        """
        ...

    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
    async def unregister_benchmark(self, benchmark_id: str) -> None:
        """Unregister a benchmark.

        :param benchmark_id: The ID of the benchmark to unregister.
        """
        ...
43  src/llama_stack_api/benchmarks/__init__.py  Normal file
@@ -0,0 +1,43 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""Benchmarks API protocol and models.

This module contains the Benchmarks protocol definition.
Pydantic models are defined in llama_stack_api.benchmarks.models.
The FastAPI router is defined in llama_stack_api.benchmarks.fastapi_routes.
"""

# Import fastapi_routes for router factory access
from . import fastapi_routes

# Import protocol for re-export
from .api import Benchmarks

# Import models for re-export
from .models import (
    Benchmark,
    BenchmarkInput,
    CommonBenchmarkFields,
    GetBenchmarkRequest,
    ListBenchmarksRequest,
    ListBenchmarksResponse,
    RegisterBenchmarkRequest,
    UnregisterBenchmarkRequest,
)

__all__ = [
    "Benchmarks",
    "Benchmark",
    "BenchmarkInput",
    "CommonBenchmarkFields",
    "ListBenchmarksResponse",
    "ListBenchmarksRequest",
    "GetBenchmarkRequest",
    "RegisterBenchmarkRequest",
    "UnregisterBenchmarkRequest",
    "fastapi_routes",
]
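Since the package `__init__.py` re-exports the protocol, the models, and the `fastapi_routes` module, import sites can stay on the package path. A small illustrative usage sketch (the IDs below are placeholders, not real resources):

```python
# Illustrative usage of the re-exports; benchmark/dataset/scoring IDs are placeholders.
from llama_stack_api.benchmarks import RegisterBenchmarkRequest, fastapi_routes

request = RegisterBenchmarkRequest(
    benchmark_id="my-benchmark",
    dataset_id="my-dataset",
    scoring_functions=["my-scoring-fn"],
)
print(request.model_dump(exclude_none=True))
print(callable(fastapi_routes.create_router))  # router factory is reachable via the package
```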
39  src/llama_stack_api/benchmarks/api.py  Normal file
@@ -0,0 +1,39 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Protocol, runtime_checkable

from .models import (
    Benchmark,
    GetBenchmarkRequest,
    ListBenchmarksRequest,
    ListBenchmarksResponse,
    RegisterBenchmarkRequest,
    UnregisterBenchmarkRequest,
)


@runtime_checkable
class Benchmarks(Protocol):
    async def list_benchmarks(
        self,
        request: ListBenchmarksRequest,
    ) -> ListBenchmarksResponse: ...

    async def get_benchmark(
        self,
        request: GetBenchmarkRequest,
    ) -> Benchmark: ...

    async def register_benchmark(
        self,
        request: RegisterBenchmarkRequest,
    ) -> None: ...

    async def unregister_benchmark(
        self,
        request: UnregisterBenchmarkRequest,
    ) -> None: ...
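Each protocol method now takes a single request model instead of flattened parameters, so an implementation's surface is one argument per endpoint. A minimal in-memory sketch (not part of this PR; the Resource base fields `identifier`, `provider_id`, and `provider_resource_id` are assumed from `llama_stack_api.resource`):

```python
# Minimal in-memory sketch of a Benchmarks implementation; not from the PR.
# Resource base fields (identifier, provider_id, provider_resource_id) are assumptions.
from llama_stack_api.benchmarks import (
    Benchmark,
    Benchmarks,
    GetBenchmarkRequest,
    ListBenchmarksRequest,
    ListBenchmarksResponse,
    RegisterBenchmarkRequest,
    UnregisterBenchmarkRequest,
)


class InMemoryBenchmarks:
    def __init__(self) -> None:
        self._store: dict[str, Benchmark] = {}

    async def list_benchmarks(self, request: ListBenchmarksRequest) -> ListBenchmarksResponse:
        return ListBenchmarksResponse(data=list(self._store.values()))

    async def get_benchmark(self, request: GetBenchmarkRequest) -> Benchmark:
        return self._store[request.benchmark_id]

    async def register_benchmark(self, request: RegisterBenchmarkRequest) -> None:
        self._store[request.benchmark_id] = Benchmark(
            identifier=request.benchmark_id,
            provider_id=request.provider_id or "inline",
            provider_resource_id=request.provider_benchmark_id or request.benchmark_id,
            dataset_id=request.dataset_id,
            scoring_functions=request.scoring_functions,
            metadata=request.metadata or {},
        )

    async def unregister_benchmark(self, request: UnregisterBenchmarkRequest) -> None:
        self._store.pop(request.benchmark_id, None)


# runtime_checkable protocols support structural isinstance checks (method names only)
assert isinstance(InMemoryBenchmarks(), Benchmarks)
```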
109  src/llama_stack_api/benchmarks/fastapi_routes.py  Normal file
@@ -0,0 +1,109 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""FastAPI router for the Benchmarks API.

This module defines the FastAPI router for the Benchmarks API using standard
FastAPI route decorators. The router is defined in the API package to keep
all API-related code together.
"""

from typing import Annotated

from fastapi import APIRouter, Body, Depends

from llama_stack_api.router_utils import create_path_dependency, create_query_dependency, standard_responses
from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA

from .api import Benchmarks
from .models import (
    Benchmark,
    GetBenchmarkRequest,
    ListBenchmarksRequest,
    ListBenchmarksResponse,
    RegisterBenchmarkRequest,
    UnregisterBenchmarkRequest,
)

# Automatically generate dependency functions from Pydantic models
# This ensures the models are the single source of truth for descriptions
get_list_benchmarks_request = create_query_dependency(ListBenchmarksRequest)
get_get_benchmark_request = create_path_dependency(GetBenchmarkRequest)
get_unregister_benchmark_request = create_path_dependency(UnregisterBenchmarkRequest)


def create_router(impl: Benchmarks) -> APIRouter:
    """Create a FastAPI router for the Benchmarks API.

    Args:
        impl: The Benchmarks implementation instance

    Returns:
        APIRouter configured for the Benchmarks API
    """
    router = APIRouter(
        prefix=f"/{LLAMA_STACK_API_V1ALPHA}",
        tags=["Benchmarks"],
        responses=standard_responses,
    )

    @router.get(
        "/eval/benchmarks",
        response_model=ListBenchmarksResponse,
        summary="List all benchmarks.",
        description="List all benchmarks.",
        responses={
            200: {"description": "A ListBenchmarksResponse."},
        },
    )
    async def list_benchmarks(
        request: Annotated[ListBenchmarksRequest, Depends(get_list_benchmarks_request)],
    ) -> ListBenchmarksResponse:
        return await impl.list_benchmarks(request)

    @router.get(
        "/eval/benchmarks/{benchmark_id}",
        response_model=Benchmark,
        summary="Get a benchmark by its ID.",
        description="Get a benchmark by its ID.",
        responses={
            200: {"description": "A Benchmark."},
        },
    )
    async def get_benchmark(
        request: Annotated[GetBenchmarkRequest, Depends(get_get_benchmark_request)],
    ) -> Benchmark:
        return await impl.get_benchmark(request)

    @router.post(
        "/eval/benchmarks",
        summary="Register a benchmark.",
        description="Register a benchmark.",
        responses={
            200: {"description": "The benchmark was successfully registered."},
        },
        deprecated=True,
    )
    async def register_benchmark(
        request: Annotated[RegisterBenchmarkRequest, Body(...)],
    ) -> None:
        return await impl.register_benchmark(request)

    @router.delete(
        "/eval/benchmarks/{benchmark_id}",
        summary="Unregister a benchmark.",
        description="Unregister a benchmark.",
        responses={
            200: {"description": "The benchmark was successfully unregistered."},
        },
        deprecated=True,
    )
    async def unregister_benchmark(
        request: Annotated[UnregisterBenchmarkRequest, Depends(get_unregister_benchmark_request)],
    ) -> None:
        return await impl.unregister_benchmark(request)

    return router
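With the factory pattern, the Benchmarks routes are mounted like any other FastAPI router. A rough wiring sketch, not the stack's actual server code, reusing the InMemoryBenchmarks sketch from above and assuming LLAMA_STACK_API_V1ALPHA resolves to `v1alpha`:

```python
# Rough mounting sketch; the Llama Stack server's real wiring is not shown in this diff.
from fastapi import FastAPI
from fastapi.testclient import TestClient

from llama_stack_api.benchmarks import fastapi_routes

app = FastAPI()
# InMemoryBenchmarks is the illustrative implementation sketched earlier on this page.
app.include_router(fastapi_routes.create_router(impl=InMemoryBenchmarks()))

client = TestClient(app)
resp = client.get("/v1alpha/eval/benchmarks")  # assumes LLAMA_STACK_API_V1ALPHA == "v1alpha"
print(resp.status_code, resp.json())  # expected: 200 {"data": []}
```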
109  src/llama_stack_api/benchmarks/models.py  Normal file
@@ -0,0 +1,109 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""Pydantic models for Benchmarks API requests and responses.

This module defines the request and response models for the Benchmarks API
using Pydantic with Field descriptions for OpenAPI schema generation.
"""

from typing import Any, Literal

from pydantic import BaseModel, Field

from llama_stack_api.resource import Resource, ResourceType
from llama_stack_api.schema_utils import json_schema_type


@json_schema_type
class ListBenchmarksRequest(BaseModel):
    """Request model for listing benchmarks."""

    pass


@json_schema_type
class GetBenchmarkRequest(BaseModel):
    """Request model for getting a benchmark."""

    benchmark_id: str = Field(..., description="The ID of the benchmark to get.")


@json_schema_type
class RegisterBenchmarkRequest(BaseModel):
    """Request model for registering a benchmark."""

    benchmark_id: str = Field(..., description="The ID of the benchmark to register.")
    dataset_id: str = Field(..., description="The ID of the dataset to use for the benchmark.")
    scoring_functions: list[str] = Field(..., description="The scoring functions to use for the benchmark.")
    provider_benchmark_id: str | None = Field(
        default=None, description="The ID of the provider benchmark to use for the benchmark."
    )
    provider_id: str | None = Field(default=None, description="The ID of the provider to use for the benchmark.")
    metadata: dict[str, Any] | None = Field(default=None, description="The metadata to use for the benchmark.")


@json_schema_type
class UnregisterBenchmarkRequest(BaseModel):
    """Request model for unregistering a benchmark."""

    benchmark_id: str = Field(..., description="The ID of the benchmark to unregister.")


class CommonBenchmarkFields(BaseModel):
    dataset_id: str = Field(..., description="Identifier of the dataset to use for the benchmark evaluation.")
    scoring_functions: list[str] = Field(
        ..., description="List of scoring function identifiers to apply during evaluation."
    )
    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="Metadata for this evaluation task.",
    )


@json_schema_type
class Benchmark(CommonBenchmarkFields, Resource):
    """A benchmark resource for evaluating model performance."""

    type: Literal[ResourceType.benchmark] = Field(
        default=ResourceType.benchmark,
        description="The resource type, always benchmark.",
    )

    @property
    def benchmark_id(self) -> str:
        return self.identifier

    @property
    def provider_benchmark_id(self) -> str | None:
        return self.provider_resource_id


class BenchmarkInput(CommonBenchmarkFields, BaseModel):
    benchmark_id: str = Field(..., description="The ID of the benchmark.")
    provider_id: str | None = Field(default=None, description="The ID of the provider to use for the benchmark.")
    provider_benchmark_id: str | None = Field(
        default=None, description="The ID of the provider benchmark to use for the benchmark."
    )


@json_schema_type
class ListBenchmarksResponse(BaseModel):
    """Response containing a list of benchmark objects."""

    data: list[Benchmark] = Field(..., description="List of benchmark objects.")


__all__ = [
    "ListBenchmarksRequest",
    "GetBenchmarkRequest",
    "RegisterBenchmarkRequest",
    "UnregisterBenchmarkRequest",
    "CommonBenchmarkFields",
    "Benchmark",
    "BenchmarkInput",
    "ListBenchmarksResponse",
]
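Because every endpoint's parameters now live on a Pydantic model, required-field validation and the OpenAPI schema both come directly from these definitions. A quick validation sketch (IDs are placeholders):

```python
# Sketch of request-model validation; field names are taken from models.py above.
from pydantic import ValidationError

from llama_stack_api.benchmarks import RegisterBenchmarkRequest

ok = RegisterBenchmarkRequest(
    benchmark_id="bench-1",
    dataset_id="ds-1",
    scoring_functions=["scorer-1"],
)
print(ok.model_dump(exclude_none=True))

try:
    RegisterBenchmarkRequest(benchmark_id="bench-1")  # missing required fields
except ValidationError as e:
    print(e.error_count(), "validation errors")  # dataset_id and scoring_functions are required
```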