feat: convert Benchmarks API to use FastAPI router (#4309)

# What does this PR do?

Convert the Benchmarks API from `@webmethod` decorators to the FastAPI router
pattern, matching the Batches API structure.

One notable change is that `stack.py` was updated so that
`register_resources()` handles the new request models.
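
To make the `stack.py` change concrete, here is a rough sketch of the new registration flow. This is a hypothetical illustration, not the literal diff; the `run_config.benchmarks` field name and the helper are assumptions:

```python
# Hypothetical sketch: register_resources() now builds request models
# instead of passing loose keyword arguments to the provider impl.
from llama_stack_api.benchmarks import RegisterBenchmarkRequest


async def register_benchmarks(run_config, impl) -> None:
    # run_config.benchmarks is assumed to hold BenchmarkInput entries
    for b in run_config.benchmarks:
        request = RegisterBenchmarkRequest(
            benchmark_id=b.benchmark_id,
            dataset_id=b.dataset_id,
            scoring_functions=b.scoring_functions,
            provider_id=b.provider_id,
            provider_benchmark_id=b.provider_benchmark_id,
            metadata=b.metadata,
        )
        await impl.register_benchmark(request)
```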

Closes: #4308 

## Test Plan

CI and `curl http://localhost:8321/v1/inspect/routes | jq '.data[] | select(.route | contains("benchmark"))'`
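
Assuming `LLAMA_STACK_API_V1ALPHA` resolves to `v1alpha`, the filtered output should list the four benchmark routes roughly like this (the exact inspect payload shape may differ):

```
{"route": "/v1alpha/eval/benchmarks", "method": "GET"}
{"route": "/v1alpha/eval/benchmarks/{benchmark_id}", "method": "GET"}
{"route": "/v1alpha/eval/benchmarks", "method": "POST"}
{"route": "/v1alpha/eval/benchmarks/{benchmark_id}", "method": "DELETE"}
```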

---------

Signed-off-by: Sébastien Han <seb@redhat.com>
Authored by Sébastien Han on 2025-12-10 15:04:27 +01:00; committed via GitHub.
commit ff375f1abb (parent 661985e240)
18 changed files with 862 additions and 195 deletions

llama_stack_api/__init__.py

@@ -40,7 +40,11 @@ from .benchmarks import (
    BenchmarkInput,
    Benchmarks,
    CommonBenchmarkFields,
    GetBenchmarkRequest,
    ListBenchmarksRequest,
    ListBenchmarksResponse,
    RegisterBenchmarkRequest,
    UnregisterBenchmarkRequest,
)
# Import commonly used types from common submodule
@@ -567,7 +571,11 @@ __all__ = [
    "LLMRAGQueryGeneratorConfig",
    "ListBatchesResponse",
    "RetrieveBatchRequest",
    "GetBenchmarkRequest",
    "ListBenchmarksRequest",
    "ListBenchmarksResponse",
    "RegisterBenchmarkRequest",
    "UnregisterBenchmarkRequest",
    "ListDatasetsResponse",
    "ListModelsResponse",
    "ListOpenAIChatCompletionResponse",

llama_stack_api/benchmarks.py (deleted)

@@ -1,105 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any, Literal, Protocol, runtime_checkable

from pydantic import BaseModel, Field

from llama_stack_api.resource import Resource, ResourceType
from llama_stack_api.schema_utils import json_schema_type, webmethod
from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA


class CommonBenchmarkFields(BaseModel):
    dataset_id: str
    scoring_functions: list[str]
    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="Metadata for this evaluation task",
    )


@json_schema_type
class Benchmark(CommonBenchmarkFields, Resource):
    """A benchmark resource for evaluating model performance.

    :param dataset_id: Identifier of the dataset to use for the benchmark evaluation
    :param scoring_functions: List of scoring function identifiers to apply during evaluation
    :param metadata: Metadata for this evaluation task
    :param type: The resource type, always benchmark
    """

    type: Literal[ResourceType.benchmark] = ResourceType.benchmark

    @property
    def benchmark_id(self) -> str:
        return self.identifier

    @property
    def provider_benchmark_id(self) -> str | None:
        return self.provider_resource_id


class BenchmarkInput(CommonBenchmarkFields, BaseModel):
    benchmark_id: str
    provider_id: str | None = None
    provider_benchmark_id: str | None = None


@json_schema_type
class ListBenchmarksResponse(BaseModel):
    data: list[Benchmark]


@runtime_checkable
class Benchmarks(Protocol):
    @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
    async def list_benchmarks(self) -> ListBenchmarksResponse:
        """List all benchmarks.

        :returns: A ListBenchmarksResponse.
        """
        ...

    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
    async def get_benchmark(
        self,
        benchmark_id: str,
    ) -> Benchmark:
        """Get a benchmark by its ID.

        :param benchmark_id: The ID of the benchmark to get.
        :returns: A Benchmark.
        """
        ...

    @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
    async def register_benchmark(
        self,
        benchmark_id: str,
        dataset_id: str,
        scoring_functions: list[str],
        provider_benchmark_id: str | None = None,
        provider_id: str | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> None:
        """Register a benchmark.

        :param benchmark_id: The ID of the benchmark to register.
        :param dataset_id: The ID of the dataset to use for the benchmark.
        :param scoring_functions: The scoring functions to use for the benchmark.
        :param provider_benchmark_id: The ID of the provider benchmark to use for the benchmark.
        :param provider_id: The ID of the provider to use for the benchmark.
        :param metadata: The metadata to use for the benchmark.
        """
        ...

    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
    async def unregister_benchmark(self, benchmark_id: str) -> None:
        """Unregister a benchmark.

        :param benchmark_id: The ID of the benchmark to unregister.
        """
        ...

llama_stack_api/benchmarks/__init__.py (new)

@@ -0,0 +1,43 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""Benchmarks API protocol and models.

This module contains the Benchmarks protocol definition.
Pydantic models are defined in llama_stack_api.benchmarks.models.
The FastAPI router is defined in llama_stack_api.benchmarks.fastapi_routes.
"""

# Import fastapi_routes for router factory access
from . import fastapi_routes

# Import protocol for re-export
from .api import Benchmarks

# Import models for re-export
from .models import (
    Benchmark,
    BenchmarkInput,
    CommonBenchmarkFields,
    GetBenchmarkRequest,
    ListBenchmarksRequest,
    ListBenchmarksResponse,
    RegisterBenchmarkRequest,
    UnregisterBenchmarkRequest,
)

__all__ = [
    "Benchmarks",
    "Benchmark",
    "BenchmarkInput",
    "CommonBenchmarkFields",
    "ListBenchmarksResponse",
    "ListBenchmarksRequest",
    "GetBenchmarkRequest",
    "RegisterBenchmarkRequest",
    "UnregisterBenchmarkRequest",
    "fastapi_routes",
]
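
Because the old single module became a package, these re-exports keep existing call sites working unchanged; a hypothetical consumer sketch:

```python
# Imports resolve exactly as before the module -> package split.
from llama_stack_api.benchmarks import Benchmarks, RegisterBenchmarkRequest, fastapi_routes

# The router factory is reachable through the re-exported submodule:
# fastapi_routes.create_router(impl) returns a fastapi.APIRouter.
```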

llama_stack_api/benchmarks/api.py (new)

@@ -0,0 +1,39 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Protocol, runtime_checkable

from .models import (
    Benchmark,
    GetBenchmarkRequest,
    ListBenchmarksRequest,
    ListBenchmarksResponse,
    RegisterBenchmarkRequest,
    UnregisterBenchmarkRequest,
)


@runtime_checkable
class Benchmarks(Protocol):
    async def list_benchmarks(
        self,
        request: ListBenchmarksRequest,
    ) -> ListBenchmarksResponse: ...

    async def get_benchmark(
        self,
        request: GetBenchmarkRequest,
    ) -> Benchmark: ...

    async def register_benchmark(
        self,
        request: RegisterBenchmarkRequest,
    ) -> None: ...

    async def unregister_benchmark(
        self,
        request: UnregisterBenchmarkRequest,
    ) -> None: ...
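
For illustration, a minimal in-memory implementation satisfying this protocol might look like the sketch below. The `Resource` constructor fields (`identifier`, `provider_id`, `provider_resource_id`) are assumed from the properties `Benchmark` defines:

```python
# Minimal in-memory sketch of the Benchmarks protocol; not the real provider.
from llama_stack_api.benchmarks import (
    Benchmark,
    GetBenchmarkRequest,
    ListBenchmarksRequest,
    ListBenchmarksResponse,
    RegisterBenchmarkRequest,
    UnregisterBenchmarkRequest,
)


class InMemoryBenchmarks:
    def __init__(self) -> None:
        self._benchmarks: dict[str, Benchmark] = {}

    async def list_benchmarks(self, request: ListBenchmarksRequest) -> ListBenchmarksResponse:
        return ListBenchmarksResponse(data=list(self._benchmarks.values()))

    async def get_benchmark(self, request: GetBenchmarkRequest) -> Benchmark:
        return self._benchmarks[request.benchmark_id]

    async def register_benchmark(self, request: RegisterBenchmarkRequest) -> None:
        # Resource field names below are assumptions based on the Benchmark properties.
        self._benchmarks[request.benchmark_id] = Benchmark(
            identifier=request.benchmark_id,
            provider_id=request.provider_id,
            provider_resource_id=request.provider_benchmark_id,
            dataset_id=request.dataset_id,
            scoring_functions=request.scoring_functions,
            metadata=request.metadata or {},
        )

    async def unregister_benchmark(self, request: UnregisterBenchmarkRequest) -> None:
        self._benchmarks.pop(request.benchmark_id, None)
```

Because the protocol is `runtime_checkable` and purely structural, no inheritance is needed; `isinstance(InMemoryBenchmarks(), Benchmarks)` holds.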

llama_stack_api/benchmarks/fastapi_routes.py (new)

@@ -0,0 +1,109 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""FastAPI router for the Benchmarks API.

This module defines the FastAPI router for the Benchmarks API using standard
FastAPI route decorators. The router is defined in the API package to keep
all API-related code together.
"""

from typing import Annotated

from fastapi import APIRouter, Body, Depends

from llama_stack_api.router_utils import create_path_dependency, create_query_dependency, standard_responses
from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA

from .api import Benchmarks
from .models import (
    Benchmark,
    GetBenchmarkRequest,
    ListBenchmarksRequest,
    ListBenchmarksResponse,
    RegisterBenchmarkRequest,
    UnregisterBenchmarkRequest,
)

# Automatically generate dependency functions from Pydantic models
# This ensures the models are the single source of truth for descriptions
get_list_benchmarks_request = create_query_dependency(ListBenchmarksRequest)
get_get_benchmark_request = create_path_dependency(GetBenchmarkRequest)
get_unregister_benchmark_request = create_path_dependency(UnregisterBenchmarkRequest)


def create_router(impl: Benchmarks) -> APIRouter:
    """Create a FastAPI router for the Benchmarks API.

    Args:
        impl: The Benchmarks implementation instance

    Returns:
        APIRouter configured for the Benchmarks API
    """
    router = APIRouter(
        prefix=f"/{LLAMA_STACK_API_V1ALPHA}",
        tags=["Benchmarks"],
        responses=standard_responses,
    )

    @router.get(
        "/eval/benchmarks",
        response_model=ListBenchmarksResponse,
        summary="List all benchmarks.",
        description="List all benchmarks.",
        responses={
            200: {"description": "A ListBenchmarksResponse."},
        },
    )
    async def list_benchmarks(
        request: Annotated[ListBenchmarksRequest, Depends(get_list_benchmarks_request)],
    ) -> ListBenchmarksResponse:
        return await impl.list_benchmarks(request)

    @router.get(
        "/eval/benchmarks/{benchmark_id}",
        response_model=Benchmark,
        summary="Get a benchmark by its ID.",
        description="Get a benchmark by its ID.",
        responses={
            200: {"description": "A Benchmark."},
        },
    )
    async def get_benchmark(
        request: Annotated[GetBenchmarkRequest, Depends(get_get_benchmark_request)],
    ) -> Benchmark:
        return await impl.get_benchmark(request)

    @router.post(
        "/eval/benchmarks",
        summary="Register a benchmark.",
        description="Register a benchmark.",
        responses={
            200: {"description": "The benchmark was successfully registered."},
        },
        deprecated=True,
    )
    async def register_benchmark(
        request: Annotated[RegisterBenchmarkRequest, Body(...)],
    ) -> None:
        return await impl.register_benchmark(request)

    @router.delete(
        "/eval/benchmarks/{benchmark_id}",
        summary="Unregister a benchmark.",
        description="Unregister a benchmark.",
        responses={
            200: {"description": "The benchmark was successfully unregistered."},
        },
        deprecated=True,
    )
    async def unregister_benchmark(
        request: Annotated[UnregisterBenchmarkRequest, Depends(get_unregister_benchmark_request)],
    ) -> None:
        return await impl.unregister_benchmark(request)

    return router
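
Wiring it together, a hypothetical app setup (reusing the in-memory sketch above):

```python
# Mount the generated router on a FastAPI app; illustration only.
from fastapi import FastAPI

from llama_stack_api.benchmarks import fastapi_routes

app = FastAPI()
impl = InMemoryBenchmarks()  # any object satisfying the Benchmarks protocol
app.include_router(fastapi_routes.create_router(impl))

# Routes land under the version prefix, e.g. GET /v1alpha/eval/benchmarks.
# GET/DELETE parameters are generated from the request models, while
# POST /eval/benchmarks takes RegisterBenchmarkRequest as the JSON body.
```

Generating the query and path dependencies from the Pydantic models keeps the field descriptions in one place, so the OpenAPI schema and the router never drift apart.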

llama_stack_api/benchmarks/models.py (new)

@@ -0,0 +1,109 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""Pydantic models for Benchmarks API requests and responses.

This module defines the request and response models for the Benchmarks API
using Pydantic with Field descriptions for OpenAPI schema generation.
"""

from typing import Any, Literal

from pydantic import BaseModel, Field

from llama_stack_api.resource import Resource, ResourceType
from llama_stack_api.schema_utils import json_schema_type


@json_schema_type
class ListBenchmarksRequest(BaseModel):
    """Request model for listing benchmarks."""

    pass


@json_schema_type
class GetBenchmarkRequest(BaseModel):
    """Request model for getting a benchmark."""

    benchmark_id: str = Field(..., description="The ID of the benchmark to get.")


@json_schema_type
class RegisterBenchmarkRequest(BaseModel):
    """Request model for registering a benchmark."""

    benchmark_id: str = Field(..., description="The ID of the benchmark to register.")
    dataset_id: str = Field(..., description="The ID of the dataset to use for the benchmark.")
    scoring_functions: list[str] = Field(..., description="The scoring functions to use for the benchmark.")
    provider_benchmark_id: str | None = Field(
        default=None, description="The ID of the provider benchmark to use for the benchmark."
    )
    provider_id: str | None = Field(default=None, description="The ID of the provider to use for the benchmark.")
    metadata: dict[str, Any] | None = Field(default=None, description="The metadata to use for the benchmark.")


@json_schema_type
class UnregisterBenchmarkRequest(BaseModel):
    """Request model for unregistering a benchmark."""

    benchmark_id: str = Field(..., description="The ID of the benchmark to unregister.")


class CommonBenchmarkFields(BaseModel):
    dataset_id: str = Field(..., description="Identifier of the dataset to use for the benchmark evaluation.")
    scoring_functions: list[str] = Field(
        ..., description="List of scoring function identifiers to apply during evaluation."
    )
    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="Metadata for this evaluation task.",
    )


@json_schema_type
class Benchmark(CommonBenchmarkFields, Resource):
    """A benchmark resource for evaluating model performance."""

    type: Literal[ResourceType.benchmark] = Field(
        default=ResourceType.benchmark,
        description="The resource type, always benchmark.",
    )

    @property
    def benchmark_id(self) -> str:
        return self.identifier

    @property
    def provider_benchmark_id(self) -> str | None:
        return self.provider_resource_id


class BenchmarkInput(CommonBenchmarkFields, BaseModel):
    benchmark_id: str = Field(..., description="The ID of the benchmark.")
    provider_id: str | None = Field(default=None, description="The ID of the provider to use for the benchmark.")
    provider_benchmark_id: str | None = Field(
        default=None, description="The ID of the provider benchmark to use for the benchmark."
    )


@json_schema_type
class ListBenchmarksResponse(BaseModel):
    """Response containing a list of benchmark objects."""

    data: list[Benchmark] = Field(..., description="List of benchmark objects.")


__all__ = [
    "ListBenchmarksRequest",
    "GetBenchmarkRequest",
    "RegisterBenchmarkRequest",
    "UnregisterBenchmarkRequest",
    "CommonBenchmarkFields",
    "Benchmark",
    "BenchmarkInput",
    "ListBenchmarksResponse",
]
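
Finally, the request models validate like any Pydantic model; a small sketch with placeholder identifiers:

```python
# Required fields are enforced at construction time; optional ones default to None.
from pydantic import ValidationError

from llama_stack_api.benchmarks import RegisterBenchmarkRequest

req = RegisterBenchmarkRequest(
    benchmark_id="example-benchmark",  # placeholder IDs, not real resources
    dataset_id="example-dataset",
    scoring_functions=["example::accuracy"],
)
assert req.metadata is None

try:
    RegisterBenchmarkRequest(benchmark_id="incomplete")  # missing required fields
except ValidationError as exc:
    print(exc.error_count(), "validation errors")  # dataset_id, scoring_functions
```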