feat: convert Benchmarks API to use FastAPI router (#4309)

# What does this PR do?

Convert the Benchmarks API from @webmethod decorators to the FastAPI router pattern, matching the Batches API structure. One notable change is the update of stack.py to handle request models in register_resources().

Closes: #4308

## Test Plan

CI and `curl http://localhost:8321/v1/inspect/routes | jq '.data[] | select(.route | contains("benchmark"))'`

---------

Signed-off-by: Sébastien Han <seb@redhat.com>

parent 661985e240
commit ff375f1abb
18 changed files with 862 additions and 195 deletions
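The stack.py change called out above (handling request models in register_resources()) is not included in this excerpt. As a rough, hypothetical sketch of what that conversion looks like, assuming the resource loop hands over a BenchmarkInput from the run config (helper name and signature are illustrative, not the actual stack.py code):

```python
# Hypothetical sketch only; the real register_resources() in stack.py is not shown in this excerpt.
from llama_stack_api.benchmarks import BenchmarkInput, Benchmarks, RegisterBenchmarkRequest


async def register_benchmark_resource(impl: Benchmarks, benchmark: BenchmarkInput) -> None:
    # Old decorator-based style passed flattened keyword arguments:
    #   await impl.register_benchmark(benchmark_id=..., dataset_id=..., scoring_functions=...)
    # With the router pattern, the arguments are wrapped in a single request model:
    request = RegisterBenchmarkRequest(
        benchmark_id=benchmark.benchmark_id,
        dataset_id=benchmark.dataset_id,
        scoring_functions=benchmark.scoring_functions,
        provider_benchmark_id=benchmark.provider_benchmark_id,
        provider_id=benchmark.provider_id,
        metadata=benchmark.metadata,
    )
    await impl.register_benchmark(request)
```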
@@ -40,7 +40,11 @@ from .benchmarks import (
    BenchmarkInput,
    Benchmarks,
    CommonBenchmarkFields,
    GetBenchmarkRequest,
    ListBenchmarksRequest,
    ListBenchmarksResponse,
    RegisterBenchmarkRequest,
    UnregisterBenchmarkRequest,
)

# Import commonly used types from common submodule
@@ -567,7 +571,11 @@ __all__ = [
    "LLMRAGQueryGeneratorConfig",
    "ListBatchesResponse",
    "RetrieveBatchRequest",
    "GetBenchmarkRequest",
    "ListBenchmarksRequest",
    "ListBenchmarksResponse",
    "RegisterBenchmarkRequest",
    "UnregisterBenchmarkRequest",
    "ListDatasetsResponse",
    "ListModelsResponse",
    "ListOpenAIChatCompletionResponse",
Deleted file (105 lines):

@@ -1,105 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any, Literal, Protocol, runtime_checkable

from pydantic import BaseModel, Field

from llama_stack_api.resource import Resource, ResourceType
from llama_stack_api.schema_utils import json_schema_type, webmethod
from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA


class CommonBenchmarkFields(BaseModel):
    dataset_id: str
    scoring_functions: list[str]
    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="Metadata for this evaluation task",
    )


@json_schema_type
class Benchmark(CommonBenchmarkFields, Resource):
    """A benchmark resource for evaluating model performance.

    :param dataset_id: Identifier of the dataset to use for the benchmark evaluation
    :param scoring_functions: List of scoring function identifiers to apply during evaluation
    :param metadata: Metadata for this evaluation task
    :param type: The resource type, always benchmark
    """

    type: Literal[ResourceType.benchmark] = ResourceType.benchmark

    @property
    def benchmark_id(self) -> str:
        return self.identifier

    @property
    def provider_benchmark_id(self) -> str | None:
        return self.provider_resource_id


class BenchmarkInput(CommonBenchmarkFields, BaseModel):
    benchmark_id: str
    provider_id: str | None = None
    provider_benchmark_id: str | None = None


@json_schema_type
class ListBenchmarksResponse(BaseModel):
    data: list[Benchmark]


@runtime_checkable
class Benchmarks(Protocol):
    @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
    async def list_benchmarks(self) -> ListBenchmarksResponse:
        """List all benchmarks.

        :returns: A ListBenchmarksResponse.
        """
        ...

    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
    async def get_benchmark(
        self,
        benchmark_id: str,
    ) -> Benchmark:
        """Get a benchmark by its ID.

        :param benchmark_id: The ID of the benchmark to get.
        :returns: A Benchmark.
        """
        ...

    @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
    async def register_benchmark(
        self,
        benchmark_id: str,
        dataset_id: str,
        scoring_functions: list[str],
        provider_benchmark_id: str | None = None,
        provider_id: str | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> None:
        """Register a benchmark.

        :param benchmark_id: The ID of the benchmark to register.
        :param dataset_id: The ID of the dataset to use for the benchmark.
        :param scoring_functions: The scoring functions to use for the benchmark.
        :param provider_benchmark_id: The ID of the provider benchmark to use for the benchmark.
        :param provider_id: The ID of the provider to use for the benchmark.
        :param metadata: The metadata to use for the benchmark.
        """
        ...

    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
    async def unregister_benchmark(self, benchmark_id: str) -> None:
        """Unregister a benchmark.

        :param benchmark_id: The ID of the benchmark to unregister.
        """
        ...
43  src/llama_stack_api/benchmarks/__init__.py  Normal file
@@ -0,0 +1,43 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""Benchmarks API protocol and models.

This module contains the Benchmarks protocol definition.
Pydantic models are defined in llama_stack_api.benchmarks.models.
The FastAPI router is defined in llama_stack_api.benchmarks.fastapi_routes.
"""

# Import fastapi_routes for router factory access
from . import fastapi_routes

# Import protocol for re-export
from .api import Benchmarks

# Import models for re-export
from .models import (
    Benchmark,
    BenchmarkInput,
    CommonBenchmarkFields,
    GetBenchmarkRequest,
    ListBenchmarksRequest,
    ListBenchmarksResponse,
    RegisterBenchmarkRequest,
    UnregisterBenchmarkRequest,
)

__all__ = [
    "Benchmarks",
    "Benchmark",
    "BenchmarkInput",
    "CommonBenchmarkFields",
    "ListBenchmarksResponse",
    "ListBenchmarksRequest",
    "GetBenchmarkRequest",
    "RegisterBenchmarkRequest",
    "UnregisterBenchmarkRequest",
    "fastapi_routes",
]
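Since the package `__init__.py` re-exports the protocol, the models, and the `fastapi_routes` module, import sites can stay on the package path. A small illustrative usage sketch (the IDs below are placeholders, not real resources):

```python
# Illustrative usage of the re-exports; benchmark/dataset/scoring IDs are placeholders.
from llama_stack_api.benchmarks import RegisterBenchmarkRequest, fastapi_routes

request = RegisterBenchmarkRequest(
    benchmark_id="my-benchmark",
    dataset_id="my-dataset",
    scoring_functions=["my-scoring-fn"],
)
print(request.model_dump(exclude_none=True))
print(callable(fastapi_routes.create_router))  # router factory is reachable via the package
```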
39  src/llama_stack_api/benchmarks/api.py  Normal file
@@ -0,0 +1,39 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Protocol, runtime_checkable

from .models import (
    Benchmark,
    GetBenchmarkRequest,
    ListBenchmarksRequest,
    ListBenchmarksResponse,
    RegisterBenchmarkRequest,
    UnregisterBenchmarkRequest,
)


@runtime_checkable
class Benchmarks(Protocol):
    async def list_benchmarks(
        self,
        request: ListBenchmarksRequest,
    ) -> ListBenchmarksResponse: ...

    async def get_benchmark(
        self,
        request: GetBenchmarkRequest,
    ) -> Benchmark: ...

    async def register_benchmark(
        self,
        request: RegisterBenchmarkRequest,
    ) -> None: ...

    async def unregister_benchmark(
        self,
        request: UnregisterBenchmarkRequest,
    ) -> None: ...
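Each protocol method now takes a single request model instead of flattened parameters, so an implementation's surface is one argument per endpoint. A minimal in-memory sketch (not part of this PR; the Resource base fields `identifier`, `provider_id`, and `provider_resource_id` are assumed from `llama_stack_api.resource`):

```python
# Minimal in-memory sketch of a Benchmarks implementation; not from the PR.
# Resource base fields (identifier, provider_id, provider_resource_id) are assumptions.
from llama_stack_api.benchmarks import (
    Benchmark,
    Benchmarks,
    GetBenchmarkRequest,
    ListBenchmarksRequest,
    ListBenchmarksResponse,
    RegisterBenchmarkRequest,
    UnregisterBenchmarkRequest,
)


class InMemoryBenchmarks:
    def __init__(self) -> None:
        self._store: dict[str, Benchmark] = {}

    async def list_benchmarks(self, request: ListBenchmarksRequest) -> ListBenchmarksResponse:
        return ListBenchmarksResponse(data=list(self._store.values()))

    async def get_benchmark(self, request: GetBenchmarkRequest) -> Benchmark:
        return self._store[request.benchmark_id]

    async def register_benchmark(self, request: RegisterBenchmarkRequest) -> None:
        self._store[request.benchmark_id] = Benchmark(
            identifier=request.benchmark_id,
            provider_id=request.provider_id or "inline",
            provider_resource_id=request.provider_benchmark_id or request.benchmark_id,
            dataset_id=request.dataset_id,
            scoring_functions=request.scoring_functions,
            metadata=request.metadata or {},
        )

    async def unregister_benchmark(self, request: UnregisterBenchmarkRequest) -> None:
        self._store.pop(request.benchmark_id, None)


# runtime_checkable protocols support structural isinstance checks (method names only)
assert isinstance(InMemoryBenchmarks(), Benchmarks)
```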
109  src/llama_stack_api/benchmarks/fastapi_routes.py  Normal file
@@ -0,0 +1,109 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""FastAPI router for the Benchmarks API.

This module defines the FastAPI router for the Benchmarks API using standard
FastAPI route decorators. The router is defined in the API package to keep
all API-related code together.
"""

from typing import Annotated

from fastapi import APIRouter, Body, Depends

from llama_stack_api.router_utils import create_path_dependency, create_query_dependency, standard_responses
from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA

from .api import Benchmarks
from .models import (
    Benchmark,
    GetBenchmarkRequest,
    ListBenchmarksRequest,
    ListBenchmarksResponse,
    RegisterBenchmarkRequest,
    UnregisterBenchmarkRequest,
)

# Automatically generate dependency functions from Pydantic models
# This ensures the models are the single source of truth for descriptions
get_list_benchmarks_request = create_query_dependency(ListBenchmarksRequest)
get_get_benchmark_request = create_path_dependency(GetBenchmarkRequest)
get_unregister_benchmark_request = create_path_dependency(UnregisterBenchmarkRequest)


def create_router(impl: Benchmarks) -> APIRouter:
    """Create a FastAPI router for the Benchmarks API.

    Args:
        impl: The Benchmarks implementation instance

    Returns:
        APIRouter configured for the Benchmarks API
    """
    router = APIRouter(
        prefix=f"/{LLAMA_STACK_API_V1ALPHA}",
        tags=["Benchmarks"],
        responses=standard_responses,
    )

    @router.get(
        "/eval/benchmarks",
        response_model=ListBenchmarksResponse,
        summary="List all benchmarks.",
        description="List all benchmarks.",
        responses={
            200: {"description": "A ListBenchmarksResponse."},
        },
    )
    async def list_benchmarks(
        request: Annotated[ListBenchmarksRequest, Depends(get_list_benchmarks_request)],
    ) -> ListBenchmarksResponse:
        return await impl.list_benchmarks(request)

    @router.get(
        "/eval/benchmarks/{benchmark_id}",
        response_model=Benchmark,
        summary="Get a benchmark by its ID.",
        description="Get a benchmark by its ID.",
        responses={
            200: {"description": "A Benchmark."},
        },
    )
    async def get_benchmark(
        request: Annotated[GetBenchmarkRequest, Depends(get_get_benchmark_request)],
    ) -> Benchmark:
        return await impl.get_benchmark(request)

    @router.post(
        "/eval/benchmarks",
        summary="Register a benchmark.",
        description="Register a benchmark.",
        responses={
            200: {"description": "The benchmark was successfully registered."},
        },
        deprecated=True,
    )
    async def register_benchmark(
        request: Annotated[RegisterBenchmarkRequest, Body(...)],
    ) -> None:
        return await impl.register_benchmark(request)

    @router.delete(
        "/eval/benchmarks/{benchmark_id}",
        summary="Unregister a benchmark.",
        description="Unregister a benchmark.",
        responses={
            200: {"description": "The benchmark was successfully unregistered."},
        },
        deprecated=True,
    )
    async def unregister_benchmark(
        request: Annotated[UnregisterBenchmarkRequest, Depends(get_unregister_benchmark_request)],
    ) -> None:
        return await impl.unregister_benchmark(request)

    return router
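With the factory pattern, the Benchmarks routes are mounted like any other FastAPI router. A rough wiring sketch, not the stack's actual server code, reusing the InMemoryBenchmarks sketch from above and assuming LLAMA_STACK_API_V1ALPHA resolves to `v1alpha`:

```python
# Rough mounting sketch; the Llama Stack server's real wiring is not shown in this diff.
from fastapi import FastAPI
from fastapi.testclient import TestClient

from llama_stack_api.benchmarks import fastapi_routes

app = FastAPI()
# InMemoryBenchmarks is the illustrative implementation sketched earlier on this page.
app.include_router(fastapi_routes.create_router(impl=InMemoryBenchmarks()))

client = TestClient(app)
resp = client.get("/v1alpha/eval/benchmarks")  # assumes LLAMA_STACK_API_V1ALPHA == "v1alpha"
print(resp.status_code, resp.json())  # expected: 200 {"data": []}
```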
109  src/llama_stack_api/benchmarks/models.py  Normal file
@@ -0,0 +1,109 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""Pydantic models for Benchmarks API requests and responses.

This module defines the request and response models for the Benchmarks API
using Pydantic with Field descriptions for OpenAPI schema generation.
"""

from typing import Any, Literal

from pydantic import BaseModel, Field

from llama_stack_api.resource import Resource, ResourceType
from llama_stack_api.schema_utils import json_schema_type


@json_schema_type
class ListBenchmarksRequest(BaseModel):
    """Request model for listing benchmarks."""

    pass


@json_schema_type
class GetBenchmarkRequest(BaseModel):
    """Request model for getting a benchmark."""

    benchmark_id: str = Field(..., description="The ID of the benchmark to get.")


@json_schema_type
class RegisterBenchmarkRequest(BaseModel):
    """Request model for registering a benchmark."""

    benchmark_id: str = Field(..., description="The ID of the benchmark to register.")
    dataset_id: str = Field(..., description="The ID of the dataset to use for the benchmark.")
    scoring_functions: list[str] = Field(..., description="The scoring functions to use for the benchmark.")
    provider_benchmark_id: str | None = Field(
        default=None, description="The ID of the provider benchmark to use for the benchmark."
    )
    provider_id: str | None = Field(default=None, description="The ID of the provider to use for the benchmark.")
    metadata: dict[str, Any] | None = Field(default=None, description="The metadata to use for the benchmark.")


@json_schema_type
class UnregisterBenchmarkRequest(BaseModel):
    """Request model for unregistering a benchmark."""

    benchmark_id: str = Field(..., description="The ID of the benchmark to unregister.")


class CommonBenchmarkFields(BaseModel):
    dataset_id: str = Field(..., description="Identifier of the dataset to use for the benchmark evaluation.")
    scoring_functions: list[str] = Field(
        ..., description="List of scoring function identifiers to apply during evaluation."
    )
    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="Metadata for this evaluation task.",
    )


@json_schema_type
class Benchmark(CommonBenchmarkFields, Resource):
    """A benchmark resource for evaluating model performance."""

    type: Literal[ResourceType.benchmark] = Field(
        default=ResourceType.benchmark,
        description="The resource type, always benchmark.",
    )

    @property
    def benchmark_id(self) -> str:
        return self.identifier

    @property
    def provider_benchmark_id(self) -> str | None:
        return self.provider_resource_id


class BenchmarkInput(CommonBenchmarkFields, BaseModel):
    benchmark_id: str = Field(..., description="The ID of the benchmark.")
    provider_id: str | None = Field(default=None, description="The ID of the provider to use for the benchmark.")
    provider_benchmark_id: str | None = Field(
        default=None, description="The ID of the provider benchmark to use for the benchmark."
    )


@json_schema_type
class ListBenchmarksResponse(BaseModel):
    """Response containing a list of benchmark objects."""

    data: list[Benchmark] = Field(..., description="List of benchmark objects.")


__all__ = [
    "ListBenchmarksRequest",
    "GetBenchmarkRequest",
    "RegisterBenchmarkRequest",
    "UnregisterBenchmarkRequest",
    "CommonBenchmarkFields",
    "Benchmark",
    "BenchmarkInput",
    "ListBenchmarksResponse",
]
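Because every endpoint's parameters now live on a Pydantic model, required-field validation and the OpenAPI schema both come directly from these definitions. A quick validation sketch (IDs are placeholders):

```python
# Sketch of request-model validation; field names are taken from models.py above.
from pydantic import ValidationError

from llama_stack_api.benchmarks import RegisterBenchmarkRequest

ok = RegisterBenchmarkRequest(
    benchmark_id="bench-1",
    dataset_id="ds-1",
    scoring_functions=["scorer-1"],
)
print(ok.model_dump(exclude_none=True))

try:
    RegisterBenchmarkRequest(benchmark_id="bench-1")  # missing required fields
except ValidationError as e:
    print(e.error_count(), "validation errors")  # dataset_id and scoring_functions are required
```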