diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index 624443b00..61fa6705c 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -3404,7 +3404,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Benchmarks - summary: List Benchmarks + summary: List all benchmarks. description: List all benchmarks. operationId: list_benchmarks_v1alpha_eval_benchmarks_get post: @@ -3422,10 +3422,10 @@ paths: description: Default Response $ref: '#/components/responses/DefaultError' '204': - description: Successful Response + description: The benchmark was successfully registered. tags: - Benchmarks - summary: Register Benchmark + summary: Register a benchmark. description: Register a benchmark. operationId: register_benchmark_v1alpha_eval_benchmarks_post requestBody: @@ -3445,20 +3445,20 @@ paths: schema: $ref: '#/components/schemas/Benchmark' '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response tags: - Benchmarks - summary: Get Benchmark + summary: Get a benchmark by its ID. description: Get a benchmark by its ID. operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get parameters: @@ -3467,26 +3467,28 @@ paths: required: true schema: type: string - description: 'Path parameter: benchmark_id' + description: The ID of the benchmark to get. + title: Benchmark Id + description: The ID of the benchmark to get. delete: responses: '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response '204': - description: Successful Response + description: The benchmark was successfully unregistered. tags: - Benchmarks - summary: Unregister Benchmark + summary: Unregister a benchmark. description: Unregister a benchmark. operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete parameters: @@ -3495,7 +3497,9 @@ paths: required: true schema: type: string - description: 'Path parameter: benchmark_id' + description: The ID of the benchmark to unregister. + title: Benchmark Id + description: The ID of the benchmark to unregister. deprecated: true /v1alpha/eval/benchmarks/{benchmark_id}/evaluations: post: @@ -10391,20 +10395,23 @@ components: type: string const: benchmark title: Type + description: The resource type, always benchmark. default: benchmark dataset_id: type: string title: Dataset Id + description: Identifier of the dataset to use for the benchmark evaluation. scoring_functions: items: type: string type: array title: Scoring Functions + description: List of scoring function identifiers to apply during evaluation. 
metadata: additionalProperties: true type: object title: Metadata - description: Metadata for this evaluation task + description: Metadata for this evaluation task. type: object required: - identifier @@ -10420,10 +10427,12 @@ components: $ref: '#/components/schemas/Benchmark' type: array title: Data + description: List of benchmark objects. type: object required: - data title: ListBenchmarksResponse + description: Response containing a list of benchmark objects. BenchmarkConfig: properties: eval_candidate: @@ -11385,33 +11394,40 @@ components: benchmark_id: type: string title: Benchmark Id + description: The ID of the benchmark to register. dataset_id: type: string title: Dataset Id + description: The ID of the dataset to use for the benchmark. scoring_functions: items: type: string type: array title: Scoring Functions + description: The scoring functions to use for the benchmark. provider_benchmark_id: anyOf: - type: string - type: 'null' + description: The ID of the provider benchmark to use for the benchmark. provider_id: anyOf: - type: string - type: 'null' + description: The ID of the provider to use for the benchmark. metadata: anyOf: - additionalProperties: true type: object - type: 'null' + description: The metadata to use for the benchmark. type: object required: - benchmark_id - dataset_id - scoring_functions title: RegisterBenchmarkRequest + description: Request model for registering a benchmark. AllowedToolsFilter: properties: tool_names: @@ -12617,6 +12633,33 @@ components: - batch_id title: CancelBatchRequest type: object + ListBenchmarksRequest: + description: Request model for listing benchmarks. + properties: {} + title: ListBenchmarksRequest + type: object + GetBenchmarkRequest: + description: Request model for getting a benchmark. + properties: + benchmark_id: + description: The ID of the benchmark to get. + title: Benchmark Id + type: string + required: + - benchmark_id + title: GetBenchmarkRequest + type: object + UnregisterBenchmarkRequest: + description: Request model for unregistering a benchmark. + properties: + benchmark_id: + description: The ID of the benchmark to unregister. + title: Benchmark Id + type: string + required: + - benchmark_id + title: UnregisterBenchmarkRequest + type: object DialogType: description: Parameter type for dialog data with semantic output labels. properties: diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 3f9766ae5..64f47d617 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -794,7 +794,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Benchmarks - summary: List Benchmarks + summary: List all benchmarks. description: List all benchmarks. operationId: list_benchmarks_v1alpha_eval_benchmarks_get post: @@ -812,10 +812,10 @@ paths: description: Default Response $ref: '#/components/responses/DefaultError' '204': - description: Successful Response + description: The benchmark was successfully registered. tags: - Benchmarks - summary: Register Benchmark + summary: Register a benchmark. description: Register a benchmark. 
operationId: register_benchmark_v1alpha_eval_benchmarks_post requestBody: @@ -835,20 +835,20 @@ paths: schema: $ref: '#/components/schemas/Benchmark' '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response tags: - Benchmarks - summary: Get Benchmark + summary: Get a benchmark by its ID. description: Get a benchmark by its ID. operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get parameters: @@ -857,26 +857,28 @@ paths: required: true schema: type: string - description: 'Path parameter: benchmark_id' + description: The ID of the benchmark to get. + title: Benchmark Id + description: The ID of the benchmark to get. delete: responses: '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response '204': - description: Successful Response + description: The benchmark was successfully unregistered. tags: - Benchmarks - summary: Unregister Benchmark + summary: Unregister a benchmark. description: Unregister a benchmark. operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete parameters: @@ -885,7 +887,9 @@ paths: required: true schema: type: string - description: 'Path parameter: benchmark_id' + description: The ID of the benchmark to unregister. + title: Benchmark Id + description: The ID of the benchmark to unregister. deprecated: true components: schemas: @@ -7375,20 +7379,23 @@ components: type: string const: benchmark title: Type + description: The resource type, always benchmark. default: benchmark dataset_id: type: string title: Dataset Id + description: Identifier of the dataset to use for the benchmark evaluation. scoring_functions: items: type: string type: array title: Scoring Functions + description: List of scoring function identifiers to apply during evaluation. metadata: additionalProperties: true type: object title: Metadata - description: Metadata for this evaluation task + description: Metadata for this evaluation task. type: object required: - identifier @@ -7404,10 +7411,12 @@ components: $ref: '#/components/schemas/Benchmark' type: array title: Data + description: List of benchmark objects. type: object required: - data title: ListBenchmarksResponse + description: Response containing a list of benchmark objects. BenchmarkConfig: properties: eval_candidate: @@ -8369,33 +8378,40 @@ components: benchmark_id: type: string title: Benchmark Id + description: The ID of the benchmark to register. dataset_id: type: string title: Dataset Id + description: The ID of the dataset to use for the benchmark. scoring_functions: items: type: string type: array title: Scoring Functions + description: The scoring functions to use for the benchmark. 
provider_benchmark_id: anyOf: - type: string - type: 'null' + description: The ID of the provider benchmark to use for the benchmark. provider_id: anyOf: - type: string - type: 'null' + description: The ID of the provider to use for the benchmark. metadata: anyOf: - additionalProperties: true type: object - type: 'null' + description: The metadata to use for the benchmark. type: object required: - benchmark_id - dataset_id - scoring_functions title: RegisterBenchmarkRequest + description: Request model for registering a benchmark. AllowedToolsFilter: properties: tool_names: @@ -9601,6 +9617,33 @@ components: - batch_id title: CancelBatchRequest type: object + ListBenchmarksRequest: + description: Request model for listing benchmarks. + properties: {} + title: ListBenchmarksRequest + type: object + GetBenchmarkRequest: + description: Request model for getting a benchmark. + properties: + benchmark_id: + description: The ID of the benchmark to get. + title: Benchmark Id + type: string + required: + - benchmark_id + title: GetBenchmarkRequest + type: object + UnregisterBenchmarkRequest: + description: Request model for unregistering a benchmark. + properties: + benchmark_id: + description: The ID of the benchmark to unregister. + title: Benchmark Id + type: string + required: + - benchmark_id + title: UnregisterBenchmarkRequest + type: object DialogType: description: Parameter type for dialog data with semantic output labels. properties: diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index 806972854..3a488073b 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -188,7 +188,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Benchmarks - summary: List Benchmarks + summary: List all benchmarks. description: List all benchmarks. operationId: list_benchmarks_v1alpha_eval_benchmarks_get /v1alpha/eval/benchmarks/{benchmark_id}: @@ -201,20 +201,20 @@ paths: schema: $ref: '#/components/schemas/Benchmark' '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response tags: - Benchmarks - summary: Get Benchmark + summary: Get a benchmark by its ID. description: Get a benchmark by its ID. operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get parameters: @@ -223,7 +223,9 @@ paths: required: true schema: type: string - description: 'Path parameter: benchmark_id' + description: The ID of the benchmark to get. + title: Benchmark Id + description: The ID of the benchmark to get. /v1alpha/eval/benchmarks/{benchmark_id}/evaluations: post: responses: @@ -6517,20 +6519,23 @@ components: type: string const: benchmark title: Type + description: The resource type, always benchmark. default: benchmark dataset_id: type: string title: Dataset Id + description: Identifier of the dataset to use for the benchmark evaluation. scoring_functions: items: type: string type: array title: Scoring Functions + description: List of scoring function identifiers to apply during evaluation. 
metadata: additionalProperties: true type: object title: Metadata - description: Metadata for this evaluation task + description: Metadata for this evaluation task. type: object required: - identifier @@ -6546,10 +6551,12 @@ components: $ref: '#/components/schemas/Benchmark' type: array title: Data + description: List of benchmark objects. type: object required: - data title: ListBenchmarksResponse + description: Response containing a list of benchmark objects. BenchmarkConfig: properties: eval_candidate: @@ -7346,6 +7353,45 @@ components: - $ref: '#/components/schemas/RowsDataSource' title: RowsDataSource title: URIDataSource | RowsDataSource + RegisterBenchmarkRequest: + properties: + benchmark_id: + type: string + title: Benchmark Id + description: The ID of the benchmark to register. + dataset_id: + type: string + title: Dataset Id + description: The ID of the dataset to use for the benchmark. + scoring_functions: + items: + type: string + type: array + title: Scoring Functions + description: The scoring functions to use for the benchmark. + provider_benchmark_id: + anyOf: + - type: string + - type: 'null' + description: The ID of the provider benchmark to use for the benchmark. + provider_id: + anyOf: + - type: string + - type: 'null' + description: The ID of the provider to use for the benchmark. + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + description: The metadata to use for the benchmark. + type: object + required: + - benchmark_id + - dataset_id + - scoring_functions + title: RegisterBenchmarkRequest + description: Request model for registering a benchmark. AllowedToolsFilter: properties: tool_names: @@ -8395,6 +8441,33 @@ components: - batch_id title: CancelBatchRequest type: object + ListBenchmarksRequest: + description: Request model for listing benchmarks. + properties: {} + title: ListBenchmarksRequest + type: object + GetBenchmarkRequest: + description: Request model for getting a benchmark. + properties: + benchmark_id: + description: The ID of the benchmark to get. + title: Benchmark Id + type: string + required: + - benchmark_id + title: GetBenchmarkRequest + type: object + UnregisterBenchmarkRequest: + description: Request model for unregistering a benchmark. + properties: + benchmark_id: + description: The ID of the benchmark to unregister. + title: Benchmark Id + type: string + required: + - benchmark_id + title: UnregisterBenchmarkRequest + type: object DialogType: description: Parameter type for dialog data with semantic output labels. properties: diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index b840ed798..6754f1773 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -9166,20 +9166,23 @@ components: type: string const: benchmark title: Type + description: The resource type, always benchmark. default: benchmark dataset_id: type: string title: Dataset Id + description: Identifier of the dataset to use for the benchmark evaluation. scoring_functions: items: type: string type: array title: Scoring Functions + description: List of scoring function identifiers to apply during evaluation. metadata: additionalProperties: true type: object title: Metadata - description: Metadata for this evaluation task + description: Metadata for this evaluation task. type: object required: - identifier @@ -9195,10 +9198,12 @@ components: $ref: '#/components/schemas/Benchmark' type: array title: Data + description: List of benchmark objects. 
type: object required: - data title: ListBenchmarksResponse + description: Response containing a list of benchmark objects. BenchmarkConfig: properties: eval_candidate: @@ -9848,6 +9853,45 @@ components: - $ref: '#/components/schemas/RowsDataSource' title: RowsDataSource title: URIDataSource | RowsDataSource + RegisterBenchmarkRequest: + properties: + benchmark_id: + type: string + title: Benchmark Id + description: The ID of the benchmark to register. + dataset_id: + type: string + title: Dataset Id + description: The ID of the dataset to use for the benchmark. + scoring_functions: + items: + type: string + type: array + title: Scoring Functions + description: The scoring functions to use for the benchmark. + provider_benchmark_id: + anyOf: + - type: string + - type: 'null' + description: The ID of the provider benchmark to use for the benchmark. + provider_id: + anyOf: + - type: string + - type: 'null' + description: The ID of the provider to use for the benchmark. + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + description: The metadata to use for the benchmark. + type: object + required: + - benchmark_id + - dataset_id + - scoring_functions + title: RegisterBenchmarkRequest + description: Request model for registering a benchmark. AllowedToolsFilter: properties: tool_names: @@ -11053,6 +11097,33 @@ components: - batch_id title: CancelBatchRequest type: object + ListBenchmarksRequest: + description: Request model for listing benchmarks. + properties: {} + title: ListBenchmarksRequest + type: object + GetBenchmarkRequest: + description: Request model for getting a benchmark. + properties: + benchmark_id: + description: The ID of the benchmark to get. + title: Benchmark Id + type: string + required: + - benchmark_id + title: GetBenchmarkRequest + type: object + UnregisterBenchmarkRequest: + description: Request model for unregistering a benchmark. + properties: + benchmark_id: + description: The ID of the benchmark to unregister. + title: Benchmark Id + type: string + required: + - benchmark_id + title: UnregisterBenchmarkRequest + type: object DialogType: description: Parameter type for dialog data with semantic output labels. properties: diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 624443b00..61fa6705c 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -3404,7 +3404,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Benchmarks - summary: List Benchmarks + summary: List all benchmarks. description: List all benchmarks. operationId: list_benchmarks_v1alpha_eval_benchmarks_get post: @@ -3422,10 +3422,10 @@ paths: description: Default Response $ref: '#/components/responses/DefaultError' '204': - description: Successful Response + description: The benchmark was successfully registered. tags: - Benchmarks - summary: Register Benchmark + summary: Register a benchmark. description: Register a benchmark. 
operationId: register_benchmark_v1alpha_eval_benchmarks_post requestBody: @@ -3445,20 +3445,20 @@ paths: schema: $ref: '#/components/schemas/Benchmark' '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response tags: - Benchmarks - summary: Get Benchmark + summary: Get a benchmark by its ID. description: Get a benchmark by its ID. operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get parameters: @@ -3467,26 +3467,28 @@ paths: required: true schema: type: string - description: 'Path parameter: benchmark_id' + description: The ID of the benchmark to get. + title: Benchmark Id + description: The ID of the benchmark to get. delete: responses: '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response '204': - description: Successful Response + description: The benchmark was successfully unregistered. tags: - Benchmarks - summary: Unregister Benchmark + summary: Unregister a benchmark. description: Unregister a benchmark. operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete parameters: @@ -3495,7 +3497,9 @@ paths: required: true schema: type: string - description: 'Path parameter: benchmark_id' + description: The ID of the benchmark to unregister. + title: Benchmark Id + description: The ID of the benchmark to unregister. deprecated: true /v1alpha/eval/benchmarks/{benchmark_id}/evaluations: post: @@ -10391,20 +10395,23 @@ components: type: string const: benchmark title: Type + description: The resource type, always benchmark. default: benchmark dataset_id: type: string title: Dataset Id + description: Identifier of the dataset to use for the benchmark evaluation. scoring_functions: items: type: string type: array title: Scoring Functions + description: List of scoring function identifiers to apply during evaluation. metadata: additionalProperties: true type: object title: Metadata - description: Metadata for this evaluation task + description: Metadata for this evaluation task. type: object required: - identifier @@ -10420,10 +10427,12 @@ components: $ref: '#/components/schemas/Benchmark' type: array title: Data + description: List of benchmark objects. type: object required: - data title: ListBenchmarksResponse + description: Response containing a list of benchmark objects. BenchmarkConfig: properties: eval_candidate: @@ -11385,33 +11394,40 @@ components: benchmark_id: type: string title: Benchmark Id + description: The ID of the benchmark to register. dataset_id: type: string title: Dataset Id + description: The ID of the dataset to use for the benchmark. scoring_functions: items: type: string type: array title: Scoring Functions + description: The scoring functions to use for the benchmark. 
provider_benchmark_id: anyOf: - type: string - type: 'null' + description: The ID of the provider benchmark to use for the benchmark. provider_id: anyOf: - type: string - type: 'null' + description: The ID of the provider to use for the benchmark. metadata: anyOf: - additionalProperties: true type: object - type: 'null' + description: The metadata to use for the benchmark. type: object required: - benchmark_id - dataset_id - scoring_functions title: RegisterBenchmarkRequest + description: Request model for registering a benchmark. AllowedToolsFilter: properties: tool_names: @@ -12617,6 +12633,33 @@ components: - batch_id title: CancelBatchRequest type: object + ListBenchmarksRequest: + description: Request model for listing benchmarks. + properties: {} + title: ListBenchmarksRequest + type: object + GetBenchmarkRequest: + description: Request model for getting a benchmark. + properties: + benchmark_id: + description: The ID of the benchmark to get. + title: Benchmark Id + type: string + required: + - benchmark_id + title: GetBenchmarkRequest + type: object + UnregisterBenchmarkRequest: + description: Request model for unregistering a benchmark. + properties: + benchmark_id: + description: The ID of the benchmark to unregister. + title: Benchmark Id + type: string + required: + - benchmark_id + title: UnregisterBenchmarkRequest + type: object DialogType: description: Parameter type for dialog data with semantic output labels. properties: diff --git a/src/llama_stack/core/inspect.py b/src/llama_stack/core/inspect.py index 966723d9b..b3ca816cf 100644 --- a/src/llama_stack/core/inspect.py +++ b/src/llama_stack/core/inspect.py @@ -9,6 +9,7 @@ from importlib.metadata import version from pydantic import BaseModel from llama_stack.core.datatypes import StackConfig +from llama_stack.core.distribution import builtin_automatically_routed_apis from llama_stack.core.external import load_external_apis from llama_stack.core.server.fastapi_router_registry import ( _ROUTER_FACTORIES, @@ -65,6 +66,17 @@ class DistributionInspectImpl(Inspect): def _get_provider_types(api: Api) -> list[str]: if api.value in ["providers", "inspect"]: return [] # These APIs don't have "real" providers they're internal to the stack + + # For routing table APIs, look up providers from their router API + # (e.g., benchmarks -> eval, models -> inference, etc.) 
+ auto_routed_apis = builtin_automatically_routed_apis() + for auto_routed in auto_routed_apis: + if auto_routed.routing_table_api == api: + # This is a routing table API, use its router API for providers + providers = config.providers.get(auto_routed.router_api.value, []) + return [p.provider_type for p in providers] if providers else [] + + # Regular API, look up providers directly providers = config.providers.get(api.value, []) return [p.provider_type for p in providers] if providers else [] diff --git a/src/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py index 44545fa55..9d2ed3953 100644 --- a/src/llama_stack/core/library_client.py +++ b/src/llama_stack/core/library_client.py @@ -10,6 +10,7 @@ import json import logging # allow-direct-logging import os import sys +import typing from enum import Enum from io import BytesIO from pathlib import Path @@ -490,6 +491,25 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): unwrapped_body_param = param break + # Check for parameters with Depends() annotation (FastAPI router endpoints) + # These need special handling: construct the request model from body + depends_param = None + for param in params_list: + param_type = param.annotation + if get_origin(param_type) is typing.Annotated: + args = get_args(param_type) + if len(args) > 1: + # Check if any metadata is Depends + metadata = args[1:] + for item in metadata: + # Check if it's a Depends object (has dependency attribute or is a callable) + # Depends objects typically have a 'dependency' attribute or are callable functions + if hasattr(item, "dependency") or callable(item) or "Depends" in str(type(item)): + depends_param = param + break + if depends_param: + break + # Convert parameters to Pydantic models where needed converted_body = {} for param_name, param in sig.parameters.items(): @@ -500,6 +520,27 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): else: converted_body[param_name] = convert_to_pydantic(param.annotation, value) + # Handle Depends parameter: construct request model from body + if depends_param and depends_param.name not in converted_body: + param_type = depends_param.annotation + if get_origin(param_type) is typing.Annotated: + base_type = get_args(param_type)[0] + # Handle Union types (e.g., SomeRequestModel | None) - extract the non-None type + # In Python 3.10+, Union types created with | syntax are still typing.Union + origin = get_origin(base_type) + if origin is Union: + # Get the first non-None type from the Union + union_args = get_args(base_type) + base_type = next( + (t for t in union_args if t is not type(None) and t is not None), + union_args[0] if union_args else None, + ) + + # Only try to instantiate if it's a class (not a Union or other non-callable type) + if base_type is not None and inspect.isclass(base_type) and callable(base_type): + # Construct the request model from all body parameters + converted_body[depends_param.name] = base_type(**body) + # handle unwrapped body parameter after processing all named parameters if unwrapped_body_param: base_type = get_args(unwrapped_body_param.annotation)[0] diff --git a/src/llama_stack/core/routing_tables/benchmarks.py b/src/llama_stack/core/routing_tables/benchmarks.py index 9037ffe8b..d5e3799ba 100644 --- a/src/llama_stack/core/routing_tables/benchmarks.py +++ b/src/llama_stack/core/routing_tables/benchmarks.py @@ -4,13 +4,20 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Any from llama_stack.core.datatypes import ( BenchmarkWithOwner, ) from llama_stack.log import get_logger -from llama_stack_api import Benchmark, Benchmarks, ListBenchmarksResponse +from llama_stack_api import ( + Benchmark, + Benchmarks, + GetBenchmarkRequest, + ListBenchmarksRequest, + ListBenchmarksResponse, + RegisterBenchmarkRequest, + UnregisterBenchmarkRequest, +) from .common import CommonRoutingTableImpl @@ -18,26 +25,21 @@ logger = get_logger(name=__name__, category="core::routing_tables") class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks): - async def list_benchmarks(self) -> ListBenchmarksResponse: + async def list_benchmarks(self, request: ListBenchmarksRequest) -> ListBenchmarksResponse: return ListBenchmarksResponse(data=await self.get_all_with_type("benchmark")) - async def get_benchmark(self, benchmark_id: str) -> Benchmark: - benchmark = await self.get_object_by_identifier("benchmark", benchmark_id) + async def get_benchmark(self, request: GetBenchmarkRequest) -> Benchmark: + benchmark = await self.get_object_by_identifier("benchmark", request.benchmark_id) if benchmark is None: - raise ValueError(f"Benchmark '{benchmark_id}' not found") + raise ValueError(f"Benchmark '{request.benchmark_id}' not found") return benchmark async def register_benchmark( self, - benchmark_id: str, - dataset_id: str, - scoring_functions: list[str], - metadata: dict[str, Any] | None = None, - provider_benchmark_id: str | None = None, - provider_id: str | None = None, + request: RegisterBenchmarkRequest, ) -> None: - if metadata is None: - metadata = {} + metadata = request.metadata if request.metadata is not None else {} + provider_id = request.provider_id if provider_id is None: if len(self.impls_by_provider_id) == 1: provider_id = list(self.impls_by_provider_id.keys())[0] @@ -45,18 +47,20 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks): raise ValueError( "No provider specified and multiple providers available. Please specify a provider_id." 
) + provider_benchmark_id = request.provider_benchmark_id if provider_benchmark_id is None: - provider_benchmark_id = benchmark_id + provider_benchmark_id = request.benchmark_id benchmark = BenchmarkWithOwner( - identifier=benchmark_id, - dataset_id=dataset_id, - scoring_functions=scoring_functions, + identifier=request.benchmark_id, + dataset_id=request.dataset_id, + scoring_functions=request.scoring_functions, metadata=metadata, provider_id=provider_id, provider_resource_id=provider_benchmark_id, ) await self.register_object(benchmark) - async def unregister_benchmark(self, benchmark_id: str) -> None: - existing_benchmark = await self.get_benchmark(benchmark_id) + async def unregister_benchmark(self, request: UnregisterBenchmarkRequest) -> None: + get_request = GetBenchmarkRequest(benchmark_id=request.benchmark_id) + existing_benchmark = await self.get_benchmark(get_request) await self.unregister_object(existing_benchmark) diff --git a/src/llama_stack/core/server/fastapi_router_registry.py b/src/llama_stack/core/server/fastapi_router_registry.py index 4119cc847..4ca1a9837 100644 --- a/src/llama_stack/core/server/fastapi_router_registry.py +++ b/src/llama_stack/core/server/fastapi_router_registry.py @@ -17,7 +17,7 @@ from fastapi import APIRouter from fastapi.routing import APIRoute from starlette.routing import Route -from llama_stack_api import batches +from llama_stack_api import batches, benchmarks # Router factories for APIs that have FastAPI routers # Add new APIs here as they are migrated to the router system @@ -25,6 +25,7 @@ from llama_stack_api.datatypes import Api _ROUTER_FACTORIES: dict[str, Callable[[Any], APIRouter]] = { "batches": batches.fastapi_routes.create_router, + "benchmarks": benchmarks.fastapi_routes.create_router, } diff --git a/src/llama_stack/core/server/routes.py b/src/llama_stack/core/server/routes.py index 9df9e4a60..abefe3256 100644 --- a/src/llama_stack/core/server/routes.py +++ b/src/llama_stack/core/server/routes.py @@ -13,6 +13,11 @@ from aiohttp import hdrs from starlette.routing import Route from llama_stack.core.resolver import api_protocol_map +from llama_stack.core.server.fastapi_router_registry import ( + _ROUTER_FACTORIES, + build_fastapi_router, + get_router_routes, +) from llama_stack_api import Api, ExternalApiSpec, WebMethod EndpointFunc = Callable[..., Any] @@ -85,7 +90,53 @@ def initialize_route_impls(impls, external_apis: dict[Api, ExternalApiSpec] | No return f"^{pattern}$" + # Process routes from FastAPI routers + for api_name in _ROUTER_FACTORIES.keys(): + api = Api(api_name) + if api not in impls: + continue + impl = impls[api] + router = build_fastapi_router(api, impl) + if router: + router_routes = get_router_routes(router) + for route in router_routes: + # Get the endpoint function from the route + # For FastAPI routes, the endpoint is the actual function + func = route.endpoint + if func is None: + continue + + # Get the first (and typically only) method from the set, filtering out HEAD + available_methods = [m for m in (route.methods or []) if m != "HEAD"] + if not available_methods: + continue # Skip if only HEAD method is available + method = available_methods[0].lower() + + if method not in route_impls: + route_impls[method] = {} + + # Create a minimal WebMethod for router routes (needed for RouteMatch tuple) + # We don't have webmethod metadata for router routes, so create a minimal one + # that has the attributes used by the library client (descriptive_name for tracing) + # + # TODO: Long-term migration plan (once all APIs are 
migrated to FastAPI routers): + # - Extract summary from APIRoute: route.summary (available on FastAPI APIRoute objects) + # - Pass summary directly in RouteMatch instead of WebMethod + # - Remove this WebMethod() instantiation entirely + # - Update library_client.py to use the extracted summary instead of webmethod.descriptive_name + webmethod = WebMethod(descriptive_name=None) + route_impls[method][_convert_path_to_regex(route.path)] = ( + func, + route.path, + webmethod, + ) + + # Process routes from legacy webmethod-based APIs for api, api_routes in api_to_routes.items(): + # Skip APIs that have routers (already processed above) + if api.value in _ROUTER_FACTORIES: + continue + if api not in impls: continue for route, webmethod in api_routes: diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py index 9310bce41..eade24c1b 100644 --- a/src/llama_stack/core/stack.py +++ b/src/llama_stack/core/stack.py @@ -6,12 +6,14 @@ import asyncio import importlib.resources +import inspect import os import re import tempfile -from typing import Any +from typing import Any, get_type_hints import yaml +from pydantic import BaseModel from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl from llama_stack.core.datatypes import Provider, SafetyConfig, StackConfig, VectorStoresConfig @@ -108,6 +110,81 @@ REGISTRY_REFRESH_TASK = None TEST_RECORDING_CONTEXT = None +def is_request_model(t: Any) -> bool: + """Check if a type is a request model (Pydantic BaseModel). + + Args: + t: The type to check + + Returns: + True if the type is a Pydantic BaseModel subclass, False otherwise + """ + + return inspect.isclass(t) and issubclass(t, BaseModel) + + +async def invoke_with_optional_request(method: Any) -> Any: + """Invoke a method, automatically creating a request instance if needed. + + For APIs that use request models, this will create an empty request object. + For backward compatibility, falls back to calling without arguments. + + Uses get_type_hints() to resolve forward references (e.g., "ListBenchmarksRequest" -> actual class). 
+ + Handles methods with: + - No parameters: calls without arguments + - One or more request model parameters: creates empty instances for each + - Mixed parameters: creates request models, uses defaults for others + - Required non-request-model parameters without defaults: falls back to calling without arguments + + Args: + method: The method to invoke + + Returns: + The result of calling the method + """ + try: + hints = get_type_hints(method) + except Exception: + # Forward references can't be resolved, fall back to calling without request + return await method() + + params = list(inspect.signature(method).parameters.values()) + params = [p for p in params if p.name != "self"] + + if not params: + return await method() + + # Build arguments for the method call + args: dict[str, Any] = {} + can_call = True + + for param in params: + param_type = hints.get(param.name) + + # If it's a request model, try to create an empty instance + if param_type and is_request_model(param_type): + try: + args[param.name] = param_type() + except Exception: + # Request model requires arguments, can't create empty instance + can_call = False + break + # If it has a default value, we can skip it (will use default) + elif param.default != inspect.Parameter.empty: + continue + # Required parameter that's not a request model - can't provide it + else: + can_call = False + break + + if can_call and args: + return await method(**args) + + # Fall back to calling without arguments for backward compatibility + return await method() + + async def register_resources(run_config: StackConfig, impls: dict[Api, Any]): for rsrc, api, register_method, list_method in RESOURCES: objects = getattr(run_config.registered_resources, rsrc) @@ -129,7 +206,7 @@ async def register_resources(run_config: StackConfig, impls: dict[Api, Any]): await method(**{k: getattr(obj, k) for k in obj.model_dump().keys()}) method = getattr(impls[api], list_method) - response = await method() + response = await invoke_with_optional_request(method) objects_to_process = response.data if hasattr(response, "data") else response diff --git a/src/llama_stack_api/__init__.py b/src/llama_stack_api/__init__.py index 407629eaa..e38f3cce6 100644 --- a/src/llama_stack_api/__init__.py +++ b/src/llama_stack_api/__init__.py @@ -40,7 +40,11 @@ from .benchmarks import ( BenchmarkInput, Benchmarks, CommonBenchmarkFields, + GetBenchmarkRequest, + ListBenchmarksRequest, ListBenchmarksResponse, + RegisterBenchmarkRequest, + UnregisterBenchmarkRequest, ) # Import commonly used types from common submodule @@ -567,7 +571,11 @@ __all__ = [ "LLMRAGQueryGeneratorConfig", "ListBatchesResponse", "RetrieveBatchRequest", + "GetBenchmarkRequest", + "ListBenchmarksRequest", "ListBenchmarksResponse", + "RegisterBenchmarkRequest", + "UnregisterBenchmarkRequest", "ListDatasetsResponse", "ListModelsResponse", "ListOpenAIChatCompletionResponse", diff --git a/src/llama_stack_api/benchmarks.py b/src/llama_stack_api/benchmarks.py deleted file mode 100644 index fdb2ccad4..000000000 --- a/src/llama_stack_api/benchmarks.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
-from typing import Any, Literal, Protocol, runtime_checkable - -from pydantic import BaseModel, Field - -from llama_stack_api.resource import Resource, ResourceType -from llama_stack_api.schema_utils import json_schema_type, webmethod -from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA - - -class CommonBenchmarkFields(BaseModel): - dataset_id: str - scoring_functions: list[str] - metadata: dict[str, Any] = Field( - default_factory=dict, - description="Metadata for this evaluation task", - ) - - -@json_schema_type -class Benchmark(CommonBenchmarkFields, Resource): - """A benchmark resource for evaluating model performance. - - :param dataset_id: Identifier of the dataset to use for the benchmark evaluation - :param scoring_functions: List of scoring function identifiers to apply during evaluation - :param metadata: Metadata for this evaluation task - :param type: The resource type, always benchmark - """ - - type: Literal[ResourceType.benchmark] = ResourceType.benchmark - - @property - def benchmark_id(self) -> str: - return self.identifier - - @property - def provider_benchmark_id(self) -> str | None: - return self.provider_resource_id - - -class BenchmarkInput(CommonBenchmarkFields, BaseModel): - benchmark_id: str - provider_id: str | None = None - provider_benchmark_id: str | None = None - - -@json_schema_type -class ListBenchmarksResponse(BaseModel): - data: list[Benchmark] - - -@runtime_checkable -class Benchmarks(Protocol): - @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA) - async def list_benchmarks(self) -> ListBenchmarksResponse: - """List all benchmarks. - - :returns: A ListBenchmarksResponse. - """ - ... - - @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA) - async def get_benchmark( - self, - benchmark_id: str, - ) -> Benchmark: - """Get a benchmark by its ID. - - :param benchmark_id: The ID of the benchmark to get. - :returns: A Benchmark. - """ - ... - - @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA, deprecated=True) - async def register_benchmark( - self, - benchmark_id: str, - dataset_id: str, - scoring_functions: list[str], - provider_benchmark_id: str | None = None, - provider_id: str | None = None, - metadata: dict[str, Any] | None = None, - ) -> None: - """Register a benchmark. - - :param benchmark_id: The ID of the benchmark to register. - :param dataset_id: The ID of the dataset to use for the benchmark. - :param scoring_functions: The scoring functions to use for the benchmark. - :param provider_benchmark_id: The ID of the provider benchmark to use for the benchmark. - :param provider_id: The ID of the provider to use for the benchmark. - :param metadata: The metadata to use for the benchmark. - """ - ... - - @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA, deprecated=True) - async def unregister_benchmark(self, benchmark_id: str) -> None: - """Unregister a benchmark. - - :param benchmark_id: The ID of the benchmark to unregister. - """ - ... diff --git a/src/llama_stack_api/benchmarks/__init__.py b/src/llama_stack_api/benchmarks/__init__.py new file mode 100644 index 000000000..9c5652dce --- /dev/null +++ b/src/llama_stack_api/benchmarks/__init__.py @@ -0,0 +1,43 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +"""Benchmarks API protocol and models. + +This module contains the Benchmarks protocol definition. +Pydantic models are defined in llama_stack_api.benchmarks.models. +The FastAPI router is defined in llama_stack_api.benchmarks.fastapi_routes. +""" + +# Import fastapi_routes for router factory access +from . import fastapi_routes + +# Import protocol for re-export +from .api import Benchmarks + +# Import models for re-export +from .models import ( + Benchmark, + BenchmarkInput, + CommonBenchmarkFields, + GetBenchmarkRequest, + ListBenchmarksRequest, + ListBenchmarksResponse, + RegisterBenchmarkRequest, + UnregisterBenchmarkRequest, +) + +__all__ = [ + "Benchmarks", + "Benchmark", + "BenchmarkInput", + "CommonBenchmarkFields", + "ListBenchmarksResponse", + "ListBenchmarksRequest", + "GetBenchmarkRequest", + "RegisterBenchmarkRequest", + "UnregisterBenchmarkRequest", + "fastapi_routes", +] diff --git a/src/llama_stack_api/benchmarks/api.py b/src/llama_stack_api/benchmarks/api.py new file mode 100644 index 000000000..26f88dbe2 --- /dev/null +++ b/src/llama_stack_api/benchmarks/api.py @@ -0,0 +1,39 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Protocol, runtime_checkable + +from .models import ( + Benchmark, + GetBenchmarkRequest, + ListBenchmarksRequest, + ListBenchmarksResponse, + RegisterBenchmarkRequest, + UnregisterBenchmarkRequest, +) + + +@runtime_checkable +class Benchmarks(Protocol): + async def list_benchmarks( + self, + request: ListBenchmarksRequest, + ) -> ListBenchmarksResponse: ... + + async def get_benchmark( + self, + request: GetBenchmarkRequest, + ) -> Benchmark: ... + + async def register_benchmark( + self, + request: RegisterBenchmarkRequest, + ) -> None: ... + + async def unregister_benchmark( + self, + request: UnregisterBenchmarkRequest, + ) -> None: ... diff --git a/src/llama_stack_api/benchmarks/fastapi_routes.py b/src/llama_stack_api/benchmarks/fastapi_routes.py new file mode 100644 index 000000000..461939ab9 --- /dev/null +++ b/src/llama_stack_api/benchmarks/fastapi_routes.py @@ -0,0 +1,109 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +"""FastAPI router for the Benchmarks API. + +This module defines the FastAPI router for the Benchmarks API using standard +FastAPI route decorators. The router is defined in the API package to keep +all API-related code together. 
+""" + +from typing import Annotated + +from fastapi import APIRouter, Body, Depends + +from llama_stack_api.router_utils import create_path_dependency, create_query_dependency, standard_responses +from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA + +from .api import Benchmarks +from .models import ( + Benchmark, + GetBenchmarkRequest, + ListBenchmarksRequest, + ListBenchmarksResponse, + RegisterBenchmarkRequest, + UnregisterBenchmarkRequest, +) + +# Automatically generate dependency functions from Pydantic models +# This ensures the models are the single source of truth for descriptions +get_list_benchmarks_request = create_query_dependency(ListBenchmarksRequest) +get_get_benchmark_request = create_path_dependency(GetBenchmarkRequest) +get_unregister_benchmark_request = create_path_dependency(UnregisterBenchmarkRequest) + + +def create_router(impl: Benchmarks) -> APIRouter: + """Create a FastAPI router for the Benchmarks API. + + Args: + impl: The Benchmarks implementation instance + + Returns: + APIRouter configured for the Benchmarks API + """ + router = APIRouter( + prefix=f"/{LLAMA_STACK_API_V1ALPHA}", + tags=["Benchmarks"], + responses=standard_responses, + ) + + @router.get( + "/eval/benchmarks", + response_model=ListBenchmarksResponse, + summary="List all benchmarks.", + description="List all benchmarks.", + responses={ + 200: {"description": "A ListBenchmarksResponse."}, + }, + ) + async def list_benchmarks( + request: Annotated[ListBenchmarksRequest, Depends(get_list_benchmarks_request)], + ) -> ListBenchmarksResponse: + return await impl.list_benchmarks(request) + + @router.get( + "/eval/benchmarks/{benchmark_id}", + response_model=Benchmark, + summary="Get a benchmark by its ID.", + description="Get a benchmark by its ID.", + responses={ + 200: {"description": "A Benchmark."}, + }, + ) + async def get_benchmark( + request: Annotated[GetBenchmarkRequest, Depends(get_get_benchmark_request)], + ) -> Benchmark: + return await impl.get_benchmark(request) + + @router.post( + "/eval/benchmarks", + summary="Register a benchmark.", + description="Register a benchmark.", + responses={ + 200: {"description": "The benchmark was successfully registered."}, + }, + deprecated=True, + ) + async def register_benchmark( + request: Annotated[RegisterBenchmarkRequest, Body(...)], + ) -> None: + return await impl.register_benchmark(request) + + @router.delete( + "/eval/benchmarks/{benchmark_id}", + summary="Unregister a benchmark.", + description="Unregister a benchmark.", + responses={ + 200: {"description": "The benchmark was successfully unregistered."}, + }, + deprecated=True, + ) + async def unregister_benchmark( + request: Annotated[UnregisterBenchmarkRequest, Depends(get_unregister_benchmark_request)], + ) -> None: + return await impl.unregister_benchmark(request) + + return router diff --git a/src/llama_stack_api/benchmarks/models.py b/src/llama_stack_api/benchmarks/models.py new file mode 100644 index 000000000..4d9eeb8c8 --- /dev/null +++ b/src/llama_stack_api/benchmarks/models.py @@ -0,0 +1,109 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +"""Pydantic models for Benchmarks API requests and responses. + +This module defines the request and response models for the Benchmarks API +using Pydantic with Field descriptions for OpenAPI schema generation. 
+""" + +from typing import Any, Literal + +from pydantic import BaseModel, Field + +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type + + +@json_schema_type +class ListBenchmarksRequest(BaseModel): + """Request model for listing benchmarks.""" + + pass + + +@json_schema_type +class GetBenchmarkRequest(BaseModel): + """Request model for getting a benchmark.""" + + benchmark_id: str = Field(..., description="The ID of the benchmark to get.") + + +@json_schema_type +class RegisterBenchmarkRequest(BaseModel): + """Request model for registering a benchmark.""" + + benchmark_id: str = Field(..., description="The ID of the benchmark to register.") + dataset_id: str = Field(..., description="The ID of the dataset to use for the benchmark.") + scoring_functions: list[str] = Field(..., description="The scoring functions to use for the benchmark.") + provider_benchmark_id: str | None = Field( + default=None, description="The ID of the provider benchmark to use for the benchmark." + ) + provider_id: str | None = Field(default=None, description="The ID of the provider to use for the benchmark.") + metadata: dict[str, Any] | None = Field(default=None, description="The metadata to use for the benchmark.") + + +@json_schema_type +class UnregisterBenchmarkRequest(BaseModel): + """Request model for unregistering a benchmark.""" + + benchmark_id: str = Field(..., description="The ID of the benchmark to unregister.") + + +class CommonBenchmarkFields(BaseModel): + dataset_id: str = Field(..., description="Identifier of the dataset to use for the benchmark evaluation.") + scoring_functions: list[str] = Field( + ..., description="List of scoring function identifiers to apply during evaluation." + ) + metadata: dict[str, Any] = Field( + default_factory=dict, + description="Metadata for this evaluation task.", + ) + + +@json_schema_type +class Benchmark(CommonBenchmarkFields, Resource): + """A benchmark resource for evaluating model performance.""" + + type: Literal[ResourceType.benchmark] = Field( + default=ResourceType.benchmark, + description="The resource type, always benchmark.", + ) + + @property + def benchmark_id(self) -> str: + return self.identifier + + @property + def provider_benchmark_id(self) -> str | None: + return self.provider_resource_id + + +class BenchmarkInput(CommonBenchmarkFields, BaseModel): + benchmark_id: str = Field(..., description="The ID of the benchmark.") + provider_id: str | None = Field(default=None, description="The ID of the provider to use for the benchmark.") + provider_benchmark_id: str | None = Field( + default=None, description="The ID of the provider benchmark to use for the benchmark." 
+ ) + + +@json_schema_type +class ListBenchmarksResponse(BaseModel): + """Response containing a list of benchmark objects.""" + + data: list[Benchmark] = Field(..., description="List of benchmark objects.") + + +__all__ = [ + "ListBenchmarksRequest", + "GetBenchmarkRequest", + "RegisterBenchmarkRequest", + "UnregisterBenchmarkRequest", + "CommonBenchmarkFields", + "Benchmark", + "BenchmarkInput", + "ListBenchmarksResponse", +] diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py index 292ee8384..676d786c2 100644 --- a/tests/unit/distribution/routers/test_routing_tables.py +++ b/tests/unit/distribution/routers/test_routing_tables.py @@ -22,14 +22,17 @@ from llama_stack_api import ( Api, Dataset, DatasetPurpose, + ListBenchmarksRequest, ListToolDefsResponse, Model, ModelNotFoundError, ModelType, NumberType, + RegisterBenchmarkRequest, Shield, ToolDef, ToolGroup, + UnregisterBenchmarkRequest, URIDataSource, ) @@ -420,24 +423,26 @@ async def test_benchmarks_routing_table(cached_disk_dist_registry): # Register multiple benchmarks and verify listing await table.register_benchmark( - benchmark_id="test-benchmark", - dataset_id="test-dataset", - scoring_functions=["test-scoring-fn", "test-scoring-fn-2"], + RegisterBenchmarkRequest( + benchmark_id="test-benchmark", + dataset_id="test-dataset", + scoring_functions=["test-scoring-fn", "test-scoring-fn-2"], + ) ) - benchmarks = await table.list_benchmarks() + benchmarks = await table.list_benchmarks(ListBenchmarksRequest()) assert len(benchmarks.data) == 1 benchmark_ids = {b.identifier for b in benchmarks.data} assert "test-benchmark" in benchmark_ids # Unregister the benchmark and verify removal - await table.unregister_benchmark(benchmark_id="test-benchmark") - benchmarks_after = await table.list_benchmarks() + await table.unregister_benchmark(UnregisterBenchmarkRequest(benchmark_id="test-benchmark")) + benchmarks_after = await table.list_benchmarks(ListBenchmarksRequest()) assert len(benchmarks_after.data) == 0 # Unregistering a non-existent benchmark should raise a clear error with pytest.raises(ValueError, match="Benchmark 'dummy_benchmark' not found"): - await table.unregister_benchmark(benchmark_id="dummy_benchmark") + await table.unregister_benchmark(UnregisterBenchmarkRequest(benchmark_id="dummy_benchmark")) async def test_tool_groups_routing_table(cached_disk_dist_registry):