Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-16 20:42:38 +00:00)
feat: convert Benchmarks API to use FastAPI router (#4309)
# What does this PR do?

Convert the Benchmarks API from `@webmethod` decorators to the FastAPI router pattern, matching the Batches API structure. One notable change is the update of `stack.py` to handle request models in `register_resources()`.

Closes: #4308

## Test Plan

CI and:

`curl http://localhost:8321/v1/inspect/routes | jq '.data[] | select(.route | contains("benchmark"))'`

---------

Signed-off-by: Sébastien Han <seb@redhat.com>
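The pattern, in brief. This is a condensed sketch assembled from the new files in this diff (`models.py`, `api.py`, `fastapi_routes.py`), not the full implementation; the real router also wires path/query dependencies and the shared `standard_responses`:

```python
# Condensed sketch of the router pattern this PR applies to the Benchmarks API.
from typing import Annotated, Protocol

from fastapi import APIRouter, Body
from pydantic import BaseModel, Field


class RegisterBenchmarkRequest(BaseModel):
    benchmark_id: str = Field(..., description="The ID of the benchmark to register.")
    dataset_id: str = Field(..., description="The ID of the dataset to use for the benchmark.")
    scoring_functions: list[str] = Field(..., description="The scoring functions to use for the benchmark.")


class Benchmarks(Protocol):
    async def register_benchmark(self, request: RegisterBenchmarkRequest) -> None: ...


def create_router(impl: Benchmarks) -> APIRouter:
    # The factory closes over the implementation, so routes stay thin wrappers.
    router = APIRouter(prefix="/v1alpha", tags=["Benchmarks"])

    @router.post("/eval/benchmarks", summary="Register a benchmark.", deprecated=True)
    async def register_benchmark(request: Annotated[RegisterBenchmarkRequest, Body(...)]) -> None:
        return await impl.register_benchmark(request)

    return router
```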
Parent: 661985e240
Commit: ff375f1abb
18 changed files with 862 additions and 195 deletions
@ -3404,7 +3404,7 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: List Benchmarks
|
||||
summary: List all benchmarks.
|
||||
description: List all benchmarks.
|
||||
operationId: list_benchmarks_v1alpha_eval_benchmarks_get
|
||||
post:
|
||||
|
|
@ -3422,10 +3422,10 @@ paths:
|
|||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
'204':
|
||||
description: Successful Response
|
||||
description: The benchmark was successfully registered.
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Register Benchmark
|
||||
summary: Register a benchmark.
|
||||
description: Register a benchmark.
|
||||
operationId: register_benchmark_v1alpha_eval_benchmarks_post
|
||||
requestBody:
|
||||
|
|
@ -3445,20 +3445,20 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/Benchmark'
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
description: Bad Request
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
description: Too Many Requests
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
description: Internal Server Error
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
description: Default Response
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Get Benchmark
|
||||
summary: Get a benchmark by its ID.
|
||||
description: Get a benchmark by its ID.
|
||||
operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get
|
||||
parameters:
|
||||
|
|
@ -3467,26 +3467,28 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: benchmark_id'
|
||||
description: The ID of the benchmark to get.
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to get.
|
||||
delete:
|
||||
responses:
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
description: Bad Request
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
description: Too Many Requests
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
description: Internal Server Error
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
description: Default Response
|
||||
'204':
|
||||
description: Successful Response
|
||||
description: The benchmark was successfully unregistered.
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Unregister Benchmark
|
||||
summary: Unregister a benchmark.
|
||||
description: Unregister a benchmark.
|
||||
operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete
|
||||
parameters:
|
||||
|
|
@ -3495,7 +3497,9 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: benchmark_id'
|
||||
description: The ID of the benchmark to unregister.
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to unregister.
|
||||
deprecated: true
|
||||
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
|
||||
post:
|
||||
|
|
@ -10391,20 +10395,23 @@ components:
|
|||
type: string
|
||||
const: benchmark
|
||||
title: Type
|
||||
description: The resource type, always benchmark.
|
||||
default: benchmark
|
||||
dataset_id:
|
||||
type: string
|
||||
title: Dataset Id
|
||||
description: Identifier of the dataset to use for the benchmark evaluation.
|
||||
scoring_functions:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
title: Scoring Functions
|
||||
description: List of scoring function identifiers to apply during evaluation.
|
||||
metadata:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
title: Metadata
|
||||
description: Metadata for this evaluation task
|
||||
description: Metadata for this evaluation task.
|
||||
type: object
|
||||
required:
|
||||
- identifier
|
||||
|
|
@ -10420,10 +10427,12 @@ components:
|
|||
$ref: '#/components/schemas/Benchmark'
|
||||
type: array
|
||||
title: Data
|
||||
description: List of benchmark objects.
|
||||
type: object
|
||||
required:
|
||||
- data
|
||||
title: ListBenchmarksResponse
|
||||
description: Response containing a list of benchmark objects.
|
||||
BenchmarkConfig:
|
||||
properties:
|
||||
eval_candidate:
|
||||
|
|
@ -11385,33 +11394,40 @@ components:
|
|||
benchmark_id:
|
||||
type: string
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to register.
|
||||
dataset_id:
|
||||
type: string
|
||||
title: Dataset Id
|
||||
description: The ID of the dataset to use for the benchmark.
|
||||
scoring_functions:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
title: Scoring Functions
|
||||
description: The scoring functions to use for the benchmark.
|
||||
provider_benchmark_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
description: The ID of the provider benchmark to use for the benchmark.
|
||||
provider_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
description: The ID of the provider to use for the benchmark.
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
description: The metadata to use for the benchmark.
|
||||
type: object
|
||||
required:
|
||||
- benchmark_id
|
||||
- dataset_id
|
||||
- scoring_functions
|
||||
title: RegisterBenchmarkRequest
|
||||
description: Request model for registering a benchmark.
|
||||
AllowedToolsFilter:
|
||||
properties:
|
||||
tool_names:
|
||||
|
|
@ -12617,6 +12633,33 @@ components:
|
|||
- batch_id
|
||||
title: CancelBatchRequest
|
||||
type: object
|
||||
ListBenchmarksRequest:
|
||||
description: Request model for listing benchmarks.
|
||||
properties: {}
|
||||
title: ListBenchmarksRequest
|
||||
type: object
|
||||
GetBenchmarkRequest:
|
||||
description: Request model for getting a benchmark.
|
||||
properties:
|
||||
benchmark_id:
|
||||
description: The ID of the benchmark to get.
|
||||
title: Benchmark Id
|
||||
type: string
|
||||
required:
|
||||
- benchmark_id
|
||||
title: GetBenchmarkRequest
|
||||
type: object
|
||||
UnregisterBenchmarkRequest:
|
||||
description: Request model for unregistering a benchmark.
|
||||
properties:
|
||||
benchmark_id:
|
||||
description: The ID of the benchmark to unregister.
|
||||
title: Benchmark Id
|
||||
type: string
|
||||
required:
|
||||
- benchmark_id
|
||||
title: UnregisterBenchmarkRequest
|
||||
type: object
|
||||
DialogType:
|
||||
description: Parameter type for dialog data with semantic output labels.
|
||||
properties:
|
||||
|
|
|
|||
docs/static/deprecated-llama-stack-spec.yaml (vendored): 77 lines changed
|
|
@ -794,7 +794,7 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: List Benchmarks
|
||||
summary: List all benchmarks.
|
||||
description: List all benchmarks.
|
||||
operationId: list_benchmarks_v1alpha_eval_benchmarks_get
|
||||
post:
|
||||
|
|
@ -812,10 +812,10 @@ paths:
|
|||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
'204':
|
||||
description: Successful Response
|
||||
description: The benchmark was successfully registered.
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Register Benchmark
|
||||
summary: Register a benchmark.
|
||||
description: Register a benchmark.
|
||||
operationId: register_benchmark_v1alpha_eval_benchmarks_post
|
||||
requestBody:
|
||||
|
|
@ -835,20 +835,20 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/Benchmark'
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
description: Bad Request
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
description: Too Many Requests
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
description: Internal Server Error
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
description: Default Response
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Get Benchmark
|
||||
summary: Get a benchmark by its ID.
|
||||
description: Get a benchmark by its ID.
|
||||
operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get
|
||||
parameters:
|
||||
|
|
@ -857,26 +857,28 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: benchmark_id'
|
||||
description: The ID of the benchmark to get.
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to get.
|
||||
delete:
|
||||
responses:
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
description: Bad Request
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
description: Too Many Requests
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
description: Internal Server Error
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
description: Default Response
|
||||
'204':
|
||||
description: Successful Response
|
||||
description: The benchmark was successfully unregistered.
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Unregister Benchmark
|
||||
summary: Unregister a benchmark.
|
||||
description: Unregister a benchmark.
|
||||
operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete
|
||||
parameters:
|
||||
|
|
@ -885,7 +887,9 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: benchmark_id'
|
||||
description: The ID of the benchmark to unregister.
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to unregister.
|
||||
deprecated: true
|
||||
components:
|
||||
schemas:
|
||||
|
|
@ -7375,20 +7379,23 @@ components:
|
|||
type: string
|
||||
const: benchmark
|
||||
title: Type
|
||||
description: The resource type, always benchmark.
|
||||
default: benchmark
|
||||
dataset_id:
|
||||
type: string
|
||||
title: Dataset Id
|
||||
description: Identifier of the dataset to use for the benchmark evaluation.
|
||||
scoring_functions:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
title: Scoring Functions
|
||||
description: List of scoring function identifiers to apply during evaluation.
|
||||
metadata:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
title: Metadata
|
||||
description: Metadata for this evaluation task
|
||||
description: Metadata for this evaluation task.
|
||||
type: object
|
||||
required:
|
||||
- identifier
|
||||
|
|
@ -7404,10 +7411,12 @@ components:
|
|||
$ref: '#/components/schemas/Benchmark'
|
||||
type: array
|
||||
title: Data
|
||||
description: List of benchmark objects.
|
||||
type: object
|
||||
required:
|
||||
- data
|
||||
title: ListBenchmarksResponse
|
||||
description: Response containing a list of benchmark objects.
|
||||
BenchmarkConfig:
|
||||
properties:
|
||||
eval_candidate:
|
||||
|
|
@ -8369,33 +8378,40 @@ components:
|
|||
benchmark_id:
|
||||
type: string
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to register.
|
||||
dataset_id:
|
||||
type: string
|
||||
title: Dataset Id
|
||||
description: The ID of the dataset to use for the benchmark.
|
||||
scoring_functions:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
title: Scoring Functions
|
||||
description: The scoring functions to use for the benchmark.
|
||||
provider_benchmark_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
description: The ID of the provider benchmark to use for the benchmark.
|
||||
provider_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
description: The ID of the provider to use for the benchmark.
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
description: The metadata to use for the benchmark.
|
||||
type: object
|
||||
required:
|
||||
- benchmark_id
|
||||
- dataset_id
|
||||
- scoring_functions
|
||||
title: RegisterBenchmarkRequest
|
||||
description: Request model for registering a benchmark.
|
||||
AllowedToolsFilter:
|
||||
properties:
|
||||
tool_names:
|
||||
|
|
@ -9601,6 +9617,33 @@ components:
|
|||
- batch_id
|
||||
title: CancelBatchRequest
|
||||
type: object
|
||||
ListBenchmarksRequest:
|
||||
description: Request model for listing benchmarks.
|
||||
properties: {}
|
||||
title: ListBenchmarksRequest
|
||||
type: object
|
||||
GetBenchmarkRequest:
|
||||
description: Request model for getting a benchmark.
|
||||
properties:
|
||||
benchmark_id:
|
||||
description: The ID of the benchmark to get.
|
||||
title: Benchmark Id
|
||||
type: string
|
||||
required:
|
||||
- benchmark_id
|
||||
title: GetBenchmarkRequest
|
||||
type: object
|
||||
UnregisterBenchmarkRequest:
|
||||
description: Request model for unregistering a benchmark.
|
||||
properties:
|
||||
benchmark_id:
|
||||
description: The ID of the benchmark to unregister.
|
||||
title: Benchmark Id
|
||||
type: string
|
||||
required:
|
||||
- benchmark_id
|
||||
title: UnregisterBenchmarkRequest
|
||||
type: object
|
||||
DialogType:
|
||||
description: Parameter type for dialog data with semantic output labels.
|
||||
properties:
|
||||
|
|
|
|||
docs/static/experimental-llama-stack-spec.yaml (vendored): 89 lines changed
|
|
@ -188,7 +188,7 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: List Benchmarks
|
||||
summary: List all benchmarks.
|
||||
description: List all benchmarks.
|
||||
operationId: list_benchmarks_v1alpha_eval_benchmarks_get
|
||||
/v1alpha/eval/benchmarks/{benchmark_id}:
|
||||
|
|
@ -201,20 +201,20 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/Benchmark'
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
description: Bad Request
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
description: Too Many Requests
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
description: Internal Server Error
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
description: Default Response
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Get Benchmark
|
||||
summary: Get a benchmark by its ID.
|
||||
description: Get a benchmark by its ID.
|
||||
operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get
|
||||
parameters:
|
||||
|
|
@ -223,7 +223,9 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: benchmark_id'
|
||||
description: The ID of the benchmark to get.
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to get.
|
||||
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
|
||||
post:
|
||||
responses:
|
||||
|
|
@ -6517,20 +6519,23 @@ components:
|
|||
type: string
|
||||
const: benchmark
|
||||
title: Type
|
||||
description: The resource type, always benchmark.
|
||||
default: benchmark
|
||||
dataset_id:
|
||||
type: string
|
||||
title: Dataset Id
|
||||
description: Identifier of the dataset to use for the benchmark evaluation.
|
||||
scoring_functions:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
title: Scoring Functions
|
||||
description: List of scoring function identifiers to apply during evaluation.
|
||||
metadata:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
title: Metadata
|
||||
description: Metadata for this evaluation task
|
||||
description: Metadata for this evaluation task.
|
||||
type: object
|
||||
required:
|
||||
- identifier
|
||||
|
|
@ -6546,10 +6551,12 @@ components:
|
|||
$ref: '#/components/schemas/Benchmark'
|
||||
type: array
|
||||
title: Data
|
||||
description: List of benchmark objects.
|
||||
type: object
|
||||
required:
|
||||
- data
|
||||
title: ListBenchmarksResponse
|
||||
description: Response containing a list of benchmark objects.
|
||||
BenchmarkConfig:
|
||||
properties:
|
||||
eval_candidate:
|
||||
|
|
@ -7346,6 +7353,45 @@ components:
|
|||
- $ref: '#/components/schemas/RowsDataSource'
|
||||
title: RowsDataSource
|
||||
title: URIDataSource | RowsDataSource
|
||||
RegisterBenchmarkRequest:
|
||||
properties:
|
||||
benchmark_id:
|
||||
type: string
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to register.
|
||||
dataset_id:
|
||||
type: string
|
||||
title: Dataset Id
|
||||
description: The ID of the dataset to use for the benchmark.
|
||||
scoring_functions:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
title: Scoring Functions
|
||||
description: The scoring functions to use for the benchmark.
|
||||
provider_benchmark_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
description: The ID of the provider benchmark to use for the benchmark.
|
||||
provider_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
description: The ID of the provider to use for the benchmark.
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
description: The metadata to use for the benchmark.
|
||||
type: object
|
||||
required:
|
||||
- benchmark_id
|
||||
- dataset_id
|
||||
- scoring_functions
|
||||
title: RegisterBenchmarkRequest
|
||||
description: Request model for registering a benchmark.
|
||||
AllowedToolsFilter:
|
||||
properties:
|
||||
tool_names:
|
||||
|
|
@ -8395,6 +8441,33 @@ components:
|
|||
- batch_id
|
||||
title: CancelBatchRequest
|
||||
type: object
|
||||
ListBenchmarksRequest:
|
||||
description: Request model for listing benchmarks.
|
||||
properties: {}
|
||||
title: ListBenchmarksRequest
|
||||
type: object
|
||||
GetBenchmarkRequest:
|
||||
description: Request model for getting a benchmark.
|
||||
properties:
|
||||
benchmark_id:
|
||||
description: The ID of the benchmark to get.
|
||||
title: Benchmark Id
|
||||
type: string
|
||||
required:
|
||||
- benchmark_id
|
||||
title: GetBenchmarkRequest
|
||||
type: object
|
||||
UnregisterBenchmarkRequest:
|
||||
description: Request model for unregistering a benchmark.
|
||||
properties:
|
||||
benchmark_id:
|
||||
description: The ID of the benchmark to unregister.
|
||||
title: Benchmark Id
|
||||
type: string
|
||||
required:
|
||||
- benchmark_id
|
||||
title: UnregisterBenchmarkRequest
|
||||
type: object
|
||||
DialogType:
|
||||
description: Parameter type for dialog data with semantic output labels.
|
||||
properties:
|
||||
|
|
|
|||
docs/static/llama-stack-spec.yaml (vendored): 73 lines changed
|
|
@ -9166,20 +9166,23 @@ components:
|
|||
type: string
|
||||
const: benchmark
|
||||
title: Type
|
||||
description: The resource type, always benchmark.
|
||||
default: benchmark
|
||||
dataset_id:
|
||||
type: string
|
||||
title: Dataset Id
|
||||
description: Identifier of the dataset to use for the benchmark evaluation.
|
||||
scoring_functions:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
title: Scoring Functions
|
||||
description: List of scoring function identifiers to apply during evaluation.
|
||||
metadata:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
title: Metadata
|
||||
description: Metadata for this evaluation task
|
||||
description: Metadata for this evaluation task.
|
||||
type: object
|
||||
required:
|
||||
- identifier
|
||||
|
|
@ -9195,10 +9198,12 @@ components:
|
|||
$ref: '#/components/schemas/Benchmark'
|
||||
type: array
|
||||
title: Data
|
||||
description: List of benchmark objects.
|
||||
type: object
|
||||
required:
|
||||
- data
|
||||
title: ListBenchmarksResponse
|
||||
description: Response containing a list of benchmark objects.
|
||||
BenchmarkConfig:
|
||||
properties:
|
||||
eval_candidate:
|
||||
|
|
@ -9848,6 +9853,45 @@ components:
|
|||
- $ref: '#/components/schemas/RowsDataSource'
|
||||
title: RowsDataSource
|
||||
title: URIDataSource | RowsDataSource
|
||||
RegisterBenchmarkRequest:
|
||||
properties:
|
||||
benchmark_id:
|
||||
type: string
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to register.
|
||||
dataset_id:
|
||||
type: string
|
||||
title: Dataset Id
|
||||
description: The ID of the dataset to use for the benchmark.
|
||||
scoring_functions:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
title: Scoring Functions
|
||||
description: The scoring functions to use for the benchmark.
|
||||
provider_benchmark_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
description: The ID of the provider benchmark to use for the benchmark.
|
||||
provider_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
description: The ID of the provider to use for the benchmark.
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
description: The metadata to use for the benchmark.
|
||||
type: object
|
||||
required:
|
||||
- benchmark_id
|
||||
- dataset_id
|
||||
- scoring_functions
|
||||
title: RegisterBenchmarkRequest
|
||||
description: Request model for registering a benchmark.
|
||||
AllowedToolsFilter:
|
||||
properties:
|
||||
tool_names:
|
||||
|
|
@ -11053,6 +11097,33 @@ components:
|
|||
- batch_id
|
||||
title: CancelBatchRequest
|
||||
type: object
|
||||
ListBenchmarksRequest:
|
||||
description: Request model for listing benchmarks.
|
||||
properties: {}
|
||||
title: ListBenchmarksRequest
|
||||
type: object
|
||||
GetBenchmarkRequest:
|
||||
description: Request model for getting a benchmark.
|
||||
properties:
|
||||
benchmark_id:
|
||||
description: The ID of the benchmark to get.
|
||||
title: Benchmark Id
|
||||
type: string
|
||||
required:
|
||||
- benchmark_id
|
||||
title: GetBenchmarkRequest
|
||||
type: object
|
||||
UnregisterBenchmarkRequest:
|
||||
description: Request model for unregistering a benchmark.
|
||||
properties:
|
||||
benchmark_id:
|
||||
description: The ID of the benchmark to unregister.
|
||||
title: Benchmark Id
|
||||
type: string
|
||||
required:
|
||||
- benchmark_id
|
||||
title: UnregisterBenchmarkRequest
|
||||
type: object
|
||||
DialogType:
|
||||
description: Parameter type for dialog data with semantic output labels.
|
||||
properties:
|
||||
|
|
|
|||
docs/static/stainless-llama-stack-spec.yaml (vendored): 77 lines changed
|
|
@ -3404,7 +3404,7 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: List Benchmarks
|
||||
summary: List all benchmarks.
|
||||
description: List all benchmarks.
|
||||
operationId: list_benchmarks_v1alpha_eval_benchmarks_get
|
||||
post:
|
||||
|
|
@ -3422,10 +3422,10 @@ paths:
|
|||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
'204':
|
||||
description: Successful Response
|
||||
description: The benchmark was successfully registered.
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Register Benchmark
|
||||
summary: Register a benchmark.
|
||||
description: Register a benchmark.
|
||||
operationId: register_benchmark_v1alpha_eval_benchmarks_post
|
||||
requestBody:
|
||||
|
|
@ -3445,20 +3445,20 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/Benchmark'
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
description: Bad Request
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
description: Too Many Requests
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
description: Internal Server Error
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
description: Default Response
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Get Benchmark
|
||||
summary: Get a benchmark by its ID.
|
||||
description: Get a benchmark by its ID.
|
||||
operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get
|
||||
parameters:
|
||||
|
|
@ -3467,26 +3467,28 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: benchmark_id'
|
||||
description: The ID of the benchmark to get.
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to get.
|
||||
delete:
|
||||
responses:
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
description: Bad Request
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
description: Too Many Requests
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
description: Internal Server Error
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
description: Default Response
|
||||
'204':
|
||||
description: Successful Response
|
||||
description: The benchmark was successfully unregistered.
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Unregister Benchmark
|
||||
summary: Unregister a benchmark.
|
||||
description: Unregister a benchmark.
|
||||
operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete
|
||||
parameters:
|
||||
|
|
@ -3495,7 +3497,9 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: benchmark_id'
|
||||
description: The ID of the benchmark to unregister.
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to unregister.
|
||||
deprecated: true
|
||||
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
|
||||
post:
|
||||
|
|
@ -10391,20 +10395,23 @@ components:
|
|||
type: string
|
||||
const: benchmark
|
||||
title: Type
|
||||
description: The resource type, always benchmark.
|
||||
default: benchmark
|
||||
dataset_id:
|
||||
type: string
|
||||
title: Dataset Id
|
||||
description: Identifier of the dataset to use for the benchmark evaluation.
|
||||
scoring_functions:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
title: Scoring Functions
|
||||
description: List of scoring function identifiers to apply during evaluation.
|
||||
metadata:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
title: Metadata
|
||||
description: Metadata for this evaluation task
|
||||
description: Metadata for this evaluation task.
|
||||
type: object
|
||||
required:
|
||||
- identifier
|
||||
|
|
@ -10420,10 +10427,12 @@ components:
|
|||
$ref: '#/components/schemas/Benchmark'
|
||||
type: array
|
||||
title: Data
|
||||
description: List of benchmark objects.
|
||||
type: object
|
||||
required:
|
||||
- data
|
||||
title: ListBenchmarksResponse
|
||||
description: Response containing a list of benchmark objects.
|
||||
BenchmarkConfig:
|
||||
properties:
|
||||
eval_candidate:
|
||||
|
|
@ -11385,33 +11394,40 @@ components:
|
|||
benchmark_id:
|
||||
type: string
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to register.
|
||||
dataset_id:
|
||||
type: string
|
||||
title: Dataset Id
|
||||
description: The ID of the dataset to use for the benchmark.
|
||||
scoring_functions:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
title: Scoring Functions
|
||||
description: The scoring functions to use for the benchmark.
|
||||
provider_benchmark_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
description: The ID of the provider benchmark to use for the benchmark.
|
||||
provider_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
description: The ID of the provider to use for the benchmark.
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
description: The metadata to use for the benchmark.
|
||||
type: object
|
||||
required:
|
||||
- benchmark_id
|
||||
- dataset_id
|
||||
- scoring_functions
|
||||
title: RegisterBenchmarkRequest
|
||||
description: Request model for registering a benchmark.
|
||||
AllowedToolsFilter:
|
||||
properties:
|
||||
tool_names:
|
||||
|
|
@ -12617,6 +12633,33 @@ components:
|
|||
- batch_id
|
||||
title: CancelBatchRequest
|
||||
type: object
|
||||
ListBenchmarksRequest:
|
||||
description: Request model for listing benchmarks.
|
||||
properties: {}
|
||||
title: ListBenchmarksRequest
|
||||
type: object
|
||||
GetBenchmarkRequest:
|
||||
description: Request model for getting a benchmark.
|
||||
properties:
|
||||
benchmark_id:
|
||||
description: The ID of the benchmark to get.
|
||||
title: Benchmark Id
|
||||
type: string
|
||||
required:
|
||||
- benchmark_id
|
||||
title: GetBenchmarkRequest
|
||||
type: object
|
||||
UnregisterBenchmarkRequest:
|
||||
description: Request model for unregistering a benchmark.
|
||||
properties:
|
||||
benchmark_id:
|
||||
description: The ID of the benchmark to unregister.
|
||||
title: Benchmark Id
|
||||
type: string
|
||||
required:
|
||||
- benchmark_id
|
||||
title: UnregisterBenchmarkRequest
|
||||
type: object
|
||||
DialogType:
|
||||
description: Parameter type for dialog data with semantic output labels.
|
||||
properties:
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ from importlib.metadata import version
|
|||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.core.datatypes import StackConfig
|
||||
from llama_stack.core.distribution import builtin_automatically_routed_apis
|
||||
from llama_stack.core.external import load_external_apis
|
||||
from llama_stack.core.server.fastapi_router_registry import (
|
||||
_ROUTER_FACTORIES,
|
||||
|
|
@ -65,6 +66,17 @@ class DistributionInspectImpl(Inspect):
|
|||
def _get_provider_types(api: Api) -> list[str]:
|
||||
if api.value in ["providers", "inspect"]:
|
||||
return [] # These APIs don't have "real" providers they're internal to the stack
|
||||
|
||||
# For routing table APIs, look up providers from their router API
|
||||
# (e.g., benchmarks -> eval, models -> inference, etc.)
|
||||
auto_routed_apis = builtin_automatically_routed_apis()
|
||||
for auto_routed in auto_routed_apis:
|
||||
if auto_routed.routing_table_api == api:
|
||||
# This is a routing table API, use its router API for providers
|
||||
providers = config.providers.get(auto_routed.router_api.value, [])
|
||||
return [p.provider_type for p in providers] if providers else []
|
||||
|
||||
# Regular API, look up providers directly
|
||||
providers = config.providers.get(api.value, [])
|
||||
return [p.provider_type for p in providers] if providers else []
|
||||
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import json
|
|||
import logging # allow-direct-logging
|
||||
import os
|
||||
import sys
|
||||
import typing
|
||||
from enum import Enum
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
|
|
@ -490,6 +491,25 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
|||
unwrapped_body_param = param
|
||||
break
|
||||
|
||||
# Check for parameters with Depends() annotation (FastAPI router endpoints)
|
||||
# These need special handling: construct the request model from body
|
||||
depends_param = None
|
||||
for param in params_list:
|
||||
param_type = param.annotation
|
||||
if get_origin(param_type) is typing.Annotated:
|
||||
args = get_args(param_type)
|
||||
if len(args) > 1:
|
||||
# Check if any metadata is Depends
|
||||
metadata = args[1:]
|
||||
for item in metadata:
|
||||
# Check if it's a Depends object (has dependency attribute or is a callable)
|
||||
# Depends objects typically have a 'dependency' attribute or are callable functions
|
||||
if hasattr(item, "dependency") or callable(item) or "Depends" in str(type(item)):
|
||||
depends_param = param
|
||||
break
|
||||
if depends_param:
|
||||
break
|
||||
|
||||
# Convert parameters to Pydantic models where needed
|
||||
converted_body = {}
|
||||
for param_name, param in sig.parameters.items():
|
||||
|
|
@ -500,6 +520,27 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
|||
else:
|
||||
converted_body[param_name] = convert_to_pydantic(param.annotation, value)
|
||||
|
||||
# Handle Depends parameter: construct request model from body
|
||||
if depends_param and depends_param.name not in converted_body:
|
||||
param_type = depends_param.annotation
|
||||
if get_origin(param_type) is typing.Annotated:
|
||||
base_type = get_args(param_type)[0]
|
||||
# Handle Union types (e.g., SomeRequestModel | None) - extract the non-None type
|
||||
# In Python 3.10+, Union types created with | syntax are still typing.Union
|
||||
origin = get_origin(base_type)
|
||||
if origin is Union:
|
||||
# Get the first non-None type from the Union
|
||||
union_args = get_args(base_type)
|
||||
base_type = next(
|
||||
(t for t in union_args if t is not type(None) and t is not None),
|
||||
union_args[0] if union_args else None,
|
||||
)
|
||||
|
||||
# Only try to instantiate if it's a class (not a Union or other non-callable type)
|
||||
if base_type is not None and inspect.isclass(base_type) and callable(base_type):
|
||||
# Construct the request model from all body parameters
|
||||
converted_body[depends_param.name] = base_type(**body)
|
||||
|
||||
# handle unwrapped body parameter after processing all named parameters
|
||||
if unwrapped_body_param:
|
||||
base_type = get_args(unwrapped_body_param.annotation)[0]
|
||||
|
|
|
|||
|
|
@ -4,13 +4,20 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
BenchmarkWithOwner,
|
||||
)
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack_api import Benchmark, Benchmarks, ListBenchmarksResponse
|
||||
from llama_stack_api import (
|
||||
Benchmark,
|
||||
Benchmarks,
|
||||
GetBenchmarkRequest,
|
||||
ListBenchmarksRequest,
|
||||
ListBenchmarksResponse,
|
||||
RegisterBenchmarkRequest,
|
||||
UnregisterBenchmarkRequest,
|
||||
)
|
||||
|
||||
from .common import CommonRoutingTableImpl
|
||||
|
||||
|
|
@ -18,26 +25,21 @@ logger = get_logger(name=__name__, category="core::routing_tables")
|
|||
|
||||
|
||||
class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
|
||||
async def list_benchmarks(self) -> ListBenchmarksResponse:
|
||||
async def list_benchmarks(self, request: ListBenchmarksRequest) -> ListBenchmarksResponse:
|
||||
return ListBenchmarksResponse(data=await self.get_all_with_type("benchmark"))
|
||||
|
||||
async def get_benchmark(self, benchmark_id: str) -> Benchmark:
|
||||
benchmark = await self.get_object_by_identifier("benchmark", benchmark_id)
|
||||
async def get_benchmark(self, request: GetBenchmarkRequest) -> Benchmark:
|
||||
benchmark = await self.get_object_by_identifier("benchmark", request.benchmark_id)
|
||||
if benchmark is None:
|
||||
raise ValueError(f"Benchmark '{benchmark_id}' not found")
|
||||
raise ValueError(f"Benchmark '{request.benchmark_id}' not found")
|
||||
return benchmark
|
||||
|
||||
async def register_benchmark(
|
||||
self,
|
||||
benchmark_id: str,
|
||||
dataset_id: str,
|
||||
scoring_functions: list[str],
|
||||
metadata: dict[str, Any] | None = None,
|
||||
provider_benchmark_id: str | None = None,
|
||||
provider_id: str | None = None,
|
||||
request: RegisterBenchmarkRequest,
|
||||
) -> None:
|
||||
if metadata is None:
|
||||
metadata = {}
|
||||
metadata = request.metadata if request.metadata is not None else {}
|
||||
provider_id = request.provider_id
|
||||
if provider_id is None:
|
||||
if len(self.impls_by_provider_id) == 1:
|
||||
provider_id = list(self.impls_by_provider_id.keys())[0]
|
||||
|
|
@ -45,18 +47,20 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
|
|||
raise ValueError(
|
||||
"No provider specified and multiple providers available. Please specify a provider_id."
|
||||
)
|
||||
provider_benchmark_id = request.provider_benchmark_id
|
||||
if provider_benchmark_id is None:
|
||||
provider_benchmark_id = benchmark_id
|
||||
provider_benchmark_id = request.benchmark_id
|
||||
benchmark = BenchmarkWithOwner(
|
||||
identifier=benchmark_id,
|
||||
dataset_id=dataset_id,
|
||||
scoring_functions=scoring_functions,
|
||||
identifier=request.benchmark_id,
|
||||
dataset_id=request.dataset_id,
|
||||
scoring_functions=request.scoring_functions,
|
||||
metadata=metadata,
|
||||
provider_id=provider_id,
|
||||
provider_resource_id=provider_benchmark_id,
|
||||
)
|
||||
await self.register_object(benchmark)
|
||||
|
||||
async def unregister_benchmark(self, benchmark_id: str) -> None:
|
||||
existing_benchmark = await self.get_benchmark(benchmark_id)
|
||||
async def unregister_benchmark(self, request: UnregisterBenchmarkRequest) -> None:
|
||||
get_request = GetBenchmarkRequest(benchmark_id=request.benchmark_id)
|
||||
existing_benchmark = await self.get_benchmark(get_request)
|
||||
await self.unregister_object(existing_benchmark)
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ from fastapi import APIRouter
|
|||
from fastapi.routing import APIRoute
|
||||
from starlette.routing import Route
|
||||
|
||||
from llama_stack_api import batches
|
||||
from llama_stack_api import batches, benchmarks
|
||||
|
||||
# Router factories for APIs that have FastAPI routers
|
||||
# Add new APIs here as they are migrated to the router system
|
||||
|
|
@ -25,6 +25,7 @@ from llama_stack_api.datatypes import Api
|
|||
|
||||
_ROUTER_FACTORIES: dict[str, Callable[[Any], APIRouter]] = {
|
||||
"batches": batches.fastapi_routes.create_router,
|
||||
"benchmarks": benchmarks.fastapi_routes.create_router,
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -13,6 +13,11 @@ from aiohttp import hdrs
|
|||
from starlette.routing import Route
|
||||
|
||||
from llama_stack.core.resolver import api_protocol_map
|
||||
from llama_stack.core.server.fastapi_router_registry import (
|
||||
_ROUTER_FACTORIES,
|
||||
build_fastapi_router,
|
||||
get_router_routes,
|
||||
)
|
||||
from llama_stack_api import Api, ExternalApiSpec, WebMethod
|
||||
|
||||
EndpointFunc = Callable[..., Any]
|
||||
|
|
@ -85,7 +90,53 @@ def initialize_route_impls(impls, external_apis: dict[Api, ExternalApiSpec] | No
|
|||
|
||||
return f"^{pattern}$"
|
||||
|
||||
# Process routes from FastAPI routers
|
||||
for api_name in _ROUTER_FACTORIES.keys():
|
||||
api = Api(api_name)
|
||||
if api not in impls:
|
||||
continue
|
||||
impl = impls[api]
|
||||
router = build_fastapi_router(api, impl)
|
||||
if router:
|
||||
router_routes = get_router_routes(router)
|
||||
for route in router_routes:
|
||||
# Get the endpoint function from the route
|
||||
# For FastAPI routes, the endpoint is the actual function
|
||||
func = route.endpoint
|
||||
if func is None:
|
||||
continue
|
||||
|
||||
# Get the first (and typically only) method from the set, filtering out HEAD
|
||||
available_methods = [m for m in (route.methods or []) if m != "HEAD"]
|
||||
if not available_methods:
|
||||
continue # Skip if only HEAD method is available
|
||||
method = available_methods[0].lower()
|
||||
|
||||
if method not in route_impls:
|
||||
route_impls[method] = {}
|
||||
|
||||
# Create a minimal WebMethod for router routes (needed for RouteMatch tuple)
|
||||
# We don't have webmethod metadata for router routes, so create a minimal one
|
||||
# that has the attributes used by the library client (descriptive_name for tracing)
|
||||
#
|
||||
# TODO: Long-term migration plan (once all APIs are migrated to FastAPI routers):
|
||||
# - Extract summary from APIRoute: route.summary (available on FastAPI APIRoute objects)
|
||||
# - Pass summary directly in RouteMatch instead of WebMethod
|
||||
# - Remove this WebMethod() instantiation entirely
|
||||
# - Update library_client.py to use the extracted summary instead of webmethod.descriptive_name
|
||||
webmethod = WebMethod(descriptive_name=None)
|
||||
route_impls[method][_convert_path_to_regex(route.path)] = (
|
||||
func,
|
||||
route.path,
|
||||
webmethod,
|
||||
)
|
||||
|
||||
# Process routes from legacy webmethod-based APIs
|
||||
for api, api_routes in api_to_routes.items():
|
||||
# Skip APIs that have routers (already processed above)
|
||||
if api.value in _ROUTER_FACTORIES:
|
||||
continue
|
||||
|
||||
if api not in impls:
|
||||
continue
|
||||
for route, webmethod in api_routes:
|
||||
|
|
|
|||
|
|
@ -6,12 +6,14 @@
|
|||
|
||||
import asyncio
|
||||
import importlib.resources
|
||||
import inspect
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
from typing import Any
|
||||
from typing import Any, get_type_hints
|
||||
|
||||
import yaml
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
|
||||
from llama_stack.core.datatypes import Provider, SafetyConfig, StackConfig, VectorStoresConfig
|
||||
|
|
@ -108,6 +110,81 @@ REGISTRY_REFRESH_TASK = None
|
|||
TEST_RECORDING_CONTEXT = None
|
||||
|
||||
|
||||
def is_request_model(t: Any) -> bool:
|
||||
"""Check if a type is a request model (Pydantic BaseModel).
|
||||
|
||||
Args:
|
||||
t: The type to check
|
||||
|
||||
Returns:
|
||||
True if the type is a Pydantic BaseModel subclass, False otherwise
|
||||
"""
|
||||
|
||||
return inspect.isclass(t) and issubclass(t, BaseModel)
|
||||
|
||||
|
||||
async def invoke_with_optional_request(method: Any) -> Any:
|
||||
"""Invoke a method, automatically creating a request instance if needed.
|
||||
|
||||
For APIs that use request models, this will create an empty request object.
|
||||
For backward compatibility, falls back to calling without arguments.
|
||||
|
||||
Uses get_type_hints() to resolve forward references (e.g., "ListBenchmarksRequest" -> actual class).
|
||||
|
||||
Handles methods with:
|
||||
- No parameters: calls without arguments
|
||||
- One or more request model parameters: creates empty instances for each
|
||||
- Mixed parameters: creates request models, uses defaults for others
|
||||
- Required non-request-model parameters without defaults: falls back to calling without arguments
|
||||
|
||||
Args:
|
||||
method: The method to invoke
|
||||
|
||||
Returns:
|
||||
The result of calling the method
|
||||
"""
|
||||
try:
|
||||
hints = get_type_hints(method)
|
||||
except Exception:
|
||||
# Forward references can't be resolved, fall back to calling without request
|
||||
return await method()
|
||||
|
||||
params = list(inspect.signature(method).parameters.values())
|
||||
params = [p for p in params if p.name != "self"]
|
||||
|
||||
if not params:
|
||||
return await method()
|
||||
|
||||
# Build arguments for the method call
|
||||
args: dict[str, Any] = {}
|
||||
can_call = True
|
||||
|
||||
for param in params:
|
||||
param_type = hints.get(param.name)
|
||||
|
||||
# If it's a request model, try to create an empty instance
|
||||
if param_type and is_request_model(param_type):
|
||||
try:
|
||||
args[param.name] = param_type()
|
||||
except Exception:
|
||||
# Request model requires arguments, can't create empty instance
|
||||
can_call = False
|
||||
break
|
||||
# If it has a default value, we can skip it (will use default)
|
||||
elif param.default != inspect.Parameter.empty:
|
||||
continue
|
||||
# Required parameter that's not a request model - can't provide it
|
||||
else:
|
||||
can_call = False
|
||||
break
|
||||
|
||||
if can_call and args:
|
||||
return await method(**args)
|
||||
|
||||
# Fall back to calling without arguments for backward compatibility
|
||||
return await method()
|
||||
|
||||
|
||||
async def register_resources(run_config: StackConfig, impls: dict[Api, Any]):
|
||||
for rsrc, api, register_method, list_method in RESOURCES:
|
||||
objects = getattr(run_config.registered_resources, rsrc)
|
||||
|
|
@ -129,7 +206,7 @@ async def register_resources(run_config: StackConfig, impls: dict[Api, Any]):
|
|||
await method(**{k: getattr(obj, k) for k in obj.model_dump().keys()})
|
||||
|
||||
method = getattr(impls[api], list_method)
|
||||
response = await method()
|
||||
response = await invoke_with_optional_request(method)
|
||||
|
||||
objects_to_process = response.data if hasattr(response, "data") else response
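For clarity, a condensed, runnable restatement of the dispatch idea in the helper above; the names are illustrative and the real `invoke_with_optional_request()` additionally handles defaults, mixed parameters, and fallback cases:

```python
# Sketch: build an empty request model for new-style list methods,
# fall back to a bare call for legacy no-argument methods.
import asyncio
import inspect
from typing import get_type_hints

from pydantic import BaseModel


class ListBenchmarksRequest(BaseModel):
    """Empty request model, mirroring the one added by this PR."""


async def call_list_method(method):
    hints = get_type_hints(method)
    params = [p for p in inspect.signature(method).parameters.values() if p.name != "self"]
    if not params:
        return await method()  # legacy style: no parameters
    kwargs = {}
    for p in params:
        t = hints.get(p.name)
        if inspect.isclass(t) and issubclass(t, BaseModel):
            kwargs[p.name] = t()  # new style: instantiate the request model
    return await method(**kwargs)


async def new_style(request: ListBenchmarksRequest) -> list[str]:
    return ["bench-1"]


async def legacy_style() -> list[str]:
    return ["model-1"]


print(asyncio.run(call_list_method(new_style)))    # ['bench-1']
print(asyncio.run(call_list_method(legacy_style)))  # ['model-1']
```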
|
||||
|
||||
|
|
|
|||
|
|
@ -40,7 +40,11 @@ from .benchmarks import (
|
|||
BenchmarkInput,
|
||||
Benchmarks,
|
||||
CommonBenchmarkFields,
|
||||
GetBenchmarkRequest,
|
||||
ListBenchmarksRequest,
|
||||
ListBenchmarksResponse,
|
||||
RegisterBenchmarkRequest,
|
||||
UnregisterBenchmarkRequest,
|
||||
)
|
||||
|
||||
# Import commonly used types from common submodule
|
||||
|
|
@ -567,7 +571,11 @@ __all__ = [
|
|||
"LLMRAGQueryGeneratorConfig",
|
||||
"ListBatchesResponse",
|
||||
"RetrieveBatchRequest",
|
||||
"GetBenchmarkRequest",
|
||||
"ListBenchmarksRequest",
|
||||
"ListBenchmarksResponse",
|
||||
"RegisterBenchmarkRequest",
|
||||
"UnregisterBenchmarkRequest",
|
||||
"ListDatasetsResponse",
|
||||
"ListModelsResponse",
|
||||
"ListOpenAIChatCompletionResponse",
|
||||
|
|
|
|||
|
|
@ -1,105 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
from typing import Any, Literal, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack_api.resource import Resource, ResourceType
|
||||
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
||||
from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
|
||||
|
||||
|
||||
class CommonBenchmarkFields(BaseModel):
|
||||
dataset_id: str
|
||||
scoring_functions: list[str]
|
||||
metadata: dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
description="Metadata for this evaluation task",
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Benchmark(CommonBenchmarkFields, Resource):
|
||||
"""A benchmark resource for evaluating model performance.
|
||||
|
||||
:param dataset_id: Identifier of the dataset to use for the benchmark evaluation
|
||||
:param scoring_functions: List of scoring function identifiers to apply during evaluation
|
||||
:param metadata: Metadata for this evaluation task
|
||||
:param type: The resource type, always benchmark
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.benchmark] = ResourceType.benchmark
|
||||
|
||||
@property
|
||||
def benchmark_id(self) -> str:
|
||||
return self.identifier
|
||||
|
||||
@property
|
||||
def provider_benchmark_id(self) -> str | None:
|
||||
return self.provider_resource_id
|
||||
|
||||
|
||||
class BenchmarkInput(CommonBenchmarkFields, BaseModel):
|
||||
benchmark_id: str
|
||||
provider_id: str | None = None
|
||||
provider_benchmark_id: str | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ListBenchmarksResponse(BaseModel):
|
||||
data: list[Benchmark]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class Benchmarks(Protocol):
|
||||
@webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def list_benchmarks(self) -> ListBenchmarksResponse:
|
||||
"""List all benchmarks.
|
||||
|
||||
:returns: A ListBenchmarksResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def get_benchmark(
|
||||
self,
|
||||
benchmark_id: str,
|
||||
) -> Benchmark:
|
||||
"""Get a benchmark by its ID.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to get.
|
||||
:returns: A Benchmark.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
|
||||
async def register_benchmark(
|
||||
self,
|
||||
benchmark_id: str,
|
||||
dataset_id: str,
|
||||
scoring_functions: list[str],
|
||||
provider_benchmark_id: str | None = None,
|
||||
provider_id: str | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
) -> None:
|
||||
"""Register a benchmark.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to register.
|
||||
:param dataset_id: The ID of the dataset to use for the benchmark.
|
||||
:param scoring_functions: The scoring functions to use for the benchmark.
|
||||
:param provider_benchmark_id: The ID of the provider benchmark to use for the benchmark.
|
||||
:param provider_id: The ID of the provider to use for the benchmark.
|
||||
:param metadata: The metadata to use for the benchmark.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
|
||||
async def unregister_benchmark(self, benchmark_id: str) -> None:
|
||||
"""Unregister a benchmark.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to unregister.
|
||||
"""
|
||||
...
|
||||
src/llama_stack_api/benchmarks/__init__.py (new file): 43 lines
|
|
@ -0,0 +1,43 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
"""Benchmarks API protocol and models.
|
||||
|
||||
This module contains the Benchmarks protocol definition.
|
||||
Pydantic models are defined in llama_stack_api.benchmarks.models.
|
||||
The FastAPI router is defined in llama_stack_api.benchmarks.fastapi_routes.
|
||||
"""
|
||||
|
||||
# Import fastapi_routes for router factory access
|
||||
from . import fastapi_routes
|
||||
|
||||
# Import protocol for re-export
|
||||
from .api import Benchmarks
|
||||
|
||||
# Import models for re-export
|
||||
from .models import (
|
||||
Benchmark,
|
||||
BenchmarkInput,
|
||||
CommonBenchmarkFields,
|
||||
GetBenchmarkRequest,
|
||||
ListBenchmarksRequest,
|
||||
ListBenchmarksResponse,
|
||||
RegisterBenchmarkRequest,
|
||||
UnregisterBenchmarkRequest,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"Benchmarks",
|
||||
"Benchmark",
|
||||
"BenchmarkInput",
|
||||
"CommonBenchmarkFields",
|
||||
"ListBenchmarksResponse",
|
||||
"ListBenchmarksRequest",
|
||||
"GetBenchmarkRequest",
|
||||
"RegisterBenchmarkRequest",
|
||||
"UnregisterBenchmarkRequest",
|
||||
"fastapi_routes",
|
||||
]
|
||||
src/llama_stack_api/benchmarks/api.py (new file): 39 lines
|
|
@ -0,0 +1,39 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Protocol, runtime_checkable
|
||||
|
||||
from .models import (
|
||||
Benchmark,
|
||||
GetBenchmarkRequest,
|
||||
ListBenchmarksRequest,
|
||||
ListBenchmarksResponse,
|
||||
RegisterBenchmarkRequest,
|
||||
UnregisterBenchmarkRequest,
|
||||
)
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class Benchmarks(Protocol):
|
||||
async def list_benchmarks(
|
||||
self,
|
||||
request: ListBenchmarksRequest,
|
||||
) -> ListBenchmarksResponse: ...
|
||||
|
||||
async def get_benchmark(
|
||||
self,
|
||||
request: GetBenchmarkRequest,
|
||||
) -> Benchmark: ...
|
||||
|
||||
async def register_benchmark(
|
||||
self,
|
||||
request: RegisterBenchmarkRequest,
|
||||
) -> None: ...
|
||||
|
||||
async def unregister_benchmark(
|
||||
self,
|
||||
request: UnregisterBenchmarkRequest,
|
||||
) -> None: ...
|
||||
src/llama_stack_api/benchmarks/fastapi_routes.py (new file): 109 lines
|
|
@ -0,0 +1,109 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
"""FastAPI router for the Benchmarks API.
|
||||
|
||||
This module defines the FastAPI router for the Benchmarks API using standard
|
||||
FastAPI route decorators. The router is defined in the API package to keep
|
||||
all API-related code together.
|
||||
"""
|
||||
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import APIRouter, Body, Depends
|
||||
|
||||
from llama_stack_api.router_utils import create_path_dependency, create_query_dependency, standard_responses
|
||||
from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
|
||||
|
||||
from .api import Benchmarks
|
||||
from .models import (
|
||||
Benchmark,
|
||||
GetBenchmarkRequest,
|
||||
ListBenchmarksRequest,
|
||||
ListBenchmarksResponse,
|
||||
RegisterBenchmarkRequest,
|
||||
UnregisterBenchmarkRequest,
|
||||
)
|
||||
|
||||
# Automatically generate dependency functions from Pydantic models
|
||||
# This ensures the models are the single source of truth for descriptions
|
||||
get_list_benchmarks_request = create_query_dependency(ListBenchmarksRequest)
|
||||
get_get_benchmark_request = create_path_dependency(GetBenchmarkRequest)
|
||||
get_unregister_benchmark_request = create_path_dependency(UnregisterBenchmarkRequest)
|
||||
|
||||
|
||||
def create_router(impl: Benchmarks) -> APIRouter:
|
||||
"""Create a FastAPI router for the Benchmarks API.
|
||||
|
||||
Args:
|
||||
impl: The Benchmarks implementation instance
|
||||
|
||||
Returns:
|
||||
APIRouter configured for the Benchmarks API
|
||||
"""
|
||||
router = APIRouter(
|
||||
prefix=f"/{LLAMA_STACK_API_V1ALPHA}",
|
||||
tags=["Benchmarks"],
|
||||
responses=standard_responses,
|
||||
)
|
||||
|
||||
@router.get(
|
||||
"/eval/benchmarks",
|
||||
response_model=ListBenchmarksResponse,
|
||||
summary="List all benchmarks.",
|
||||
description="List all benchmarks.",
|
||||
responses={
|
||||
200: {"description": "A ListBenchmarksResponse."},
|
||||
},
|
||||
)
|
||||
async def list_benchmarks(
|
||||
request: Annotated[ListBenchmarksRequest, Depends(get_list_benchmarks_request)],
|
||||
) -> ListBenchmarksResponse:
|
||||
return await impl.list_benchmarks(request)
|
||||
|
||||
@router.get(
|
||||
"/eval/benchmarks/{benchmark_id}",
|
||||
response_model=Benchmark,
|
||||
summary="Get a benchmark by its ID.",
|
||||
description="Get a benchmark by its ID.",
|
||||
responses={
|
||||
200: {"description": "A Benchmark."},
|
||||
},
|
||||
)
|
||||
async def get_benchmark(
|
||||
request: Annotated[GetBenchmarkRequest, Depends(get_get_benchmark_request)],
|
||||
) -> Benchmark:
|
||||
return await impl.get_benchmark(request)
|
||||
|
||||
@router.post(
|
||||
"/eval/benchmarks",
|
||||
summary="Register a benchmark.",
|
||||
description="Register a benchmark.",
|
||||
responses={
|
||||
200: {"description": "The benchmark was successfully registered."},
|
||||
},
|
||||
deprecated=True,
|
||||
)
|
||||
async def register_benchmark(
|
||||
request: Annotated[RegisterBenchmarkRequest, Body(...)],
|
||||
) -> None:
|
||||
return await impl.register_benchmark(request)
|
||||
|
||||
@router.delete(
|
||||
"/eval/benchmarks/{benchmark_id}",
|
||||
summary="Unregister a benchmark.",
|
||||
description="Unregister a benchmark.",
|
||||
responses={
|
||||
200: {"description": "The benchmark was successfully unregistered."},
|
||||
},
|
||||
deprecated=True,
|
||||
)
|
||||
async def unregister_benchmark(
|
||||
request: Annotated[UnregisterBenchmarkRequest, Depends(get_unregister_benchmark_request)],
|
||||
) -> None:
|
||||
return await impl.unregister_benchmark(request)
|
||||
|
||||
return router
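For reference, a sketch of how the router factory above can be mounted on a plain FastAPI app. `StubBenchmarks` is a hypothetical toy implementation used purely for illustration, and the `Benchmark(...)` keyword arguments are assumed from the `BenchmarkWithOwner` construction shown in the routing-table diff; in the stack itself the real `BenchmarksRoutingTable` is wired in instead:

```python
# Minimal mounting sketch (not part of the PR).
from fastapi import FastAPI

from llama_stack_api.benchmarks import fastapi_routes
from llama_stack_api.benchmarks.models import (
    Benchmark,
    GetBenchmarkRequest,
    ListBenchmarksRequest,
    ListBenchmarksResponse,
    RegisterBenchmarkRequest,
    UnregisterBenchmarkRequest,
)


class StubBenchmarks:
    """Hypothetical in-memory implementation of the Benchmarks protocol."""

    def __init__(self) -> None:
        self._benchmarks: dict[str, Benchmark] = {}

    async def list_benchmarks(self, request: ListBenchmarksRequest) -> ListBenchmarksResponse:
        return ListBenchmarksResponse(data=list(self._benchmarks.values()))

    async def get_benchmark(self, request: GetBenchmarkRequest) -> Benchmark:
        return self._benchmarks[request.benchmark_id]

    async def register_benchmark(self, request: RegisterBenchmarkRequest) -> None:
        # Field names assumed from the BenchmarkWithOwner construction above.
        self._benchmarks[request.benchmark_id] = Benchmark(
            identifier=request.benchmark_id,
            provider_id=request.provider_id or "stub",
            provider_resource_id=request.provider_benchmark_id or request.benchmark_id,
            dataset_id=request.dataset_id,
            scoring_functions=request.scoring_functions,
            metadata=request.metadata or {},
        )

    async def unregister_benchmark(self, request: UnregisterBenchmarkRequest) -> None:
        self._benchmarks.pop(request.benchmark_id, None)


app = FastAPI()
app.include_router(fastapi_routes.create_router(StubBenchmarks()))
# GET /v1alpha/eval/benchmarks now serves ListBenchmarksResponse from the stub.
```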
|
||||
src/llama_stack_api/benchmarks/models.py (new file): 109 lines
|
|
@ -0,0 +1,109 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
"""Pydantic models for Benchmarks API requests and responses.
|
||||
|
||||
This module defines the request and response models for the Benchmarks API
|
||||
using Pydantic with Field descriptions for OpenAPI schema generation.
|
||||
"""
|
||||
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack_api.resource import Resource, ResourceType
|
||||
from llama_stack_api.schema_utils import json_schema_type
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ListBenchmarksRequest(BaseModel):
|
||||
"""Request model for listing benchmarks."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class GetBenchmarkRequest(BaseModel):
|
||||
"""Request model for getting a benchmark."""
|
||||
|
||||
benchmark_id: str = Field(..., description="The ID of the benchmark to get.")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RegisterBenchmarkRequest(BaseModel):
|
||||
"""Request model for registering a benchmark."""
|
||||
|
||||
benchmark_id: str = Field(..., description="The ID of the benchmark to register.")
|
||||
dataset_id: str = Field(..., description="The ID of the dataset to use for the benchmark.")
|
||||
scoring_functions: list[str] = Field(..., description="The scoring functions to use for the benchmark.")
|
||||
provider_benchmark_id: str | None = Field(
|
||||
default=None, description="The ID of the provider benchmark to use for the benchmark."
|
||||
)
|
||||
provider_id: str | None = Field(default=None, description="The ID of the provider to use for the benchmark.")
|
||||
metadata: dict[str, Any] | None = Field(default=None, description="The metadata to use for the benchmark.")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class UnregisterBenchmarkRequest(BaseModel):
|
||||
"""Request model for unregistering a benchmark."""
|
||||
|
||||
benchmark_id: str = Field(..., description="The ID of the benchmark to unregister.")
|
||||
|
||||
|
||||
class CommonBenchmarkFields(BaseModel):
|
||||
dataset_id: str = Field(..., description="Identifier of the dataset to use for the benchmark evaluation.")
|
||||
scoring_functions: list[str] = Field(
|
||||
..., description="List of scoring function identifiers to apply during evaluation."
|
||||
)
|
||||
metadata: dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
description="Metadata for this evaluation task.",
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Benchmark(CommonBenchmarkFields, Resource):
|
||||
"""A benchmark resource for evaluating model performance."""
|
||||
|
||||
type: Literal[ResourceType.benchmark] = Field(
|
||||
default=ResourceType.benchmark,
|
||||
description="The resource type, always benchmark.",
|
||||
)
|
||||
|
||||
@property
|
||||
def benchmark_id(self) -> str:
|
||||
return self.identifier
|
||||
|
||||
@property
|
||||
def provider_benchmark_id(self) -> str | None:
|
||||
return self.provider_resource_id
|
||||
|
||||
|
||||
class BenchmarkInput(CommonBenchmarkFields, BaseModel):
|
||||
benchmark_id: str = Field(..., description="The ID of the benchmark.")
|
||||
provider_id: str | None = Field(default=None, description="The ID of the provider to use for the benchmark.")
|
||||
provider_benchmark_id: str | None = Field(
|
||||
default=None, description="The ID of the provider benchmark to use for the benchmark."
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ListBenchmarksResponse(BaseModel):
|
||||
"""Response containing a list of benchmark objects."""
|
||||
|
||||
data: list[Benchmark] = Field(..., description="List of benchmark objects.")
|
||||
|
||||
|
||||
__all__ = [
|
||||
"ListBenchmarksRequest",
|
||||
"GetBenchmarkRequest",
|
||||
"RegisterBenchmarkRequest",
|
||||
"UnregisterBenchmarkRequest",
|
||||
"CommonBenchmarkFields",
|
||||
"Benchmark",
|
||||
"BenchmarkInput",
|
||||
"ListBenchmarksResponse",
|
||||
]
|
||||
|
|
@ -22,14 +22,17 @@ from llama_stack_api import (
|
|||
Api,
|
||||
Dataset,
|
||||
DatasetPurpose,
|
||||
ListBenchmarksRequest,
|
||||
ListToolDefsResponse,
|
||||
Model,
|
||||
ModelNotFoundError,
|
||||
ModelType,
|
||||
NumberType,
|
||||
RegisterBenchmarkRequest,
|
||||
Shield,
|
||||
ToolDef,
|
||||
ToolGroup,
|
||||
UnregisterBenchmarkRequest,
|
||||
URIDataSource,
|
||||
)
|
||||
|
||||
|
|
@ -420,24 +423,26 @@ async def test_benchmarks_routing_table(cached_disk_dist_registry):
|
|||
|
||||
# Register multiple benchmarks and verify listing
|
||||
await table.register_benchmark(
|
||||
benchmark_id="test-benchmark",
|
||||
dataset_id="test-dataset",
|
||||
scoring_functions=["test-scoring-fn", "test-scoring-fn-2"],
|
||||
RegisterBenchmarkRequest(
|
||||
benchmark_id="test-benchmark",
|
||||
dataset_id="test-dataset",
|
||||
scoring_functions=["test-scoring-fn", "test-scoring-fn-2"],
|
||||
)
|
||||
)
|
||||
benchmarks = await table.list_benchmarks()
|
||||
benchmarks = await table.list_benchmarks(ListBenchmarksRequest())
|
||||
|
||||
assert len(benchmarks.data) == 1
|
||||
benchmark_ids = {b.identifier for b in benchmarks.data}
|
||||
assert "test-benchmark" in benchmark_ids
|
||||
|
||||
# Unregister the benchmark and verify removal
|
||||
await table.unregister_benchmark(benchmark_id="test-benchmark")
|
||||
benchmarks_after = await table.list_benchmarks()
|
||||
await table.unregister_benchmark(UnregisterBenchmarkRequest(benchmark_id="test-benchmark"))
|
||||
benchmarks_after = await table.list_benchmarks(ListBenchmarksRequest())
|
||||
assert len(benchmarks_after.data) == 0
|
||||
|
||||
# Unregistering a non-existent benchmark should raise a clear error
|
||||
with pytest.raises(ValueError, match="Benchmark 'dummy_benchmark' not found"):
|
||||
await table.unregister_benchmark(benchmark_id="dummy_benchmark")
|
||||
await table.unregister_benchmark(UnregisterBenchmarkRequest(benchmark_id="dummy_benchmark"))
|
||||
|
||||
|
||||
async def test_tool_groups_routing_table(cached_disk_dist_registry):
|
||||
|
|
|
|||