feat: convert Datasets API to use FastAPI router (#4359)

# What does this PR do?

Convert the Datasets API from webmethod decorators to FastAPI router
pattern.

Fixes: https://github.com/llamastack/llama-stack/issues/4344

## Test Plan
CI

Signed-off-by: Sébastien Han <seb@redhat.com>
This commit is contained in:
Sébastien Han 2025-12-15 20:23:04 +01:00 committed by GitHub
parent 56f946f3f5
commit 700663028f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 716 additions and 335 deletions

View file

@ -3268,7 +3268,7 @@ paths:
get:
responses:
'200':
description: A ListDatasetsResponse.
description: A list of dataset objects.
content:
application/json:
schema:
@ -3287,13 +3287,13 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Datasets
summary: List Datasets
summary: List all datasets.
description: List all datasets.
operationId: list_datasets_v1beta_datasets_get
post:
responses:
'200':
description: A Dataset.
description: The registered dataset object.
content:
application/json:
schema:
@ -3312,7 +3312,7 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Datasets
summary: Register Dataset
summary: Register a new dataset.
description: Register a new dataset.
operationId: register_dataset_v1beta_datasets_post
requestBody:
@ -3326,26 +3326,26 @@ paths:
get:
responses:
'200':
description: A Dataset.
description: The dataset object.
content:
application/json:
schema:
$ref: '#/components/schemas/Dataset'
'400':
description: Bad Request
$ref: '#/components/responses/BadRequest400'
description: Bad Request
'429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default:
description: Default Response
$ref: '#/components/responses/DefaultError'
description: Default Response
tags:
- Datasets
summary: Get Dataset
summary: Get a dataset by its ID.
description: Get a dataset by its ID.
operationId: get_dataset_v1beta_datasets__dataset_id__get
parameters:
@ -3354,26 +3354,28 @@ paths:
required: true
schema:
type: string
description: 'Path parameter: dataset_id'
description: The ID of the dataset to get.
title: Dataset Id
description: The ID of the dataset to get.
delete:
responses:
'400':
description: Bad Request
$ref: '#/components/responses/BadRequest400'
description: Bad Request
'429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default:
description: Default Response
$ref: '#/components/responses/DefaultError'
description: Default Response
'204':
description: Successful Response
description: The dataset was successfully unregistered.
tags:
- Datasets
summary: Unregister Dataset
summary: Unregister a dataset by its ID.
description: Unregister a dataset by its ID.
operationId: unregister_dataset_v1beta_datasets__dataset_id__delete
parameters:
@ -3382,7 +3384,9 @@ paths:
required: true
schema:
type: string
description: 'Path parameter: dataset_id'
description: The ID of the dataset to unregister.
title: Dataset Id
description: The ID of the dataset to unregister.
deprecated: true
/v1alpha/eval/benchmarks:
get:
@ -10570,9 +10574,11 @@ components:
type: string
const: dataset
title: Type
description: Type of resource, always 'dataset' for datasets
default: dataset
purpose:
$ref: '#/components/schemas/DatasetPurpose'
description: Purpose of the dataset indicating its intended use
source:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
@ -10580,6 +10586,7 @@ components:
- $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource
title: URIDataSource | RowsDataSource
description: Data source configuration for the dataset
discriminator:
propertyName: type
mapping:
@ -10604,6 +10611,7 @@ components:
type: string
const: rows
title: Type
description: The type of data source.
default: rows
rows:
items:
@ -10611,6 +10619,7 @@ components:
type: object
type: array
title: Rows
description: 'The dataset is stored in rows. E.g. [{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}]'
type: object
required:
- rows
@ -10622,10 +10631,12 @@ components:
type: string
const: uri
title: Type
description: The type of data source.
default: uri
uri:
type: string
title: Uri
description: The dataset can be obtained from a URI. E.g. "https://mywebsite.com/mydata.jsonl", "lsfs://mydata.jsonl", "data:csv;base64,{base64_content}"
type: object
required:
- uri
@ -10638,6 +10649,7 @@ components:
$ref: '#/components/schemas/Dataset'
type: array
title: Data
description: List of datasets
type: object
required:
- data
@ -11635,27 +11647,37 @@ components:
properties:
purpose:
$ref: '#/components/schemas/DatasetPurpose'
description: The purpose of the dataset.
source:
anyOf:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
title: URIDataSource
- $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource
title: URIDataSource | RowsDataSource
description: The data source of the dataset.
discriminator:
propertyName: type
mapping:
rows: '#/components/schemas/RowsDataSource'
uri: '#/components/schemas/URIDataSource'
metadata:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
description: The metadata for the dataset.
dataset_id:
anyOf:
- type: string
- type: 'null'
description: The ID of the dataset. If not provided, an ID will be generated.
type: object
required:
- purpose
- source
title: RegisterDatasetRequest
description: Request model for registering a dataset.
RegisterBenchmarkRequest:
properties:
benchmark_id:
@ -13488,6 +13510,28 @@ components:
- items
title: ConversationItemCreateRequest
type: object
GetDatasetRequest:
description: Request model for getting a dataset by ID.
properties:
dataset_id:
description: The ID of the dataset to get.
title: Dataset Id
type: string
required:
- dataset_id
title: GetDatasetRequest
type: object
UnregisterDatasetRequest:
description: Request model for unregistering a dataset.
properties:
dataset_id:
description: The ID of the dataset to unregister.
title: Dataset Id
type: string
required:
- dataset_id
title: UnregisterDatasetRequest
type: object
Api:
description: Enumeration of all available APIs in the Llama Stack system.
enum:

View file

@ -655,7 +655,7 @@ paths:
get:
responses:
'200':
description: A ListDatasetsResponse.
description: A list of dataset objects.
content:
application/json:
schema:
@ -674,13 +674,13 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Datasets
summary: List Datasets
summary: List all datasets.
description: List all datasets.
operationId: list_datasets_v1beta_datasets_get
post:
responses:
'200':
description: A Dataset.
description: The registered dataset object.
content:
application/json:
schema:
@ -699,7 +699,7 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Datasets
summary: Register Dataset
summary: Register a new dataset.
description: Register a new dataset.
operationId: register_dataset_v1beta_datasets_post
requestBody:
@ -713,26 +713,26 @@ paths:
get:
responses:
'200':
description: A Dataset.
description: The dataset object.
content:
application/json:
schema:
$ref: '#/components/schemas/Dataset'
'400':
description: Bad Request
$ref: '#/components/responses/BadRequest400'
description: Bad Request
'429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default:
description: Default Response
$ref: '#/components/responses/DefaultError'
description: Default Response
tags:
- Datasets
summary: Get Dataset
summary: Get a dataset by its ID.
description: Get a dataset by its ID.
operationId: get_dataset_v1beta_datasets__dataset_id__get
parameters:
@ -741,26 +741,28 @@ paths:
required: true
schema:
type: string
description: 'Path parameter: dataset_id'
description: The ID of the dataset to get.
title: Dataset Id
description: The ID of the dataset to get.
delete:
responses:
'400':
description: Bad Request
$ref: '#/components/responses/BadRequest400'
description: Bad Request
'429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default:
description: Default Response
$ref: '#/components/responses/DefaultError'
description: Default Response
'204':
description: Successful Response
description: The dataset was successfully unregistered.
tags:
- Datasets
summary: Unregister Dataset
summary: Unregister a dataset by its ID.
description: Unregister a dataset by its ID.
operationId: unregister_dataset_v1beta_datasets__dataset_id__delete
parameters:
@ -769,7 +771,9 @@ paths:
required: true
schema:
type: string
description: 'Path parameter: dataset_id'
description: The ID of the dataset to unregister.
title: Dataset Id
description: The ID of the dataset to unregister.
deprecated: true
/v1alpha/eval/benchmarks:
get:
@ -7396,9 +7400,11 @@ components:
type: string
const: dataset
title: Type
description: Type of resource, always 'dataset' for datasets
default: dataset
purpose:
$ref: '#/components/schemas/DatasetPurpose'
description: Purpose of the dataset indicating its intended use
source:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
@ -7406,6 +7412,7 @@ components:
- $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource
title: URIDataSource | RowsDataSource
description: Data source configuration for the dataset
discriminator:
propertyName: type
mapping:
@ -7430,6 +7437,7 @@ components:
type: string
const: rows
title: Type
description: The type of data source.
default: rows
rows:
items:
@ -7437,6 +7445,7 @@ components:
type: object
type: array
title: Rows
description: 'The dataset is stored in rows. E.g. [{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}]'
type: object
required:
- rows
@ -7448,10 +7457,12 @@ components:
type: string
const: uri
title: Type
description: The type of data source.
default: uri
uri:
type: string
title: Uri
description: The dataset can be obtained from a URI. E.g. "https://mywebsite.com/mydata.jsonl", "lsfs://mydata.jsonl", "data:csv;base64,{base64_content}"
type: object
required:
- uri
@ -7464,6 +7475,7 @@ components:
$ref: '#/components/schemas/Dataset'
type: array
title: Data
description: List of datasets
type: object
required:
- data
@ -8461,27 +8473,37 @@ components:
properties:
purpose:
$ref: '#/components/schemas/DatasetPurpose'
description: The purpose of the dataset.
source:
anyOf:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
title: URIDataSource
- $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource
title: URIDataSource | RowsDataSource
description: The data source of the dataset.
discriminator:
propertyName: type
mapping:
rows: '#/components/schemas/RowsDataSource'
uri: '#/components/schemas/URIDataSource'
metadata:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
description: The metadata for the dataset.
dataset_id:
anyOf:
- type: string
- type: 'null'
description: The ID of the dataset. If not provided, an ID will be generated.
type: object
required:
- purpose
- source
title: RegisterDatasetRequest
description: Request model for registering a dataset.
RegisterBenchmarkRequest:
properties:
benchmark_id:
@ -10314,6 +10336,28 @@ components:
- items
title: ConversationItemCreateRequest
type: object
GetDatasetRequest:
description: Request model for getting a dataset by ID.
properties:
dataset_id:
description: The ID of the dataset to get.
title: Dataset Id
type: string
required:
- dataset_id
title: GetDatasetRequest
type: object
UnregisterDatasetRequest:
description: Request model for unregistering a dataset.
properties:
dataset_id:
description: The ID of the dataset to unregister.
title: Dataset Id
type: string
required:
- dataset_id
title: UnregisterDatasetRequest
type: object
Api:
description: Enumeration of all available APIs in the Llama Stack system.
enum:

View file

@ -110,7 +110,7 @@ paths:
get:
responses:
'200':
description: A ListDatasetsResponse.
description: A list of dataset objects.
content:
application/json:
schema:
@ -129,33 +129,33 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Datasets
summary: List Datasets
summary: List all datasets.
description: List all datasets.
operationId: list_datasets_v1beta_datasets_get
/v1beta/datasets/{dataset_id}:
get:
responses:
'200':
description: A Dataset.
description: The dataset object.
content:
application/json:
schema:
$ref: '#/components/schemas/Dataset'
'400':
description: Bad Request
$ref: '#/components/responses/BadRequest400'
description: Bad Request
'429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default:
description: Default Response
$ref: '#/components/responses/DefaultError'
description: Default Response
tags:
- Datasets
summary: Get Dataset
summary: Get a dataset by its ID.
description: Get a dataset by its ID.
operationId: get_dataset_v1beta_datasets__dataset_id__get
parameters:
@ -164,7 +164,9 @@ paths:
required: true
schema:
type: string
description: 'Path parameter: dataset_id'
description: The ID of the dataset to get.
title: Dataset Id
description: The ID of the dataset to get.
/v1alpha/eval/benchmarks:
get:
responses:
@ -6659,9 +6661,11 @@ components:
type: string
const: dataset
title: Type
description: Type of resource, always 'dataset' for datasets
default: dataset
purpose:
$ref: '#/components/schemas/DatasetPurpose'
description: Purpose of the dataset indicating its intended use
source:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
@ -6669,6 +6673,7 @@ components:
- $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource
title: URIDataSource | RowsDataSource
description: Data source configuration for the dataset
discriminator:
propertyName: type
mapping:
@ -6693,6 +6698,7 @@ components:
type: string
const: rows
title: Type
description: The type of data source.
default: rows
rows:
items:
@ -6700,6 +6706,7 @@ components:
type: object
type: array
title: Rows
description: 'The dataset is stored in rows. E.g. [{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}]'
type: object
required:
- rows
@ -6711,10 +6718,12 @@ components:
type: string
const: uri
title: Type
description: The type of data source.
default: uri
uri:
type: string
title: Uri
description: The dataset can be obtained from a URI. E.g. "https://mywebsite.com/mydata.jsonl", "lsfs://mydata.jsonl", "data:csv;base64,{base64_content}"
type: object
required:
- uri
@ -6727,6 +6736,7 @@ components:
$ref: '#/components/schemas/Dataset'
type: array
title: Data
description: List of datasets
type: object
required:
- data
@ -7585,6 +7595,41 @@ components:
- $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource
title: URIDataSource | RowsDataSource
RegisterDatasetRequest:
properties:
purpose:
$ref: '#/components/schemas/DatasetPurpose'
description: The purpose of the dataset.
source:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
title: URIDataSource
- $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource
title: URIDataSource | RowsDataSource
description: The data source of the dataset.
discriminator:
propertyName: type
mapping:
rows: '#/components/schemas/RowsDataSource'
uri: '#/components/schemas/URIDataSource'
metadata:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
description: The metadata for the dataset.
dataset_id:
anyOf:
- type: string
- type: 'null'
description: The ID of the dataset. If not provided, an ID will be generated.
type: object
required:
- purpose
- source
title: RegisterDatasetRequest
description: Request model for registering a dataset.
RegisterBenchmarkRequest:
properties:
benchmark_id:
@ -9208,6 +9253,28 @@ components:
- items
title: ConversationItemCreateRequest
type: object
GetDatasetRequest:
description: Request model for getting a dataset by ID.
properties:
dataset_id:
description: The ID of the dataset to get.
title: Dataset Id
type: string
required:
- dataset_id
title: GetDatasetRequest
type: object
UnregisterDatasetRequest:
description: Request model for unregistering a dataset.
properties:
dataset_id:
description: The ID of the dataset to unregister.
title: Dataset Id
type: string
required:
- dataset_id
title: UnregisterDatasetRequest
type: object
Api:
description: Enumeration of all available APIs in the Llama Stack system.
enum:

View file

@ -9186,9 +9186,11 @@ components:
type: string
const: dataset
title: Type
description: Type of resource, always 'dataset' for datasets
default: dataset
purpose:
$ref: '#/components/schemas/DatasetPurpose'
description: Purpose of the dataset indicating its intended use
source:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
@ -9196,6 +9198,7 @@ components:
- $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource
title: URIDataSource | RowsDataSource
description: Data source configuration for the dataset
discriminator:
propertyName: type
mapping:
@ -9220,6 +9223,7 @@ components:
type: string
const: rows
title: Type
description: The type of data source.
default: rows
rows:
items:
@ -9227,6 +9231,7 @@ components:
type: object
type: array
title: Rows
description: 'The dataset is stored in rows. E.g. [{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}]'
type: object
required:
- rows
@ -9238,10 +9243,12 @@ components:
type: string
const: uri
title: Type
description: The type of data source.
default: uri
uri:
type: string
title: Uri
description: The dataset can be obtained from a URI. E.g. "https://mywebsite.com/mydata.jsonl", "lsfs://mydata.jsonl", "data:csv;base64,{base64_content}"
type: object
required:
- uri
@ -9254,6 +9261,7 @@ components:
$ref: '#/components/schemas/Dataset'
type: array
title: Data
description: List of datasets
type: object
required:
- data
@ -9965,6 +9973,41 @@ components:
- $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource
title: URIDataSource | RowsDataSource
RegisterDatasetRequest:
properties:
purpose:
$ref: '#/components/schemas/DatasetPurpose'
description: The purpose of the dataset.
source:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
title: URIDataSource
- $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource
title: URIDataSource | RowsDataSource
description: The data source of the dataset.
discriminator:
propertyName: type
mapping:
rows: '#/components/schemas/RowsDataSource'
uri: '#/components/schemas/URIDataSource'
metadata:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
description: The metadata for the dataset.
dataset_id:
anyOf:
- type: string
- type: 'null'
description: The ID of the dataset. If not provided, an ID will be generated.
type: object
required:
- purpose
- source
title: RegisterDatasetRequest
description: Request model for registering a dataset.
RegisterBenchmarkRequest:
properties:
benchmark_id:
@ -11797,6 +11840,28 @@ components:
- items
title: ConversationItemCreateRequest
type: object
GetDatasetRequest:
description: Request model for getting a dataset by ID.
properties:
dataset_id:
description: The ID of the dataset to get.
title: Dataset Id
type: string
required:
- dataset_id
title: GetDatasetRequest
type: object
UnregisterDatasetRequest:
description: Request model for unregistering a dataset.
properties:
dataset_id:
description: The ID of the dataset to unregister.
title: Dataset Id
type: string
required:
- dataset_id
title: UnregisterDatasetRequest
type: object
Api:
description: Enumeration of all available APIs in the Llama Stack system.
enum:

View file

@ -3268,7 +3268,7 @@ paths:
get:
responses:
'200':
description: A ListDatasetsResponse.
description: A list of dataset objects.
content:
application/json:
schema:
@ -3287,13 +3287,13 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Datasets
summary: List Datasets
summary: List all datasets.
description: List all datasets.
operationId: list_datasets_v1beta_datasets_get
post:
responses:
'200':
description: A Dataset.
description: The registered dataset object.
content:
application/json:
schema:
@ -3312,7 +3312,7 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Datasets
summary: Register Dataset
summary: Register a new dataset.
description: Register a new dataset.
operationId: register_dataset_v1beta_datasets_post
requestBody:
@ -3326,26 +3326,26 @@ paths:
get:
responses:
'200':
description: A Dataset.
description: The dataset object.
content:
application/json:
schema:
$ref: '#/components/schemas/Dataset'
'400':
description: Bad Request
$ref: '#/components/responses/BadRequest400'
description: Bad Request
'429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default:
description: Default Response
$ref: '#/components/responses/DefaultError'
description: Default Response
tags:
- Datasets
summary: Get Dataset
summary: Get a dataset by its ID.
description: Get a dataset by its ID.
operationId: get_dataset_v1beta_datasets__dataset_id__get
parameters:
@ -3354,26 +3354,28 @@ paths:
required: true
schema:
type: string
description: 'Path parameter: dataset_id'
description: The ID of the dataset to get.
title: Dataset Id
description: The ID of the dataset to get.
delete:
responses:
'400':
description: Bad Request
$ref: '#/components/responses/BadRequest400'
description: Bad Request
'429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default:
description: Default Response
$ref: '#/components/responses/DefaultError'
description: Default Response
'204':
description: Successful Response
description: The dataset was successfully unregistered.
tags:
- Datasets
summary: Unregister Dataset
summary: Unregister a dataset by its ID.
description: Unregister a dataset by its ID.
operationId: unregister_dataset_v1beta_datasets__dataset_id__delete
parameters:
@ -3382,7 +3384,9 @@ paths:
required: true
schema:
type: string
description: 'Path parameter: dataset_id'
description: The ID of the dataset to unregister.
title: Dataset Id
description: The ID of the dataset to unregister.
deprecated: true
/v1alpha/eval/benchmarks:
get:
@ -10570,9 +10574,11 @@ components:
type: string
const: dataset
title: Type
description: Type of resource, always 'dataset' for datasets
default: dataset
purpose:
$ref: '#/components/schemas/DatasetPurpose'
description: Purpose of the dataset indicating its intended use
source:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
@ -10580,6 +10586,7 @@ components:
- $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource
title: URIDataSource | RowsDataSource
description: Data source configuration for the dataset
discriminator:
propertyName: type
mapping:
@ -10604,6 +10611,7 @@ components:
type: string
const: rows
title: Type
description: The type of data source.
default: rows
rows:
items:
@ -10611,6 +10619,7 @@ components:
type: object
type: array
title: Rows
description: 'The dataset is stored in rows. E.g. [{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}]'
type: object
required:
- rows
@ -10622,10 +10631,12 @@ components:
type: string
const: uri
title: Type
description: The type of data source.
default: uri
uri:
type: string
title: Uri
description: The dataset can be obtained from a URI. E.g. "https://mywebsite.com/mydata.jsonl", "lsfs://mydata.jsonl", "data:csv;base64,{base64_content}"
type: object
required:
- uri
@ -10638,6 +10649,7 @@ components:
$ref: '#/components/schemas/Dataset'
type: array
title: Data
description: List of datasets
type: object
required:
- data
@ -11635,27 +11647,37 @@ components:
properties:
purpose:
$ref: '#/components/schemas/DatasetPurpose'
description: The purpose of the dataset.
source:
anyOf:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
title: URIDataSource
- $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource
title: URIDataSource | RowsDataSource
description: The data source of the dataset.
discriminator:
propertyName: type
mapping:
rows: '#/components/schemas/RowsDataSource'
uri: '#/components/schemas/URIDataSource'
metadata:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
description: The metadata for the dataset.
dataset_id:
anyOf:
- type: string
- type: 'null'
description: The ID of the dataset. If not provided, an ID will be generated.
type: object
required:
- purpose
- source
title: RegisterDatasetRequest
description: Request model for registering a dataset.
RegisterBenchmarkRequest:
properties:
benchmark_id:
@ -13488,6 +13510,28 @@ components:
- items
title: ConversationItemCreateRequest
type: object
GetDatasetRequest:
description: Request model for getting a dataset by ID.
properties:
dataset_id:
description: The ID of the dataset to get.
title: Dataset Id
type: string
required:
- dataset_id
title: GetDatasetRequest
type: object
UnregisterDatasetRequest:
description: Request model for unregistering a dataset.
properties:
dataset_id:
description: The ID of the dataset to unregister.
title: Dataset Id
type: string
required:
- dataset_id
title: UnregisterDatasetRequest
type: object
Api:
description: Enumeration of all available APIs in the Llama Stack system.
enum:

View file

@ -5,7 +5,6 @@
# the root directory of this source tree.
import uuid
from typing import Any
from llama_stack.core.datatypes import (
DatasetWithOwner,
@ -14,15 +13,18 @@ from llama_stack.log import get_logger
from llama_stack_api import (
Dataset,
DatasetNotFoundError,
DatasetPurpose,
Datasets,
DatasetType,
DataSource,
ListDatasetsResponse,
ResourceType,
RowsDataSource,
URIDataSource,
)
from llama_stack_api.datasets.api import (
Datasets,
GetDatasetRequest,
RegisterDatasetRequest,
UnregisterDatasetRequest,
)
from .common import CommonRoutingTableImpl
@ -33,19 +35,17 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
async def list_datasets(self) -> ListDatasetsResponse:
return ListDatasetsResponse(data=await self.get_all_with_type(ResourceType.dataset.value))
async def get_dataset(self, dataset_id: str) -> Dataset:
dataset = await self.get_object_by_identifier("dataset", dataset_id)
async def get_dataset(self, request: GetDatasetRequest) -> Dataset:
dataset = await self.get_object_by_identifier("dataset", request.dataset_id)
if dataset is None:
raise DatasetNotFoundError(dataset_id)
raise DatasetNotFoundError(request.dataset_id)
return dataset
async def register_dataset(
self,
purpose: DatasetPurpose,
source: DataSource,
metadata: dict[str, Any] | None = None,
dataset_id: str | None = None,
) -> Dataset:
async def register_dataset(self, request: RegisterDatasetRequest) -> Dataset:
purpose = request.purpose
source = request.source
metadata = request.metadata
dataset_id = request.dataset_id
if isinstance(source, dict):
if source["type"] == "uri":
source = URIDataSource.parse_obj(source)
@ -86,6 +86,6 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
await self.register_object(dataset)
return dataset
async def unregister_dataset(self, dataset_id: str) -> None:
dataset = await self.get_dataset(dataset_id)
async def unregister_dataset(self, request: UnregisterDatasetRequest) -> None:
dataset = await self.get_dataset(GetDatasetRequest(dataset_id=request.dataset_id))
await self.unregister_object(dataset)

View file

@ -17,7 +17,7 @@ from fastapi import APIRouter
from fastapi.routing import APIRoute
from starlette.routing import Route
from llama_stack_api import batches, benchmarks
from llama_stack_api import batches, benchmarks, datasets
# Router factories for APIs that have FastAPI routers
# Add new APIs here as they are migrated to the router system
@ -26,6 +26,7 @@ from llama_stack_api.datatypes import Api
_ROUTER_FACTORIES: dict[str, Callable[[Any], APIRouter]] = {
"batches": batches.fastapi_routes.create_router,
"benchmarks": benchmarks.fastapi_routes.create_router,
"datasets": datasets.fastapi_routes.create_router,
}

View file

@ -1,248 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from enum import Enum, StrEnum
from typing import Annotated, Any, Literal, Protocol
from pydantic import BaseModel, Field
from llama_stack_api.resource import Resource, ResourceType
from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
from llama_stack_api.version import LLAMA_STACK_API_V1BETA
class DatasetPurpose(StrEnum):
"""
Purpose of the dataset. Each purpose has a required input data schema.
:cvar post-training/messages: The dataset contains messages used for post-training.
{
"messages": [
{"role": "user", "content": "Hello, world!"},
{"role": "assistant", "content": "Hello, world!"},
]
}
:cvar eval/question-answer: The dataset contains a question column and an answer column.
{
"question": "What is the capital of France?",
"answer": "Paris"
}
:cvar eval/messages-answer: The dataset contains a messages column with list of messages and an answer column.
{
"messages": [
{"role": "user", "content": "Hello, my name is John Doe."},
{"role": "assistant", "content": "Hello, John Doe. How can I help you today?"},
{"role": "user", "content": "What's my name?"},
],
"answer": "John Doe"
}
"""
post_training_messages = "post-training/messages"
eval_question_answer = "eval/question-answer"
eval_messages_answer = "eval/messages-answer"
# TODO: add more schemas here
class DatasetType(Enum):
"""
Type of the dataset source.
:cvar uri: The dataset can be obtained from a URI.
:cvar rows: The dataset is stored in rows.
"""
uri = "uri"
rows = "rows"
@json_schema_type
class URIDataSource(BaseModel):
"""A dataset that can be obtained from a URI.
:param uri: The dataset can be obtained from a URI. E.g.
- "https://mywebsite.com/mydata.jsonl"
- "lsfs://mydata.jsonl"
- "data:csv;base64,{base64_content}"
"""
type: Literal["uri"] = "uri"
uri: str
@json_schema_type
class RowsDataSource(BaseModel):
"""A dataset stored in rows.
:param rows: The dataset is stored in rows. E.g.
- [
{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}
]
"""
type: Literal["rows"] = "rows"
rows: list[dict[str, Any]]
DataSource = Annotated[
URIDataSource | RowsDataSource,
Field(discriminator="type"),
]
register_schema(DataSource, name="DataSource")
class CommonDatasetFields(BaseModel):
"""
Common fields for a dataset.
:param purpose: Purpose of the dataset indicating its intended use
:param source: Data source configuration for the dataset
:param metadata: Additional metadata for the dataset
"""
purpose: DatasetPurpose
source: DataSource
metadata: dict[str, Any] = Field(
default_factory=dict,
description="Any additional metadata for this dataset",
)
@json_schema_type
class Dataset(CommonDatasetFields, Resource):
"""Dataset resource for storing and accessing training or evaluation data.
:param type: Type of resource, always 'dataset' for datasets
"""
type: Literal[ResourceType.dataset] = ResourceType.dataset
@property
def dataset_id(self) -> str:
return self.identifier
@property
def provider_dataset_id(self) -> str | None:
return self.provider_resource_id
class DatasetInput(CommonDatasetFields, BaseModel):
"""Input parameters for dataset operations.
:param dataset_id: Unique identifier for the dataset
"""
dataset_id: str
@json_schema_type
class ListDatasetsResponse(BaseModel):
"""Response from listing datasets.
:param data: List of datasets
"""
data: list[Dataset]
class Datasets(Protocol):
@webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA, deprecated=True)
async def register_dataset(
self,
purpose: DatasetPurpose,
source: DataSource,
metadata: dict[str, Any] | None = None,
dataset_id: str | None = None,
) -> Dataset:
"""
Register a new dataset.
:param purpose: The purpose of the dataset.
One of:
- "post-training/messages": The dataset contains a messages column with list of messages for post-training.
{
"messages": [
{"role": "user", "content": "Hello, world!"},
{"role": "assistant", "content": "Hello, world!"},
]
}
- "eval/question-answer": The dataset contains a question column and an answer column for evaluation.
{
"question": "What is the capital of France?",
"answer": "Paris"
}
- "eval/messages-answer": The dataset contains a messages column with list of messages and an answer column for evaluation.
{
"messages": [
{"role": "user", "content": "Hello, my name is John Doe."},
{"role": "assistant", "content": "Hello, John Doe. How can I help you today?"},
{"role": "user", "content": "What's my name?"},
],
"answer": "John Doe"
}
:param source: The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. Examples:
- {
"type": "uri",
"uri": "https://mywebsite.com/mydata.jsonl"
}
- {
"type": "uri",
"uri": "lsfs://mydata.jsonl"
}
- {
"type": "uri",
"uri": "data:csv;base64,{base64_content}"
}
- {
"type": "uri",
"uri": "huggingface://llamastack/simpleqa?split=train"
}
- {
"type": "rows",
"rows": [
{
"messages": [
{"role": "user", "content": "Hello, world!"},
{"role": "assistant", "content": "Hello, world!"},
]
}
]
}
:param metadata: The metadata for the dataset.
- E.g. {"description": "My dataset"}.
:param dataset_id: The ID of the dataset. If not provided, an ID will be generated.
:returns: A Dataset.
"""
...
@webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA)
async def get_dataset(
self,
dataset_id: str,
) -> Dataset:
"""Get a dataset by its ID.
:param dataset_id: The ID of the dataset to get.
:returns: A Dataset.
"""
...
@webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1BETA)
async def list_datasets(self) -> ListDatasetsResponse:
"""List all datasets.
:returns: A ListDatasetsResponse.
"""
...
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA, deprecated=True)
async def unregister_dataset(
self,
dataset_id: str,
) -> None:
"""Unregister a dataset by its ID.
:param dataset_id: The ID of the dataset to unregister.
"""
...

View file

@ -0,0 +1,61 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""Datasets API protocol and models.
This module contains the Datasets protocol definition.
Pydantic models are defined in llama_stack_api.datasets.models.
The FastAPI router is defined in llama_stack_api.datasets.fastapi_routes.
"""
# Import fastapi_routes for router factory access
from . import fastapi_routes
# Import new protocol for FastAPI router
from .api import Datasets
# Import models for re-export
from .models import (
CommonDatasetFields,
Dataset,
DatasetPurpose,
DatasetType,
DataSource,
GetDatasetRequest,
ListDatasetsResponse,
RegisterDatasetRequest,
RowsDataSource,
UnregisterDatasetRequest,
URIDataSource,
)
# Define DatasetInput for backward compatibility
class DatasetInput(CommonDatasetFields):
"""Input parameters for dataset operations.
:param dataset_id: Unique identifier for the dataset
"""
dataset_id: str
__all__ = [
"Datasets",
"Dataset",
"CommonDatasetFields",
"DatasetPurpose",
"DataSource",
"DatasetInput",
"DatasetType",
"RowsDataSource",
"URIDataSource",
"ListDatasetsResponse",
"RegisterDatasetRequest",
"GetDatasetRequest",
"UnregisterDatasetRequest",
"fastapi_routes",
]

View file

@ -0,0 +1,35 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""Datasets API protocol definition.
This module contains the Datasets protocol definition.
Pydantic models are defined in llama_stack_api.datasets.models.
The FastAPI router is defined in llama_stack_api.datasets.fastapi_routes.
"""
from typing import Protocol, runtime_checkable
from .models import (
Dataset,
GetDatasetRequest,
ListDatasetsResponse,
RegisterDatasetRequest,
UnregisterDatasetRequest,
)
@runtime_checkable
class Datasets(Protocol):
"""Protocol for dataset management operations."""
async def register_dataset(self, request: RegisterDatasetRequest) -> Dataset: ...
async def get_dataset(self, request: GetDatasetRequest) -> Dataset: ...
async def list_datasets(self) -> ListDatasetsResponse: ...
async def unregister_dataset(self, request: UnregisterDatasetRequest) -> None: ...

View file

@ -0,0 +1,104 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""FastAPI router for the Datasets API.
This module defines the FastAPI router for the Datasets API using standard
FastAPI route decorators.
"""
from typing import Annotated
from fastapi import APIRouter, Body, Depends
from llama_stack_api.router_utils import create_path_dependency, standard_responses
from llama_stack_api.version import LLAMA_STACK_API_V1BETA
from .api import Datasets
from .models import (
Dataset,
GetDatasetRequest,
ListDatasetsResponse,
RegisterDatasetRequest,
UnregisterDatasetRequest,
)
# Path parameter dependencies for single-field models
get_dataset_request = create_path_dependency(GetDatasetRequest)
unregister_dataset_request = create_path_dependency(UnregisterDatasetRequest)
def create_router(impl: Datasets) -> APIRouter:
"""Create a FastAPI router for the Datasets API.
Args:
impl: The Datasets implementation instance
Returns:
APIRouter configured for the Datasets API
"""
router = APIRouter(
prefix=f"/{LLAMA_STACK_API_V1BETA}",
tags=["Datasets"],
responses=standard_responses,
)
@router.post(
"/datasets",
response_model=Dataset,
summary="Register a new dataset.",
description="Register a new dataset.",
responses={
200: {"description": "The registered dataset object."},
},
deprecated=True,
)
async def register_dataset(
request: Annotated[RegisterDatasetRequest, Body(...)],
) -> Dataset:
return await impl.register_dataset(request)
@router.get(
"/datasets/{dataset_id:path}",
response_model=Dataset,
summary="Get a dataset by its ID.",
description="Get a dataset by its ID.",
responses={
200: {"description": "The dataset object."},
},
)
async def get_dataset(
request: Annotated[GetDatasetRequest, Depends(get_dataset_request)],
) -> Dataset:
return await impl.get_dataset(request)
@router.get(
"/datasets",
response_model=ListDatasetsResponse,
summary="List all datasets.",
description="List all datasets.",
responses={
200: {"description": "A list of dataset objects."},
},
)
async def list_datasets() -> ListDatasetsResponse:
return await impl.list_datasets()
@router.delete(
"/datasets/{dataset_id:path}",
summary="Unregister a dataset by its ID.",
description="Unregister a dataset by its ID.",
responses={
200: {"description": "The dataset was successfully unregistered."},
},
deprecated=True,
)
async def unregister_dataset(
request: Annotated[UnregisterDatasetRequest, Depends(unregister_dataset_request)],
) -> None:
return await impl.unregister_dataset(request)
return router

View file

@ -0,0 +1,152 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""Pydantic models for Datasets API requests and responses.
This module defines the request and response models for the Datasets API
using Pydantic with Field descriptions for OpenAPI schema generation.
"""
from enum import Enum, StrEnum
from typing import Annotated, Any, Literal
from pydantic import BaseModel, Field
from llama_stack_api.resource import Resource, ResourceType
from llama_stack_api.schema_utils import json_schema_type, register_schema
class DatasetPurpose(StrEnum):
"""Purpose of the dataset. Each purpose has a required input data schema."""
post_training_messages = "post-training/messages"
"""The dataset contains messages used for post-training."""
eval_question_answer = "eval/question-answer"
"""The dataset contains a question column and an answer column."""
eval_messages_answer = "eval/messages-answer"
"""The dataset contains a messages column with list of messages and an answer column."""
class DatasetType(Enum):
"""Type of the dataset source."""
uri = "uri"
"""The dataset can be obtained from a URI."""
rows = "rows"
"""The dataset is stored in rows."""
@json_schema_type
class URIDataSource(BaseModel):
"""A dataset that can be obtained from a URI."""
type: Literal["uri"] = Field(default="uri", description="The type of data source.")
uri: str = Field(
...,
description='The dataset can be obtained from a URI. E.g. "https://mywebsite.com/mydata.jsonl", "lsfs://mydata.jsonl", "data:csv;base64,{base64_content}"',
)
@json_schema_type
class RowsDataSource(BaseModel):
"""A dataset stored in rows."""
type: Literal["rows"] = Field(default="rows", description="The type of data source.")
rows: list[dict[str, Any]] = Field(
...,
description='The dataset is stored in rows. E.g. [{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}]',
)
DataSource = Annotated[
URIDataSource | RowsDataSource,
Field(discriminator="type"),
]
register_schema(DataSource, name="DataSource")
class CommonDatasetFields(BaseModel):
"""Common fields for a dataset."""
purpose: DatasetPurpose = Field(..., description="Purpose of the dataset indicating its intended use")
source: DataSource = Field(..., description="Data source configuration for the dataset")
metadata: dict[str, Any] = Field(
default_factory=dict,
description="Any additional metadata for this dataset",
)
@json_schema_type
class Dataset(CommonDatasetFields, Resource):
"""Dataset resource for storing and accessing training or evaluation data."""
type: Literal[ResourceType.dataset] = Field(
default=ResourceType.dataset,
description="Type of resource, always 'dataset' for datasets",
)
@property
def dataset_id(self) -> str:
return self.identifier
@property
def provider_dataset_id(self) -> str | None:
return self.provider_resource_id
@json_schema_type
class ListDatasetsResponse(BaseModel):
"""Response from listing datasets."""
data: list[Dataset] = Field(..., description="List of datasets")
# Request models for each endpoint
@json_schema_type
class RegisterDatasetRequest(BaseModel):
"""Request model for registering a dataset."""
purpose: DatasetPurpose = Field(..., description="The purpose of the dataset.")
source: DataSource = Field(..., description="The data source of the dataset.")
metadata: dict[str, Any] | None = Field(
default=None,
description="The metadata for the dataset.",
)
dataset_id: str | None = Field(
default=None,
description="The ID of the dataset. If not provided, an ID will be generated.",
)
@json_schema_type
class GetDatasetRequest(BaseModel):
"""Request model for getting a dataset by ID."""
dataset_id: str = Field(..., description="The ID of the dataset to get.")
@json_schema_type
class UnregisterDatasetRequest(BaseModel):
"""Request model for unregistering a dataset."""
dataset_id: str = Field(..., description="The ID of the dataset to unregister.")
__all__ = [
"CommonDatasetFields",
"Dataset",
"DatasetPurpose",
"DatasetType",
"DataSource",
"RowsDataSource",
"URIDataSource",
"ListDatasetsResponse",
"RegisterDatasetRequest",
"GetDatasetRequest",
"UnregisterDatasetRequest",
]

View file

@ -35,6 +35,10 @@ from llama_stack_api import (
UnregisterBenchmarkRequest,
URIDataSource,
)
from llama_stack_api.datasets import (
RegisterDatasetRequest,
UnregisterDatasetRequest,
)
class Impl:
@ -261,10 +265,18 @@ async def test_datasets_routing_table(cached_disk_dist_registry):
# Register multiple datasets and verify listing
await table.register_dataset(
dataset_id="test-dataset", purpose=DatasetPurpose.eval_messages_answer, source=URIDataSource(uri="test-uri")
RegisterDatasetRequest(
dataset_id="test-dataset",
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(uri="test-uri"),
)
)
await table.register_dataset(
dataset_id="test-dataset-2", purpose=DatasetPurpose.eval_messages_answer, source=URIDataSource(uri="test-uri-2")
RegisterDatasetRequest(
dataset_id="test-dataset-2",
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(uri="test-uri-2"),
)
)
datasets = await table.list_datasets()
@ -273,8 +285,8 @@ async def test_datasets_routing_table(cached_disk_dist_registry):
assert "test-dataset" in dataset_ids
assert "test-dataset-2" in dataset_ids
await table.unregister_dataset(dataset_id="test-dataset")
await table.unregister_dataset(dataset_id="test-dataset-2")
await table.unregister_dataset(UnregisterDatasetRequest(dataset_id="test-dataset"))
await table.unregister_dataset(UnregisterDatasetRequest(dataset_id="test-dataset-2"))
datasets = await table.list_datasets()
assert len(datasets.data) == 0