feat: convert Datasets API to use FastAPI router (#4359)

# What does this PR do?

Convert the Datasets API from webmethod decorators to the FastAPI router
pattern.

Fixes: https://github.com/llamastack/llama-stack/issues/4344

## Test Plan
CI

Signed-off-by: Sébastien Han <seb@redhat.com>
This commit is contained in:
Sébastien Han 2025-12-15 20:23:04 +01:00 committed by GitHub
parent 56f946f3f5
commit 700663028f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 716 additions and 335 deletions

View file

@ -3268,7 +3268,7 @@ paths:
get: get:
responses: responses:
'200': '200':
description: A ListDatasetsResponse. description: A list of dataset objects.
content: content:
application/json: application/json:
schema: schema:
@ -3287,13 +3287,13 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Datasets - Datasets
summary: List Datasets summary: List all datasets.
description: List all datasets. description: List all datasets.
operationId: list_datasets_v1beta_datasets_get operationId: list_datasets_v1beta_datasets_get
post: post:
responses: responses:
'200': '200':
description: A Dataset. description: The registered dataset object.
content: content:
application/json: application/json:
schema: schema:
@ -3312,7 +3312,7 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Datasets - Datasets
summary: Register Dataset summary: Register a new dataset.
description: Register a new dataset. description: Register a new dataset.
operationId: register_dataset_v1beta_datasets_post operationId: register_dataset_v1beta_datasets_post
requestBody: requestBody:
@ -3326,26 +3326,26 @@ paths:
get: get:
responses: responses:
'200': '200':
description: A Dataset. description: The dataset object.
content: content:
application/json: application/json:
schema: schema:
$ref: '#/components/schemas/Dataset' $ref: '#/components/schemas/Dataset'
'400': '400':
description: Bad Request
$ref: '#/components/responses/BadRequest400' $ref: '#/components/responses/BadRequest400'
description: Bad Request
'429': '429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429' $ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500': '500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500' $ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default: default:
description: Default Response
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
description: Default Response
tags: tags:
- Datasets - Datasets
summary: Get Dataset summary: Get a dataset by its ID.
description: Get a dataset by its ID. description: Get a dataset by its ID.
operationId: get_dataset_v1beta_datasets__dataset_id__get operationId: get_dataset_v1beta_datasets__dataset_id__get
parameters: parameters:
@ -3354,26 +3354,28 @@ paths:
required: true required: true
schema: schema:
type: string type: string
description: 'Path parameter: dataset_id' description: The ID of the dataset to get.
title: Dataset Id
description: The ID of the dataset to get.
delete: delete:
responses: responses:
'400': '400':
description: Bad Request
$ref: '#/components/responses/BadRequest400' $ref: '#/components/responses/BadRequest400'
description: Bad Request
'429': '429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429' $ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500': '500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500' $ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default: default:
description: Default Response
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
description: Default Response
'204': '204':
description: Successful Response description: The dataset was successfully unregistered.
tags: tags:
- Datasets - Datasets
summary: Unregister Dataset summary: Unregister a dataset by its ID.
description: Unregister a dataset by its ID. description: Unregister a dataset by its ID.
operationId: unregister_dataset_v1beta_datasets__dataset_id__delete operationId: unregister_dataset_v1beta_datasets__dataset_id__delete
parameters: parameters:
@ -3382,7 +3384,9 @@ paths:
required: true required: true
schema: schema:
type: string type: string
description: 'Path parameter: dataset_id' description: The ID of the dataset to unregister.
title: Dataset Id
description: The ID of the dataset to unregister.
deprecated: true deprecated: true
/v1alpha/eval/benchmarks: /v1alpha/eval/benchmarks:
get: get:
@ -10570,9 +10574,11 @@ components:
type: string type: string
const: dataset const: dataset
title: Type title: Type
description: Type of resource, always 'dataset' for datasets
default: dataset default: dataset
purpose: purpose:
$ref: '#/components/schemas/DatasetPurpose' $ref: '#/components/schemas/DatasetPurpose'
description: Purpose of the dataset indicating its intended use
source: source:
oneOf: oneOf:
- $ref: '#/components/schemas/URIDataSource' - $ref: '#/components/schemas/URIDataSource'
@ -10580,6 +10586,7 @@ components:
- $ref: '#/components/schemas/RowsDataSource' - $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource title: RowsDataSource
title: URIDataSource | RowsDataSource title: URIDataSource | RowsDataSource
description: Data source configuration for the dataset
discriminator: discriminator:
propertyName: type propertyName: type
mapping: mapping:
@ -10604,6 +10611,7 @@ components:
type: string type: string
const: rows const: rows
title: Type title: Type
description: The type of data source.
default: rows default: rows
rows: rows:
items: items:
@ -10611,6 +10619,7 @@ components:
type: object type: object
type: array type: array
title: Rows title: Rows
description: 'The dataset is stored in rows. E.g. [{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}]'
type: object type: object
required: required:
- rows - rows
@ -10622,10 +10631,12 @@ components:
type: string type: string
const: uri const: uri
title: Type title: Type
description: The type of data source.
default: uri default: uri
uri: uri:
type: string type: string
title: Uri title: Uri
description: The dataset can be obtained from a URI. E.g. "https://mywebsite.com/mydata.jsonl", "lsfs://mydata.jsonl", "data:csv;base64,{base64_content}"
type: object type: object
required: required:
- uri - uri
@ -10638,6 +10649,7 @@ components:
$ref: '#/components/schemas/Dataset' $ref: '#/components/schemas/Dataset'
type: array type: array
title: Data title: Data
description: List of datasets
type: object type: object
required: required:
- data - data
@ -11635,27 +11647,37 @@ components:
properties: properties:
purpose: purpose:
$ref: '#/components/schemas/DatasetPurpose' $ref: '#/components/schemas/DatasetPurpose'
description: The purpose of the dataset.
source: source:
anyOf: oneOf:
- $ref: '#/components/schemas/URIDataSource' - $ref: '#/components/schemas/URIDataSource'
title: URIDataSource title: URIDataSource
- $ref: '#/components/schemas/RowsDataSource' - $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource title: RowsDataSource
title: URIDataSource | RowsDataSource title: URIDataSource | RowsDataSource
description: The data source of the dataset.
discriminator:
propertyName: type
mapping:
rows: '#/components/schemas/RowsDataSource'
uri: '#/components/schemas/URIDataSource'
metadata: metadata:
anyOf: anyOf:
- additionalProperties: true - additionalProperties: true
type: object type: object
- type: 'null' - type: 'null'
description: The metadata for the dataset.
dataset_id: dataset_id:
anyOf: anyOf:
- type: string - type: string
- type: 'null' - type: 'null'
description: The ID of the dataset. If not provided, an ID will be generated.
type: object type: object
required: required:
- purpose - purpose
- source - source
title: RegisterDatasetRequest title: RegisterDatasetRequest
description: Request model for registering a dataset.
RegisterBenchmarkRequest: RegisterBenchmarkRequest:
properties: properties:
benchmark_id: benchmark_id:
@ -13488,6 +13510,28 @@ components:
- items - items
title: ConversationItemCreateRequest title: ConversationItemCreateRequest
type: object type: object
GetDatasetRequest:
description: Request model for getting a dataset by ID.
properties:
dataset_id:
description: The ID of the dataset to get.
title: Dataset Id
type: string
required:
- dataset_id
title: GetDatasetRequest
type: object
UnregisterDatasetRequest:
description: Request model for unregistering a dataset.
properties:
dataset_id:
description: The ID of the dataset to unregister.
title: Dataset Id
type: string
required:
- dataset_id
title: UnregisterDatasetRequest
type: object
Api: Api:
description: Enumeration of all available APIs in the Llama Stack system. description: Enumeration of all available APIs in the Llama Stack system.
enum: enum:

View file

@ -655,7 +655,7 @@ paths:
get: get:
responses: responses:
'200': '200':
description: A ListDatasetsResponse. description: A list of dataset objects.
content: content:
application/json: application/json:
schema: schema:
@ -674,13 +674,13 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Datasets - Datasets
summary: List Datasets summary: List all datasets.
description: List all datasets. description: List all datasets.
operationId: list_datasets_v1beta_datasets_get operationId: list_datasets_v1beta_datasets_get
post: post:
responses: responses:
'200': '200':
description: A Dataset. description: The registered dataset object.
content: content:
application/json: application/json:
schema: schema:
@ -699,7 +699,7 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Datasets - Datasets
summary: Register Dataset summary: Register a new dataset.
description: Register a new dataset. description: Register a new dataset.
operationId: register_dataset_v1beta_datasets_post operationId: register_dataset_v1beta_datasets_post
requestBody: requestBody:
@ -713,26 +713,26 @@ paths:
get: get:
responses: responses:
'200': '200':
description: A Dataset. description: The dataset object.
content: content:
application/json: application/json:
schema: schema:
$ref: '#/components/schemas/Dataset' $ref: '#/components/schemas/Dataset'
'400': '400':
description: Bad Request
$ref: '#/components/responses/BadRequest400' $ref: '#/components/responses/BadRequest400'
description: Bad Request
'429': '429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429' $ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500': '500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500' $ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default: default:
description: Default Response
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
description: Default Response
tags: tags:
- Datasets - Datasets
summary: Get Dataset summary: Get a dataset by its ID.
description: Get a dataset by its ID. description: Get a dataset by its ID.
operationId: get_dataset_v1beta_datasets__dataset_id__get operationId: get_dataset_v1beta_datasets__dataset_id__get
parameters: parameters:
@ -741,26 +741,28 @@ paths:
required: true required: true
schema: schema:
type: string type: string
description: 'Path parameter: dataset_id' description: The ID of the dataset to get.
title: Dataset Id
description: The ID of the dataset to get.
delete: delete:
responses: responses:
'400': '400':
description: Bad Request
$ref: '#/components/responses/BadRequest400' $ref: '#/components/responses/BadRequest400'
description: Bad Request
'429': '429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429' $ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500': '500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500' $ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default: default:
description: Default Response
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
description: Default Response
'204': '204':
description: Successful Response description: The dataset was successfully unregistered.
tags: tags:
- Datasets - Datasets
summary: Unregister Dataset summary: Unregister a dataset by its ID.
description: Unregister a dataset by its ID. description: Unregister a dataset by its ID.
operationId: unregister_dataset_v1beta_datasets__dataset_id__delete operationId: unregister_dataset_v1beta_datasets__dataset_id__delete
parameters: parameters:
@ -769,7 +771,9 @@ paths:
required: true required: true
schema: schema:
type: string type: string
description: 'Path parameter: dataset_id' description: The ID of the dataset to unregister.
title: Dataset Id
description: The ID of the dataset to unregister.
deprecated: true deprecated: true
/v1alpha/eval/benchmarks: /v1alpha/eval/benchmarks:
get: get:
@ -7396,9 +7400,11 @@ components:
type: string type: string
const: dataset const: dataset
title: Type title: Type
description: Type of resource, always 'dataset' for datasets
default: dataset default: dataset
purpose: purpose:
$ref: '#/components/schemas/DatasetPurpose' $ref: '#/components/schemas/DatasetPurpose'
description: Purpose of the dataset indicating its intended use
source: source:
oneOf: oneOf:
- $ref: '#/components/schemas/URIDataSource' - $ref: '#/components/schemas/URIDataSource'
@ -7406,6 +7412,7 @@ components:
- $ref: '#/components/schemas/RowsDataSource' - $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource title: RowsDataSource
title: URIDataSource | RowsDataSource title: URIDataSource | RowsDataSource
description: Data source configuration for the dataset
discriminator: discriminator:
propertyName: type propertyName: type
mapping: mapping:
@ -7430,6 +7437,7 @@ components:
type: string type: string
const: rows const: rows
title: Type title: Type
description: The type of data source.
default: rows default: rows
rows: rows:
items: items:
@ -7437,6 +7445,7 @@ components:
type: object type: object
type: array type: array
title: Rows title: Rows
description: 'The dataset is stored in rows. E.g. [{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}]'
type: object type: object
required: required:
- rows - rows
@ -7448,10 +7457,12 @@ components:
type: string type: string
const: uri const: uri
title: Type title: Type
description: The type of data source.
default: uri default: uri
uri: uri:
type: string type: string
title: Uri title: Uri
description: The dataset can be obtained from a URI. E.g. "https://mywebsite.com/mydata.jsonl", "lsfs://mydata.jsonl", "data:csv;base64,{base64_content}"
type: object type: object
required: required:
- uri - uri
@ -7464,6 +7475,7 @@ components:
$ref: '#/components/schemas/Dataset' $ref: '#/components/schemas/Dataset'
type: array type: array
title: Data title: Data
description: List of datasets
type: object type: object
required: required:
- data - data
@ -8461,27 +8473,37 @@ components:
properties: properties:
purpose: purpose:
$ref: '#/components/schemas/DatasetPurpose' $ref: '#/components/schemas/DatasetPurpose'
description: The purpose of the dataset.
source: source:
anyOf: oneOf:
- $ref: '#/components/schemas/URIDataSource' - $ref: '#/components/schemas/URIDataSource'
title: URIDataSource title: URIDataSource
- $ref: '#/components/schemas/RowsDataSource' - $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource title: RowsDataSource
title: URIDataSource | RowsDataSource title: URIDataSource | RowsDataSource
description: The data source of the dataset.
discriminator:
propertyName: type
mapping:
rows: '#/components/schemas/RowsDataSource'
uri: '#/components/schemas/URIDataSource'
metadata: metadata:
anyOf: anyOf:
- additionalProperties: true - additionalProperties: true
type: object type: object
- type: 'null' - type: 'null'
description: The metadata for the dataset.
dataset_id: dataset_id:
anyOf: anyOf:
- type: string - type: string
- type: 'null' - type: 'null'
description: The ID of the dataset. If not provided, an ID will be generated.
type: object type: object
required: required:
- purpose - purpose
- source - source
title: RegisterDatasetRequest title: RegisterDatasetRequest
description: Request model for registering a dataset.
RegisterBenchmarkRequest: RegisterBenchmarkRequest:
properties: properties:
benchmark_id: benchmark_id:
@ -10314,6 +10336,28 @@ components:
- items - items
title: ConversationItemCreateRequest title: ConversationItemCreateRequest
type: object type: object
GetDatasetRequest:
description: Request model for getting a dataset by ID.
properties:
dataset_id:
description: The ID of the dataset to get.
title: Dataset Id
type: string
required:
- dataset_id
title: GetDatasetRequest
type: object
UnregisterDatasetRequest:
description: Request model for unregistering a dataset.
properties:
dataset_id:
description: The ID of the dataset to unregister.
title: Dataset Id
type: string
required:
- dataset_id
title: UnregisterDatasetRequest
type: object
Api: Api:
description: Enumeration of all available APIs in the Llama Stack system. description: Enumeration of all available APIs in the Llama Stack system.
enum: enum:

View file

@ -110,7 +110,7 @@ paths:
get: get:
responses: responses:
'200': '200':
description: A ListDatasetsResponse. description: A list of dataset objects.
content: content:
application/json: application/json:
schema: schema:
@ -129,33 +129,33 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Datasets - Datasets
summary: List Datasets summary: List all datasets.
description: List all datasets. description: List all datasets.
operationId: list_datasets_v1beta_datasets_get operationId: list_datasets_v1beta_datasets_get
/v1beta/datasets/{dataset_id}: /v1beta/datasets/{dataset_id}:
get: get:
responses: responses:
'200': '200':
description: A Dataset. description: The dataset object.
content: content:
application/json: application/json:
schema: schema:
$ref: '#/components/schemas/Dataset' $ref: '#/components/schemas/Dataset'
'400': '400':
description: Bad Request
$ref: '#/components/responses/BadRequest400' $ref: '#/components/responses/BadRequest400'
description: Bad Request
'429': '429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429' $ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500': '500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500' $ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default: default:
description: Default Response
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
description: Default Response
tags: tags:
- Datasets - Datasets
summary: Get Dataset summary: Get a dataset by its ID.
description: Get a dataset by its ID. description: Get a dataset by its ID.
operationId: get_dataset_v1beta_datasets__dataset_id__get operationId: get_dataset_v1beta_datasets__dataset_id__get
parameters: parameters:
@ -164,7 +164,9 @@ paths:
required: true required: true
schema: schema:
type: string type: string
description: 'Path parameter: dataset_id' description: The ID of the dataset to get.
title: Dataset Id
description: The ID of the dataset to get.
/v1alpha/eval/benchmarks: /v1alpha/eval/benchmarks:
get: get:
responses: responses:
@ -6659,9 +6661,11 @@ components:
type: string type: string
const: dataset const: dataset
title: Type title: Type
description: Type of resource, always 'dataset' for datasets
default: dataset default: dataset
purpose: purpose:
$ref: '#/components/schemas/DatasetPurpose' $ref: '#/components/schemas/DatasetPurpose'
description: Purpose of the dataset indicating its intended use
source: source:
oneOf: oneOf:
- $ref: '#/components/schemas/URIDataSource' - $ref: '#/components/schemas/URIDataSource'
@ -6669,6 +6673,7 @@ components:
- $ref: '#/components/schemas/RowsDataSource' - $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource title: RowsDataSource
title: URIDataSource | RowsDataSource title: URIDataSource | RowsDataSource
description: Data source configuration for the dataset
discriminator: discriminator:
propertyName: type propertyName: type
mapping: mapping:
@ -6693,6 +6698,7 @@ components:
type: string type: string
const: rows const: rows
title: Type title: Type
description: The type of data source.
default: rows default: rows
rows: rows:
items: items:
@ -6700,6 +6706,7 @@ components:
type: object type: object
type: array type: array
title: Rows title: Rows
description: 'The dataset is stored in rows. E.g. [{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}]'
type: object type: object
required: required:
- rows - rows
@ -6711,10 +6718,12 @@ components:
type: string type: string
const: uri const: uri
title: Type title: Type
description: The type of data source.
default: uri default: uri
uri: uri:
type: string type: string
title: Uri title: Uri
description: The dataset can be obtained from a URI. E.g. "https://mywebsite.com/mydata.jsonl", "lsfs://mydata.jsonl", "data:csv;base64,{base64_content}"
type: object type: object
required: required:
- uri - uri
@ -6727,6 +6736,7 @@ components:
$ref: '#/components/schemas/Dataset' $ref: '#/components/schemas/Dataset'
type: array type: array
title: Data title: Data
description: List of datasets
type: object type: object
required: required:
- data - data
@ -7585,6 +7595,41 @@ components:
- $ref: '#/components/schemas/RowsDataSource' - $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource title: RowsDataSource
title: URIDataSource | RowsDataSource title: URIDataSource | RowsDataSource
RegisterDatasetRequest:
properties:
purpose:
$ref: '#/components/schemas/DatasetPurpose'
description: The purpose of the dataset.
source:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
title: URIDataSource
- $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource
title: URIDataSource | RowsDataSource
description: The data source of the dataset.
discriminator:
propertyName: type
mapping:
rows: '#/components/schemas/RowsDataSource'
uri: '#/components/schemas/URIDataSource'
metadata:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
description: The metadata for the dataset.
dataset_id:
anyOf:
- type: string
- type: 'null'
description: The ID of the dataset. If not provided, an ID will be generated.
type: object
required:
- purpose
- source
title: RegisterDatasetRequest
description: Request model for registering a dataset.
RegisterBenchmarkRequest: RegisterBenchmarkRequest:
properties: properties:
benchmark_id: benchmark_id:
@ -9208,6 +9253,28 @@ components:
- items - items
title: ConversationItemCreateRequest title: ConversationItemCreateRequest
type: object type: object
GetDatasetRequest:
description: Request model for getting a dataset by ID.
properties:
dataset_id:
description: The ID of the dataset to get.
title: Dataset Id
type: string
required:
- dataset_id
title: GetDatasetRequest
type: object
UnregisterDatasetRequest:
description: Request model for unregistering a dataset.
properties:
dataset_id:
description: The ID of the dataset to unregister.
title: Dataset Id
type: string
required:
- dataset_id
title: UnregisterDatasetRequest
type: object
Api: Api:
description: Enumeration of all available APIs in the Llama Stack system. description: Enumeration of all available APIs in the Llama Stack system.
enum: enum:

View file

@ -9186,9 +9186,11 @@ components:
type: string type: string
const: dataset const: dataset
title: Type title: Type
description: Type of resource, always 'dataset' for datasets
default: dataset default: dataset
purpose: purpose:
$ref: '#/components/schemas/DatasetPurpose' $ref: '#/components/schemas/DatasetPurpose'
description: Purpose of the dataset indicating its intended use
source: source:
oneOf: oneOf:
- $ref: '#/components/schemas/URIDataSource' - $ref: '#/components/schemas/URIDataSource'
@ -9196,6 +9198,7 @@ components:
- $ref: '#/components/schemas/RowsDataSource' - $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource title: RowsDataSource
title: URIDataSource | RowsDataSource title: URIDataSource | RowsDataSource
description: Data source configuration for the dataset
discriminator: discriminator:
propertyName: type propertyName: type
mapping: mapping:
@ -9220,6 +9223,7 @@ components:
type: string type: string
const: rows const: rows
title: Type title: Type
description: The type of data source.
default: rows default: rows
rows: rows:
items: items:
@ -9227,6 +9231,7 @@ components:
type: object type: object
type: array type: array
title: Rows title: Rows
description: 'The dataset is stored in rows. E.g. [{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}]'
type: object type: object
required: required:
- rows - rows
@ -9238,10 +9243,12 @@ components:
type: string type: string
const: uri const: uri
title: Type title: Type
description: The type of data source.
default: uri default: uri
uri: uri:
type: string type: string
title: Uri title: Uri
description: The dataset can be obtained from a URI. E.g. "https://mywebsite.com/mydata.jsonl", "lsfs://mydata.jsonl", "data:csv;base64,{base64_content}"
type: object type: object
required: required:
- uri - uri
@ -9254,6 +9261,7 @@ components:
$ref: '#/components/schemas/Dataset' $ref: '#/components/schemas/Dataset'
type: array type: array
title: Data title: Data
description: List of datasets
type: object type: object
required: required:
- data - data
@ -9965,6 +9973,41 @@ components:
- $ref: '#/components/schemas/RowsDataSource' - $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource title: RowsDataSource
title: URIDataSource | RowsDataSource title: URIDataSource | RowsDataSource
RegisterDatasetRequest:
properties:
purpose:
$ref: '#/components/schemas/DatasetPurpose'
description: The purpose of the dataset.
source:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
title: URIDataSource
- $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource
title: URIDataSource | RowsDataSource
description: The data source of the dataset.
discriminator:
propertyName: type
mapping:
rows: '#/components/schemas/RowsDataSource'
uri: '#/components/schemas/URIDataSource'
metadata:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
description: The metadata for the dataset.
dataset_id:
anyOf:
- type: string
- type: 'null'
description: The ID of the dataset. If not provided, an ID will be generated.
type: object
required:
- purpose
- source
title: RegisterDatasetRequest
description: Request model for registering a dataset.
RegisterBenchmarkRequest: RegisterBenchmarkRequest:
properties: properties:
benchmark_id: benchmark_id:
@ -11797,6 +11840,28 @@ components:
- items - items
title: ConversationItemCreateRequest title: ConversationItemCreateRequest
type: object type: object
GetDatasetRequest:
description: Request model for getting a dataset by ID.
properties:
dataset_id:
description: The ID of the dataset to get.
title: Dataset Id
type: string
required:
- dataset_id
title: GetDatasetRequest
type: object
UnregisterDatasetRequest:
description: Request model for unregistering a dataset.
properties:
dataset_id:
description: The ID of the dataset to unregister.
title: Dataset Id
type: string
required:
- dataset_id
title: UnregisterDatasetRequest
type: object
Api: Api:
description: Enumeration of all available APIs in the Llama Stack system. description: Enumeration of all available APIs in the Llama Stack system.
enum: enum:

View file

@ -3268,7 +3268,7 @@ paths:
get: get:
responses: responses:
'200': '200':
description: A ListDatasetsResponse. description: A list of dataset objects.
content: content:
application/json: application/json:
schema: schema:
@ -3287,13 +3287,13 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Datasets - Datasets
summary: List Datasets summary: List all datasets.
description: List all datasets. description: List all datasets.
operationId: list_datasets_v1beta_datasets_get operationId: list_datasets_v1beta_datasets_get
post: post:
responses: responses:
'200': '200':
description: A Dataset. description: The registered dataset object.
content: content:
application/json: application/json:
schema: schema:
@ -3312,7 +3312,7 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Datasets - Datasets
summary: Register Dataset summary: Register a new dataset.
description: Register a new dataset. description: Register a new dataset.
operationId: register_dataset_v1beta_datasets_post operationId: register_dataset_v1beta_datasets_post
requestBody: requestBody:
@ -3326,26 +3326,26 @@ paths:
get: get:
responses: responses:
'200': '200':
description: A Dataset. description: The dataset object.
content: content:
application/json: application/json:
schema: schema:
$ref: '#/components/schemas/Dataset' $ref: '#/components/schemas/Dataset'
'400': '400':
description: Bad Request
$ref: '#/components/responses/BadRequest400' $ref: '#/components/responses/BadRequest400'
description: Bad Request
'429': '429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429' $ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500': '500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500' $ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default: default:
description: Default Response
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
description: Default Response
tags: tags:
- Datasets - Datasets
summary: Get Dataset summary: Get a dataset by its ID.
description: Get a dataset by its ID. description: Get a dataset by its ID.
operationId: get_dataset_v1beta_datasets__dataset_id__get operationId: get_dataset_v1beta_datasets__dataset_id__get
parameters: parameters:
@ -3354,26 +3354,28 @@ paths:
required: true required: true
schema: schema:
type: string type: string
description: 'Path parameter: dataset_id' description: The ID of the dataset to get.
title: Dataset Id
description: The ID of the dataset to get.
delete: delete:
responses: responses:
'400': '400':
description: Bad Request
$ref: '#/components/responses/BadRequest400' $ref: '#/components/responses/BadRequest400'
description: Bad Request
'429': '429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429' $ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500': '500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500' $ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default: default:
description: Default Response
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
description: Default Response
'204': '204':
description: Successful Response description: The dataset was successfully unregistered.
tags: tags:
- Datasets - Datasets
summary: Unregister Dataset summary: Unregister a dataset by its ID.
description: Unregister a dataset by its ID. description: Unregister a dataset by its ID.
operationId: unregister_dataset_v1beta_datasets__dataset_id__delete operationId: unregister_dataset_v1beta_datasets__dataset_id__delete
parameters: parameters:
@ -3382,7 +3384,9 @@ paths:
required: true required: true
schema: schema:
type: string type: string
description: 'Path parameter: dataset_id' description: The ID of the dataset to unregister.
title: Dataset Id
description: The ID of the dataset to unregister.
deprecated: true deprecated: true
/v1alpha/eval/benchmarks: /v1alpha/eval/benchmarks:
get: get:
@ -10570,9 +10574,11 @@ components:
type: string type: string
const: dataset const: dataset
title: Type title: Type
description: Type of resource, always 'dataset' for datasets
default: dataset default: dataset
purpose: purpose:
$ref: '#/components/schemas/DatasetPurpose' $ref: '#/components/schemas/DatasetPurpose'
description: Purpose of the dataset indicating its intended use
source: source:
oneOf: oneOf:
- $ref: '#/components/schemas/URIDataSource' - $ref: '#/components/schemas/URIDataSource'
@ -10580,6 +10586,7 @@ components:
- $ref: '#/components/schemas/RowsDataSource' - $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource title: RowsDataSource
title: URIDataSource | RowsDataSource title: URIDataSource | RowsDataSource
description: Data source configuration for the dataset
discriminator: discriminator:
propertyName: type propertyName: type
mapping: mapping:
@ -10604,6 +10611,7 @@ components:
type: string type: string
const: rows const: rows
title: Type title: Type
description: The type of data source.
default: rows default: rows
rows: rows:
items: items:
@ -10611,6 +10619,7 @@ components:
type: object type: object
type: array type: array
title: Rows title: Rows
description: 'The dataset is stored in rows. E.g. [{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}]'
type: object type: object
required: required:
- rows - rows
@ -10622,10 +10631,12 @@ components:
type: string type: string
const: uri const: uri
title: Type title: Type
description: The type of data source.
default: uri default: uri
uri: uri:
type: string type: string
title: Uri title: Uri
description: The dataset can be obtained from a URI. E.g. "https://mywebsite.com/mydata.jsonl", "lsfs://mydata.jsonl", "data:csv;base64,{base64_content}"
type: object type: object
required: required:
- uri - uri
@ -10638,6 +10649,7 @@ components:
$ref: '#/components/schemas/Dataset' $ref: '#/components/schemas/Dataset'
type: array type: array
title: Data title: Data
description: List of datasets
type: object type: object
required: required:
- data - data
@ -11635,27 +11647,37 @@ components:
properties: properties:
purpose: purpose:
$ref: '#/components/schemas/DatasetPurpose' $ref: '#/components/schemas/DatasetPurpose'
description: The purpose of the dataset.
source: source:
anyOf: oneOf:
- $ref: '#/components/schemas/URIDataSource' - $ref: '#/components/schemas/URIDataSource'
title: URIDataSource title: URIDataSource
- $ref: '#/components/schemas/RowsDataSource' - $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource title: RowsDataSource
title: URIDataSource | RowsDataSource title: URIDataSource | RowsDataSource
description: The data source of the dataset.
discriminator:
propertyName: type
mapping:
rows: '#/components/schemas/RowsDataSource'
uri: '#/components/schemas/URIDataSource'
metadata: metadata:
anyOf: anyOf:
- additionalProperties: true - additionalProperties: true
type: object type: object
- type: 'null' - type: 'null'
description: The metadata for the dataset.
dataset_id: dataset_id:
anyOf: anyOf:
- type: string - type: string
- type: 'null' - type: 'null'
description: The ID of the dataset. If not provided, an ID will be generated.
type: object type: object
required: required:
- purpose - purpose
- source - source
title: RegisterDatasetRequest title: RegisterDatasetRequest
description: Request model for registering a dataset.
RegisterBenchmarkRequest: RegisterBenchmarkRequest:
properties: properties:
benchmark_id: benchmark_id:
@ -13488,6 +13510,28 @@ components:
- items - items
title: ConversationItemCreateRequest title: ConversationItemCreateRequest
type: object type: object
GetDatasetRequest:
description: Request model for getting a dataset by ID.
properties:
dataset_id:
description: The ID of the dataset to get.
title: Dataset Id
type: string
required:
- dataset_id
title: GetDatasetRequest
type: object
UnregisterDatasetRequest:
description: Request model for unregistering a dataset.
properties:
dataset_id:
description: The ID of the dataset to unregister.
title: Dataset Id
type: string
required:
- dataset_id
title: UnregisterDatasetRequest
type: object
Api: Api:
description: Enumeration of all available APIs in the Llama Stack system. description: Enumeration of all available APIs in the Llama Stack system.
enum: enum:

View file

@ -5,7 +5,6 @@
# the root directory of this source tree. # the root directory of this source tree.
import uuid import uuid
from typing import Any
from llama_stack.core.datatypes import ( from llama_stack.core.datatypes import (
DatasetWithOwner, DatasetWithOwner,
@ -14,15 +13,18 @@ from llama_stack.log import get_logger
from llama_stack_api import ( from llama_stack_api import (
Dataset, Dataset,
DatasetNotFoundError, DatasetNotFoundError,
DatasetPurpose,
Datasets,
DatasetType, DatasetType,
DataSource,
ListDatasetsResponse, ListDatasetsResponse,
ResourceType, ResourceType,
RowsDataSource, RowsDataSource,
URIDataSource, URIDataSource,
) )
from llama_stack_api.datasets.api import (
Datasets,
GetDatasetRequest,
RegisterDatasetRequest,
UnregisterDatasetRequest,
)
from .common import CommonRoutingTableImpl from .common import CommonRoutingTableImpl
@ -33,19 +35,17 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
async def list_datasets(self) -> ListDatasetsResponse:
    """Return every registered dataset wrapped in a ``ListDatasetsResponse``.

    The datasets are fetched from the routing table with ``get_all_with_type``
    using the ``dataset`` resource type value.
    """
    datasets = await self.get_all_with_type(ResourceType.dataset.value)
    return ListDatasetsResponse(data=datasets)
async def get_dataset(self, request: GetDatasetRequest) -> Dataset:
    """Look up a dataset by the ID carried in ``request``.

    Args:
        request: Request model whose ``dataset_id`` names the dataset.

    Returns:
        The object stored under that identifier for the ``"dataset"`` type.

    Raises:
        DatasetNotFoundError: If the lookup yields ``None``.
    """
    dataset = await self.get_object_by_identifier("dataset", request.dataset_id)
    if dataset is None:
        raise DatasetNotFoundError(request.dataset_id)
    return dataset
async def register_dataset( async def register_dataset(self, request: RegisterDatasetRequest) -> Dataset:
self, purpose = request.purpose
purpose: DatasetPurpose, source = request.source
source: DataSource, metadata = request.metadata
metadata: dict[str, Any] | None = None, dataset_id = request.dataset_id
dataset_id: str | None = None,
) -> Dataset:
if isinstance(source, dict): if isinstance(source, dict):
if source["type"] == "uri": if source["type"] == "uri":
source = URIDataSource.parse_obj(source) source = URIDataSource.parse_obj(source)
@ -86,6 +86,6 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
await self.register_object(dataset) await self.register_object(dataset)
return dataset return dataset
async def unregister_dataset(self, request: UnregisterDatasetRequest) -> None:
    """Remove the dataset named by ``request.dataset_id`` from the routing table.

    The dataset is first resolved through ``get_dataset`` (re-wrapping the ID in
    a ``GetDatasetRequest``), so any error raised by that lookup propagates to
    the caller before anything is unregistered.
    """
    lookup = GetDatasetRequest(dataset_id=request.dataset_id)
    dataset = await self.get_dataset(lookup)
    await self.unregister_object(dataset)

View file

@ -17,7 +17,7 @@ from fastapi import APIRouter
from fastapi.routing import APIRoute from fastapi.routing import APIRoute
from starlette.routing import Route from starlette.routing import Route
from llama_stack_api import batches, benchmarks from llama_stack_api import batches, benchmarks, datasets
# Router factories for APIs that have FastAPI routers # Router factories for APIs that have FastAPI routers
# Add new APIs here as they are migrated to the router system # Add new APIs here as they are migrated to the router system
@ -26,6 +26,7 @@ from llama_stack_api.datatypes import Api
# Maps an API name to the factory that builds that API's FastAPI router from
# an implementation object.
_ROUTER_FACTORIES: dict[str, Callable[[Any], APIRouter]] = {
    "batches": batches.fastapi_routes.create_router,
    "benchmarks": benchmarks.fastapi_routes.create_router,
    "datasets": datasets.fastapi_routes.create_router,
}

View file

@ -1,248 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from enum import Enum, StrEnum
from typing import Annotated, Any, Literal, Protocol
from pydantic import BaseModel, Field
from llama_stack_api.resource import Resource, ResourceType
from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
from llama_stack_api.version import LLAMA_STACK_API_V1BETA
class DatasetPurpose(StrEnum):
    """
    Purpose of the dataset. Each purpose has a required input data schema.

    :cvar post-training/messages: The dataset contains messages used for post-training.
        {
            "messages": [
                {"role": "user", "content": "Hello, world!"},
                {"role": "assistant", "content": "Hello, world!"},
            ]
        }
    :cvar eval/question-answer: The dataset contains a question column and an answer column.
        {
            "question": "What is the capital of France?",
            "answer": "Paris"
        }
    :cvar eval/messages-answer: The dataset contains a messages column with list of messages and an answer column.
        {
            "messages": [
                {"role": "user", "content": "Hello, my name is John Doe."},
                {"role": "assistant", "content": "Hello, John Doe. How can I help you today?"},
                {"role": "user", "content": "What's my name?"},
            ],
            "answer": "John Doe"
        }
    """

    # Member names are plain identifiers; the values are the slash/hyphen
    # strings that the :cvar entries above refer to.
    post_training_messages = "post-training/messages"
    eval_question_answer = "eval/question-answer"
    eval_messages_answer = "eval/messages-answer"

    # TODO: add more schemas here
class DatasetType(Enum):
    """
    Type of the dataset source.

    :cvar uri: The dataset can be obtained from a URI.
    :cvar rows: The dataset is stored in rows.
    """

    # The values match the `type` literals declared on URIDataSource and
    # RowsDataSource in this module.
    uri = "uri"
    rows = "rows"
@json_schema_type
class URIDataSource(BaseModel):
    """A dataset that can be obtained from a URI.

    :param uri: The dataset can be obtained from a URI. E.g.
        - "https://mywebsite.com/mydata.jsonl"
        - "lsfs://mydata.jsonl"
        - "data:csv;base64,{base64_content}"
    """

    # Constant tag; the DataSource union in this module discriminates on `type`.
    type: Literal["uri"] = "uri"
    # Required: no default is given.
    uri: str
@json_schema_type
class RowsDataSource(BaseModel):
    """A dataset stored in rows.

    :param rows: The dataset is stored in rows. E.g.
        - [
            {"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}
        ]
    """

    # Constant tag; the DataSource union in this module discriminates on `type`.
    type: Literal["rows"] = "rows"
    # Required: each row is a free-form mapping from string keys to any value.
    rows: list[dict[str, Any]]
# Tagged union of the two source models; the `type` field selects the member.
DataSource = Annotated[
    URIDataSource | RowsDataSource,
    Field(discriminator="type"),
]
# Registers the union under the name "DataSource" (see schema_utils for what
# registration does with it).
register_schema(DataSource, name="DataSource")
class CommonDatasetFields(BaseModel):
    """
    Common fields for a dataset.

    :param purpose: Purpose of the dataset indicating its intended use
    :param source: Data source configuration for the dataset
    :param metadata: Additional metadata for the dataset
    """

    # `purpose` and `source` have no default, so both must be supplied.
    purpose: DatasetPurpose
    source: DataSource
    # default_factory=dict gives every instance its own empty dict when the
    # field is omitted.
    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="Any additional metadata for this dataset",
    )
@json_schema_type
class Dataset(CommonDatasetFields, Resource):
    """Dataset resource for storing and accessing training or evaluation data.

    :param type: Type of resource, always 'dataset' for datasets
    """

    # Narrows the resource type to the single `dataset` member and defaults to it.
    type: Literal[ResourceType.dataset] = ResourceType.dataset

    # Read-only alias: the dataset ID is the resource's `identifier`.
    @property
    def dataset_id(self) -> str:
        return self.identifier

    # Read-only alias for `provider_resource_id`; annotated as possibly None.
    @property
    def provider_dataset_id(self) -> str | None:
        return self.provider_resource_id
class DatasetInput(CommonDatasetFields, BaseModel):
    """Input parameters for dataset operations.

    :param dataset_id: Unique identifier for the dataset
    """

    # Adds a required ID on top of the purpose/source/metadata fields inherited
    # from CommonDatasetFields.
    dataset_id: str
@json_schema_type
class ListDatasetsResponse(BaseModel):
    """Response from listing datasets.

    :param data: List of datasets
    """

    # Required field; the datasets are wrapped under the `data` key.
    data: list[Dataset]
class Datasets(Protocol):
    # Registration endpoint: POST /datasets at the v1beta level, marked deprecated.
    @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA, deprecated=True)
    async def register_dataset(
        self,
        purpose: DatasetPurpose,
        source: DataSource,
        metadata: dict[str, Any] | None = None,
        dataset_id: str | None = None,
    ) -> Dataset:
        """
        Register a new dataset.

        :param purpose: The purpose of the dataset.
        One of:
            - "post-training/messages": The dataset contains a messages column with list of messages for post-training.
                {
                    "messages": [
                        {"role": "user", "content": "Hello, world!"},
                        {"role": "assistant", "content": "Hello, world!"},
                    ]
                }
            - "eval/question-answer": The dataset contains a question column and an answer column for evaluation.
                {
                    "question": "What is the capital of France?",
                    "answer": "Paris"
                }
            - "eval/messages-answer": The dataset contains a messages column with list of messages and an answer column for evaluation.
                {
                    "messages": [
                        {"role": "user", "content": "Hello, my name is John Doe."},
                        {"role": "assistant", "content": "Hello, John Doe. How can I help you today?"},
                        {"role": "user", "content": "What's my name?"},
                    ],
                    "answer": "John Doe"
                }
        :param source: The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. Examples:
            - {
                "type": "uri",
                "uri": "https://mywebsite.com/mydata.jsonl"
            }
            - {
                "type": "uri",
                "uri": "lsfs://mydata.jsonl"
            }
            - {
                "type": "uri",
                "uri": "data:csv;base64,{base64_content}"
            }
            - {
                "type": "uri",
                "uri": "huggingface://llamastack/simpleqa?split=train"
            }
            - {
                "type": "rows",
                "rows": [
                    {
                        "messages": [
                            {"role": "user", "content": "Hello, world!"},
                            {"role": "assistant", "content": "Hello, world!"},
                        ]
                    }
                ]
            }
        :param metadata: The metadata for the dataset.
            - E.g. {"description": "My dataset"}.
        :param dataset_id: The ID of the dataset. If not provided, an ID will be generated.
        :returns: A Dataset.
        """
        ...

    # Lookup endpoint: GET /datasets/{dataset_id}. The route declares the
    # segment as `{dataset_id:path}`.
    @webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA)
    async def get_dataset(
        self,
        dataset_id: str,
    ) -> Dataset:
        """Get a dataset by its ID.

        :param dataset_id: The ID of the dataset to get.
        :returns: A Dataset.
        """
        ...

    # Listing endpoint: GET /datasets, takes no parameters.
    @webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1BETA)
    async def list_datasets(self) -> ListDatasetsResponse:
        """List all datasets.

        :returns: A ListDatasetsResponse.
        """
        ...

    # Removal endpoint: DELETE /datasets/{dataset_id}, marked deprecated; returns nothing.
    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA, deprecated=True)
    async def unregister_dataset(
        self,
        dataset_id: str,
    ) -> None:
        """Unregister a dataset by its ID.

        :param dataset_id: The ID of the dataset to unregister.
        """
        ...

View file

@ -0,0 +1,61 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""Datasets API protocol and models.
This module contains the Datasets protocol definition.
Pydantic models are defined in llama_stack_api.datasets.models.
The FastAPI router is defined in llama_stack_api.datasets.fastapi_routes.
"""
# Import fastapi_routes for router factory access
from . import fastapi_routes
# Import new protocol for FastAPI router
from .api import Datasets
# Import models for re-export
from .models import (
CommonDatasetFields,
Dataset,
DatasetPurpose,
DatasetType,
DataSource,
GetDatasetRequest,
ListDatasetsResponse,
RegisterDatasetRequest,
RowsDataSource,
UnregisterDatasetRequest,
URIDataSource,
)
# Define DatasetInput for backward compatibility
class DatasetInput(CommonDatasetFields):
    """Input parameters for dataset operations.

    :param dataset_id: Unique identifier for the dataset
    """

    # Required ID, added on top of the fields inherited from CommonDatasetFields.
    dataset_id: str
# Public surface of the package: the protocol from .api, the models re-exported
# from .models, the locally defined DatasetInput, and the fastapi_routes
# submodule itself.
__all__ = [
    "Datasets",
    "Dataset",
    "CommonDatasetFields",
    "DatasetPurpose",
    "DataSource",
    "DatasetInput",
    "DatasetType",
    "RowsDataSource",
    "URIDataSource",
    "ListDatasetsResponse",
    "RegisterDatasetRequest",
    "GetDatasetRequest",
    "UnregisterDatasetRequest",
    "fastapi_routes",
]

View file

@ -0,0 +1,35 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""Datasets API protocol definition.
This module contains the Datasets protocol definition.
Pydantic models are defined in llama_stack_api.datasets.models.
The FastAPI router is defined in llama_stack_api.datasets.fastapi_routes.
"""
from typing import Protocol, runtime_checkable
from .models import (
Dataset,
GetDatasetRequest,
ListDatasetsResponse,
RegisterDatasetRequest,
UnregisterDatasetRequest,
)
@runtime_checkable
class Datasets(Protocol):
    """Protocol for dataset management operations."""

    async def register_dataset(self, request: RegisterDatasetRequest) -> Dataset:
        """Register the dataset described by ``request`` and return it."""
        ...

    async def get_dataset(self, request: GetDatasetRequest) -> Dataset:
        """Return the dataset identified by ``request``."""
        ...

    async def list_datasets(self) -> ListDatasetsResponse:
        """Return a response listing datasets."""
        ...

    async def unregister_dataset(self, request: UnregisterDatasetRequest) -> None:
        """Unregister the dataset identified by ``request``."""
        ...

View file

@ -0,0 +1,104 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""FastAPI router for the Datasets API.
This module defines the FastAPI router for the Datasets API using standard
FastAPI route decorators.
"""
from typing import Annotated
from fastapi import APIRouter, Body, Depends
from llama_stack_api.router_utils import create_path_dependency, standard_responses
from llama_stack_api.version import LLAMA_STACK_API_V1BETA
from .api import Datasets
from .models import (
Dataset,
GetDatasetRequest,
ListDatasetsResponse,
RegisterDatasetRequest,
UnregisterDatasetRequest,
)
# Path parameter dependencies for single-field models
# Both callables are passed to Depends() by the GET and DELETE routes in
# create_router, so those handlers receive a request model instance.
# NOTE(review): presumably the model is built from the `dataset_id` path
# segment; confirm in router_utils.create_path_dependency.
get_dataset_request = create_path_dependency(GetDatasetRequest)
unregister_dataset_request = create_path_dependency(UnregisterDatasetRequest)
def create_router(impl: Datasets) -> APIRouter:
    """Create a FastAPI router for the Datasets API.

    Each route is a thin adapter: it receives the request model (from the JSON
    body or from a path dependency) and forwards it to the coroutine of the
    same name on ``impl``.

    Args:
        impl: The Datasets implementation instance

    Returns:
        APIRouter configured for the Datasets API
    """
    router = APIRouter(
        prefix=f"/{LLAMA_STACK_API_V1BETA}",
        tags=["Datasets"],
        responses=standard_responses,
    )

    @router.post(
        "/datasets",
        response_model=Dataset,
        summary="Register a new dataset.",
        description="Register a new dataset.",
        responses={
            200: {"description": "The registered dataset object."},
        },
        deprecated=True,
    )
    async def register_dataset(
        request: Annotated[RegisterDatasetRequest, Body(...)],
    ) -> Dataset:
        return await impl.register_dataset(request)

    @router.get(
        "/datasets/{dataset_id:path}",
        response_model=Dataset,
        summary="Get a dataset by its ID.",
        description="Get a dataset by its ID.",
        responses={
            200: {"description": "The dataset object."},
        },
    )
    async def get_dataset(
        request: Annotated[GetDatasetRequest, Depends(get_dataset_request)],
    ) -> Dataset:
        return await impl.get_dataset(request)

    @router.get(
        "/datasets",
        response_model=ListDatasetsResponse,
        summary="List all datasets.",
        description="List all datasets.",
        responses={
            200: {"description": "A list of dataset objects."},
        },
    )
    async def list_datasets() -> ListDatasetsResponse:
        return await impl.list_datasets()

    @router.delete(
        "/datasets/{dataset_id:path}",
        # The handler returns nothing, so the success status is 204 No Content.
        # The description must be keyed under the same code to land on the
        # response the OpenAPI spec documents for this operation.
        status_code=204,
        summary="Unregister a dataset by its ID.",
        description="Unregister a dataset by its ID.",
        responses={
            204: {"description": "The dataset was successfully unregistered."},
        },
        deprecated=True,
    )
    async def unregister_dataset(
        request: Annotated[UnregisterDatasetRequest, Depends(unregister_dataset_request)],
    ) -> None:
        return await impl.unregister_dataset(request)

    return router

View file

@ -0,0 +1,152 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""Pydantic models for Datasets API requests and responses.
This module defines the request and response models for the Datasets API
using Pydantic with Field descriptions for OpenAPI schema generation.
"""
from enum import Enum, StrEnum
from typing import Annotated, Any, Literal
from pydantic import BaseModel, Field
from llama_stack_api.resource import Resource, ResourceType
from llama_stack_api.schema_utils import json_schema_type, register_schema
class DatasetPurpose(StrEnum):
"""Purpose of the dataset. Each purpose has a required input data schema."""
post_training_messages = "post-training/messages"
"""The dataset contains messages used for post-training."""
eval_question_answer = "eval/question-answer"
"""The dataset contains a question column and an answer column."""
eval_messages_answer = "eval/messages-answer"
"""The dataset contains a messages column with list of messages and an answer column."""
class DatasetType(Enum):
"""Type of the dataset source."""
uri = "uri"
"""The dataset can be obtained from a URI."""
rows = "rows"
"""The dataset is stored in rows."""
@json_schema_type
class URIDataSource(BaseModel):
    """A dataset that can be obtained from a URI."""

    # Constant tag "uri"; the DataSource union in this module discriminates on `type`.
    type: Literal["uri"] = Field(default="uri", description="The type of data source.")
    # Required (`...` means no default). The description doubles as the
    # field's schema text and lists example URI forms.
    uri: str = Field(
        ...,
        description='The dataset can be obtained from a URI. E.g. "https://mywebsite.com/mydata.jsonl", "lsfs://mydata.jsonl", "data:csv;base64,{base64_content}"',
    )
@json_schema_type
class RowsDataSource(BaseModel):
    """A dataset stored in rows."""

    # Constant tag "rows"; the DataSource union in this module discriminates on `type`.
    type: Literal["rows"] = Field(default="rows", description="The type of data source.")
    # Required (`...` means no default); each row is a free-form mapping from
    # string keys to any value.
    rows: list[dict[str, Any]] = Field(
        ...,
        description='The dataset is stored in rows. E.g. [{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}]',
    )
# Tagged union of the two source models; the `type` field selects the member.
DataSource = Annotated[
    URIDataSource | RowsDataSource,
    Field(discriminator="type"),
]
# Registers the union under the name "DataSource" (see schema_utils for what
# registration does with it).
register_schema(DataSource, name="DataSource")
class CommonDatasetFields(BaseModel):
    """Common fields for a dataset."""

    # `purpose` and `source` are required (`...` means no default).
    purpose: DatasetPurpose = Field(..., description="Purpose of the dataset indicating its intended use")
    source: DataSource = Field(..., description="Data source configuration for the dataset")
    # default_factory=dict gives every instance its own empty dict when the
    # field is omitted.
    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="Any additional metadata for this dataset",
    )
@json_schema_type
class Dataset(CommonDatasetFields, Resource):
    """Dataset resource for storing and accessing training or evaluation data."""

    # Narrows the resource type to the single `dataset` member and defaults to it.
    type: Literal[ResourceType.dataset] = Field(
        default=ResourceType.dataset,
        description="Type of resource, always 'dataset' for datasets",
    )

    # Read-only alias: the dataset ID is the resource's `identifier`.
    @property
    def dataset_id(self) -> str:
        return self.identifier

    # Read-only alias for `provider_resource_id`; annotated as possibly None.
    @property
    def provider_dataset_id(self) -> str | None:
        return self.provider_resource_id
@json_schema_type
class ListDatasetsResponse(BaseModel):
    """Response from listing datasets."""

    # Required field; the datasets are wrapped under the `data` key.
    data: list[Dataset] = Field(..., description="List of datasets")
# Request models for each endpoint
@json_schema_type
class RegisterDatasetRequest(BaseModel):
    """Request model for registering a dataset."""

    # Required fields.
    purpose: DatasetPurpose = Field(..., description="The purpose of the dataset.")
    source: DataSource = Field(..., description="The data source of the dataset.")
    # Optional, defaulting to None. Note CommonDatasetFields.metadata defaults
    # to an empty dict instead.
    metadata: dict[str, Any] | None = Field(
        default=None,
        description="The metadata for the dataset.",
    )
    # Optional; per the description, an ID is generated when this is omitted.
    dataset_id: str | None = Field(
        default=None,
        description="The ID of the dataset. If not provided, an ID will be generated.",
    )
@json_schema_type
class GetDatasetRequest(BaseModel):
    """Request model for getting a dataset by ID."""

    # The only field, and required.
    dataset_id: str = Field(..., description="The ID of the dataset to get.")
@json_schema_type
class UnregisterDatasetRequest(BaseModel):
    """Request model for unregistering a dataset."""

    # The only field, and required.
    dataset_id: str = Field(..., description="The ID of the dataset to unregister.")
# Names exported by a star-import: every enum, model and alias defined in
# this module.
__all__ = [
    "CommonDatasetFields",
    "Dataset",
    "DatasetPurpose",
    "DatasetType",
    "DataSource",
    "RowsDataSource",
    "URIDataSource",
    "ListDatasetsResponse",
    "RegisterDatasetRequest",
    "GetDatasetRequest",
    "UnregisterDatasetRequest",
]

View file

@ -35,6 +35,10 @@ from llama_stack_api import (
UnregisterBenchmarkRequest, UnregisterBenchmarkRequest,
URIDataSource, URIDataSource,
) )
from llama_stack_api.datasets import (
RegisterDatasetRequest,
UnregisterDatasetRequest,
)
class Impl: class Impl:
@ -261,10 +265,18 @@ async def test_datasets_routing_table(cached_disk_dist_registry):
# Register multiple datasets and verify listing # Register multiple datasets and verify listing
await table.register_dataset( await table.register_dataset(
dataset_id="test-dataset", purpose=DatasetPurpose.eval_messages_answer, source=URIDataSource(uri="test-uri") RegisterDatasetRequest(
dataset_id="test-dataset",
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(uri="test-uri"),
)
) )
await table.register_dataset( await table.register_dataset(
dataset_id="test-dataset-2", purpose=DatasetPurpose.eval_messages_answer, source=URIDataSource(uri="test-uri-2") RegisterDatasetRequest(
dataset_id="test-dataset-2",
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(uri="test-uri-2"),
)
) )
datasets = await table.list_datasets() datasets = await table.list_datasets()
@ -273,8 +285,8 @@ async def test_datasets_routing_table(cached_disk_dist_registry):
assert "test-dataset" in dataset_ids assert "test-dataset" in dataset_ids
assert "test-dataset-2" in dataset_ids assert "test-dataset-2" in dataset_ids
await table.unregister_dataset(dataset_id="test-dataset") await table.unregister_dataset(UnregisterDatasetRequest(dataset_id="test-dataset"))
await table.unregister_dataset(dataset_id="test-dataset-2") await table.unregister_dataset(UnregisterDatasetRequest(dataset_id="test-dataset-2"))
datasets = await table.list_datasets() datasets = await table.list_datasets()
assert len(datasets.data) == 0 assert len(datasets.data) == 0