diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index b0e91c5eb..edc2a62f6 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -3268,7 +3268,7 @@ paths: get: responses: '200': - description: A ListDatasetsResponse. + description: A list of dataset objects. content: application/json: schema: @@ -3287,13 +3287,13 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Datasets - summary: List Datasets + summary: List all datasets. description: List all datasets. operationId: list_datasets_v1beta_datasets_get post: responses: '200': - description: A Dataset. + description: The registered dataset object. content: application/json: schema: @@ -3312,7 +3312,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Datasets - summary: Register Dataset + summary: Register a new dataset. description: Register a new dataset. operationId: register_dataset_v1beta_datasets_post requestBody: @@ -3326,26 +3326,26 @@ paths: get: responses: '200': - description: A Dataset. + description: The dataset object. content: application/json: schema: $ref: '#/components/schemas/Dataset' '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response tags: - Datasets - summary: Get Dataset + summary: Get a dataset by its ID. description: Get a dataset by its ID. operationId: get_dataset_v1beta_datasets__dataset_id__get parameters: @@ -3354,26 +3354,28 @@ paths: required: true schema: type: string - description: 'Path parameter: dataset_id' + description: The ID of the dataset to get. 
+ title: Dataset Id + description: The ID of the dataset to get. delete: responses: '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response '204': - description: Successful Response + description: The dataset was successfully unregistered. tags: - Datasets - summary: Unregister Dataset + summary: Unregister a dataset by its ID. description: Unregister a dataset by its ID. operationId: unregister_dataset_v1beta_datasets__dataset_id__delete parameters: @@ -3382,7 +3384,9 @@ paths: required: true schema: type: string - description: 'Path parameter: dataset_id' + description: The ID of the dataset to unregister. + title: Dataset Id + description: The ID of the dataset to unregister. deprecated: true /v1alpha/eval/benchmarks: get: @@ -10570,9 +10574,11 @@ components: type: string const: dataset title: Type + description: Type of resource, always 'dataset' for datasets default: dataset purpose: $ref: '#/components/schemas/DatasetPurpose' + description: Purpose of the dataset indicating its intended use source: oneOf: - $ref: '#/components/schemas/URIDataSource' @@ -10580,6 +10586,7 @@ components: - $ref: '#/components/schemas/RowsDataSource' title: RowsDataSource title: URIDataSource | RowsDataSource + description: Data source configuration for the dataset discriminator: propertyName: type mapping: @@ -10604,6 +10611,7 @@ components: type: string const: rows title: Type + description: The type of data source. default: rows rows: items: @@ -10611,6 +10619,7 @@ components: type: object type: array title: Rows + description: 'The dataset is stored in rows. 
E.g. [{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}]' type: object required: - rows @@ -10622,10 +10631,12 @@ components: type: string const: uri title: Type + description: The type of data source. default: uri uri: type: string title: Uri + description: The dataset can be obtained from a URI. E.g. "https://mywebsite.com/mydata.jsonl", "lsfs://mydata.jsonl", "data:csv;base64,{base64_content}" type: object required: - uri @@ -10638,6 +10649,7 @@ components: $ref: '#/components/schemas/Dataset' type: array title: Data + description: List of datasets type: object required: - data @@ -11635,27 +11647,37 @@ components: properties: purpose: $ref: '#/components/schemas/DatasetPurpose' + description: The purpose of the dataset. source: - anyOf: + oneOf: - $ref: '#/components/schemas/URIDataSource' title: URIDataSource - $ref: '#/components/schemas/RowsDataSource' title: RowsDataSource title: URIDataSource | RowsDataSource + description: The data source of the dataset. + discriminator: + propertyName: type + mapping: + rows: '#/components/schemas/RowsDataSource' + uri: '#/components/schemas/URIDataSource' metadata: anyOf: - additionalProperties: true type: object - type: 'null' + description: The metadata for the dataset. dataset_id: anyOf: - type: string - type: 'null' + description: The ID of the dataset. If not provided, an ID will be generated. type: object required: - purpose - source title: RegisterDatasetRequest + description: Request model for registering a dataset. RegisterBenchmarkRequest: properties: benchmark_id: @@ -13488,6 +13510,28 @@ components: - items title: ConversationItemCreateRequest type: object + GetDatasetRequest: + description: Request model for getting a dataset by ID. + properties: + dataset_id: + description: The ID of the dataset to get. 
+ title: Dataset Id + type: string + required: + - dataset_id + title: GetDatasetRequest + type: object + UnregisterDatasetRequest: + description: Request model for unregistering a dataset. + properties: + dataset_id: + description: The ID of the dataset to unregister. + title: Dataset Id + type: string + required: + - dataset_id + title: UnregisterDatasetRequest + type: object Api: description: Enumeration of all available APIs in the Llama Stack system. enum: diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 0058b9529..966e89f8b 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -655,7 +655,7 @@ paths: get: responses: '200': - description: A ListDatasetsResponse. + description: A list of dataset objects. content: application/json: schema: @@ -674,13 +674,13 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Datasets - summary: List Datasets + summary: List all datasets. description: List all datasets. operationId: list_datasets_v1beta_datasets_get post: responses: '200': - description: A Dataset. + description: The registered dataset object. content: application/json: schema: @@ -699,7 +699,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Datasets - summary: Register Dataset + summary: Register a new dataset. description: Register a new dataset. operationId: register_dataset_v1beta_datasets_post requestBody: @@ -713,26 +713,26 @@ paths: get: responses: '200': - description: A Dataset. + description: The dataset object. 
content: application/json: schema: $ref: '#/components/schemas/Dataset' '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response tags: - Datasets - summary: Get Dataset + summary: Get a dataset by its ID. description: Get a dataset by its ID. operationId: get_dataset_v1beta_datasets__dataset_id__get parameters: @@ -741,26 +741,28 @@ paths: required: true schema: type: string - description: 'Path parameter: dataset_id' + description: The ID of the dataset to get. + title: Dataset Id + description: The ID of the dataset to get. delete: responses: '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response '204': - description: Successful Response + description: The dataset was successfully unregistered. tags: - Datasets - summary: Unregister Dataset + summary: Unregister a dataset by its ID. description: Unregister a dataset by its ID. operationId: unregister_dataset_v1beta_datasets__dataset_id__delete parameters: @@ -769,7 +771,9 @@ paths: required: true schema: type: string - description: 'Path parameter: dataset_id' + description: The ID of the dataset to unregister. 
+ title: Dataset Id + description: The ID of the dataset to unregister. deprecated: true /v1alpha/eval/benchmarks: get: @@ -7396,9 +7400,11 @@ components: type: string const: dataset title: Type + description: Type of resource, always 'dataset' for datasets default: dataset purpose: $ref: '#/components/schemas/DatasetPurpose' + description: Purpose of the dataset indicating its intended use source: oneOf: - $ref: '#/components/schemas/URIDataSource' @@ -7406,6 +7412,7 @@ components: - $ref: '#/components/schemas/RowsDataSource' title: RowsDataSource title: URIDataSource | RowsDataSource + description: Data source configuration for the dataset discriminator: propertyName: type mapping: @@ -7430,6 +7437,7 @@ components: type: string const: rows title: Type + description: The type of data source. default: rows rows: items: @@ -7437,6 +7445,7 @@ components: type: object type: array title: Rows + description: 'The dataset is stored in rows. E.g. [{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}]' type: object required: - rows @@ -7448,10 +7457,12 @@ components: type: string const: uri title: Type + description: The type of data source. default: uri uri: type: string title: Uri + description: The dataset can be obtained from a URI. E.g. "https://mywebsite.com/mydata.jsonl", "lsfs://mydata.jsonl", "data:csv;base64,{base64_content}" type: object required: - uri @@ -7464,6 +7475,7 @@ components: $ref: '#/components/schemas/Dataset' type: array title: Data + description: List of datasets type: object required: - data @@ -8461,27 +8473,37 @@ components: properties: purpose: $ref: '#/components/schemas/DatasetPurpose' + description: The purpose of the dataset. source: - anyOf: + oneOf: - $ref: '#/components/schemas/URIDataSource' title: URIDataSource - $ref: '#/components/schemas/RowsDataSource' title: RowsDataSource title: URIDataSource | RowsDataSource + description: The data source of the dataset. 
+ discriminator: + propertyName: type + mapping: + rows: '#/components/schemas/RowsDataSource' + uri: '#/components/schemas/URIDataSource' metadata: anyOf: - additionalProperties: true type: object - type: 'null' + description: The metadata for the dataset. dataset_id: anyOf: - type: string - type: 'null' + description: The ID of the dataset. If not provided, an ID will be generated. type: object required: - purpose - source title: RegisterDatasetRequest + description: Request model for registering a dataset. RegisterBenchmarkRequest: properties: benchmark_id: @@ -10314,6 +10336,28 @@ components: - items title: ConversationItemCreateRequest type: object + GetDatasetRequest: + description: Request model for getting a dataset by ID. + properties: + dataset_id: + description: The ID of the dataset to get. + title: Dataset Id + type: string + required: + - dataset_id + title: GetDatasetRequest + type: object + UnregisterDatasetRequest: + description: Request model for unregistering a dataset. + properties: + dataset_id: + description: The ID of the dataset to unregister. + title: Dataset Id + type: string + required: + - dataset_id + title: UnregisterDatasetRequest + type: object Api: description: Enumeration of all available APIs in the Llama Stack system. enum: diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index 39e05aae8..dc4386035 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -110,7 +110,7 @@ paths: get: responses: '200': - description: A ListDatasetsResponse. + description: A list of dataset objects. content: application/json: schema: @@ -129,33 +129,33 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Datasets - summary: List Datasets + summary: List all datasets. description: List all datasets. operationId: list_datasets_v1beta_datasets_get /v1beta/datasets/{dataset_id}: get: responses: '200': - description: A Dataset. 
+ description: The dataset object. content: application/json: schema: $ref: '#/components/schemas/Dataset' '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response tags: - Datasets - summary: Get Dataset + summary: Get a dataset by its ID. description: Get a dataset by its ID. operationId: get_dataset_v1beta_datasets__dataset_id__get parameters: @@ -164,7 +164,9 @@ paths: required: true schema: type: string - description: 'Path parameter: dataset_id' + description: The ID of the dataset to get. + title: Dataset Id + description: The ID of the dataset to get. /v1alpha/eval/benchmarks: get: responses: @@ -6659,9 +6661,11 @@ components: type: string const: dataset title: Type + description: Type of resource, always 'dataset' for datasets default: dataset purpose: $ref: '#/components/schemas/DatasetPurpose' + description: Purpose of the dataset indicating its intended use source: oneOf: - $ref: '#/components/schemas/URIDataSource' @@ -6669,6 +6673,7 @@ components: - $ref: '#/components/schemas/RowsDataSource' title: RowsDataSource title: URIDataSource | RowsDataSource + description: Data source configuration for the dataset discriminator: propertyName: type mapping: @@ -6693,6 +6698,7 @@ components: type: string const: rows title: Type + description: The type of data source. default: rows rows: items: @@ -6700,6 +6706,7 @@ components: type: object type: array title: Rows + description: 'The dataset is stored in rows. E.g. 
[{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}]' type: object required: - rows @@ -6711,10 +6718,12 @@ components: type: string const: uri title: Type + description: The type of data source. default: uri uri: type: string title: Uri + description: The dataset can be obtained from a URI. E.g. "https://mywebsite.com/mydata.jsonl", "lsfs://mydata.jsonl", "data:csv;base64,{base64_content}" type: object required: - uri @@ -6727,6 +6736,7 @@ components: $ref: '#/components/schemas/Dataset' type: array title: Data + description: List of datasets type: object required: - data @@ -7585,6 +7595,41 @@ components: - $ref: '#/components/schemas/RowsDataSource' title: RowsDataSource title: URIDataSource | RowsDataSource + RegisterDatasetRequest: + properties: + purpose: + $ref: '#/components/schemas/DatasetPurpose' + description: The purpose of the dataset. + source: + oneOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource + description: The data source of the dataset. + discriminator: + propertyName: type + mapping: + rows: '#/components/schemas/RowsDataSource' + uri: '#/components/schemas/URIDataSource' + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + description: The metadata for the dataset. + dataset_id: + anyOf: + - type: string + - type: 'null' + description: The ID of the dataset. If not provided, an ID will be generated. + type: object + required: + - purpose + - source + title: RegisterDatasetRequest + description: Request model for registering a dataset. RegisterBenchmarkRequest: properties: benchmark_id: @@ -9208,6 +9253,28 @@ components: - items title: ConversationItemCreateRequest type: object + GetDatasetRequest: + description: Request model for getting a dataset by ID. 
+ properties: + dataset_id: + description: The ID of the dataset to get. + title: Dataset Id + type: string + required: + - dataset_id + title: GetDatasetRequest + type: object + UnregisterDatasetRequest: + description: Request model for unregistering a dataset. + properties: + dataset_id: + description: The ID of the dataset to unregister. + title: Dataset Id + type: string + required: + - dataset_id + title: UnregisterDatasetRequest + type: object Api: description: Enumeration of all available APIs in the Llama Stack system. enum: diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 1a956f7a8..d3387519c 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -9186,9 +9186,11 @@ components: type: string const: dataset title: Type + description: Type of resource, always 'dataset' for datasets default: dataset purpose: $ref: '#/components/schemas/DatasetPurpose' + description: Purpose of the dataset indicating its intended use source: oneOf: - $ref: '#/components/schemas/URIDataSource' @@ -9196,6 +9198,7 @@ components: - $ref: '#/components/schemas/RowsDataSource' title: RowsDataSource title: URIDataSource | RowsDataSource + description: Data source configuration for the dataset discriminator: propertyName: type mapping: @@ -9220,6 +9223,7 @@ components: type: string const: rows title: Type + description: The type of data source. default: rows rows: items: @@ -9227,6 +9231,7 @@ components: type: object type: array title: Rows + description: 'The dataset is stored in rows. E.g. [{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}]' type: object required: - rows @@ -9238,10 +9243,12 @@ components: type: string const: uri title: Type + description: The type of data source. default: uri uri: type: string title: Uri + description: The dataset can be obtained from a URI. E.g. 
"https://mywebsite.com/mydata.jsonl", "lsfs://mydata.jsonl", "data:csv;base64,{base64_content}" type: object required: - uri @@ -9254,6 +9261,7 @@ components: $ref: '#/components/schemas/Dataset' type: array title: Data + description: List of datasets type: object required: - data @@ -9965,6 +9973,41 @@ components: - $ref: '#/components/schemas/RowsDataSource' title: RowsDataSource title: URIDataSource | RowsDataSource + RegisterDatasetRequest: + properties: + purpose: + $ref: '#/components/schemas/DatasetPurpose' + description: The purpose of the dataset. + source: + oneOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource + description: The data source of the dataset. + discriminator: + propertyName: type + mapping: + rows: '#/components/schemas/RowsDataSource' + uri: '#/components/schemas/URIDataSource' + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + description: The metadata for the dataset. + dataset_id: + anyOf: + - type: string + - type: 'null' + description: The ID of the dataset. If not provided, an ID will be generated. + type: object + required: + - purpose + - source + title: RegisterDatasetRequest + description: Request model for registering a dataset. RegisterBenchmarkRequest: properties: benchmark_id: @@ -11797,6 +11840,28 @@ components: - items title: ConversationItemCreateRequest type: object + GetDatasetRequest: + description: Request model for getting a dataset by ID. + properties: + dataset_id: + description: The ID of the dataset to get. + title: Dataset Id + type: string + required: + - dataset_id + title: GetDatasetRequest + type: object + UnregisterDatasetRequest: + description: Request model for unregistering a dataset. + properties: + dataset_id: + description: The ID of the dataset to unregister. 
+ title: Dataset Id + type: string + required: + - dataset_id + title: UnregisterDatasetRequest + type: object Api: description: Enumeration of all available APIs in the Llama Stack system. enum: diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index b0e91c5eb..edc2a62f6 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -3268,7 +3268,7 @@ paths: get: responses: '200': - description: A ListDatasetsResponse. + description: A list of dataset objects. content: application/json: schema: @@ -3287,13 +3287,13 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Datasets - summary: List Datasets + summary: List all datasets. description: List all datasets. operationId: list_datasets_v1beta_datasets_get post: responses: '200': - description: A Dataset. + description: The registered dataset object. content: application/json: schema: @@ -3312,7 +3312,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Datasets - summary: Register Dataset + summary: Register a new dataset. description: Register a new dataset. operationId: register_dataset_v1beta_datasets_post requestBody: @@ -3326,26 +3326,26 @@ paths: get: responses: '200': - description: A Dataset. + description: The dataset object. content: application/json: schema: $ref: '#/components/schemas/Dataset' '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response tags: - Datasets - summary: Get Dataset + summary: Get a dataset by its ID. description: Get a dataset by its ID. 
operationId: get_dataset_v1beta_datasets__dataset_id__get parameters: @@ -3354,26 +3354,28 @@ paths: required: true schema: type: string - description: 'Path parameter: dataset_id' + description: The ID of the dataset to get. + title: Dataset Id + description: The ID of the dataset to get. delete: responses: '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response '204': - description: Successful Response + description: The dataset was successfully unregistered. tags: - Datasets - summary: Unregister Dataset + summary: Unregister a dataset by its ID. description: Unregister a dataset by its ID. operationId: unregister_dataset_v1beta_datasets__dataset_id__delete parameters: @@ -3382,7 +3384,9 @@ paths: required: true schema: type: string - description: 'Path parameter: dataset_id' + description: The ID of the dataset to unregister. + title: Dataset Id + description: The ID of the dataset to unregister. 
deprecated: true /v1alpha/eval/benchmarks: get: @@ -10570,9 +10574,11 @@ components: type: string const: dataset title: Type + description: Type of resource, always 'dataset' for datasets default: dataset purpose: $ref: '#/components/schemas/DatasetPurpose' + description: Purpose of the dataset indicating its intended use source: oneOf: - $ref: '#/components/schemas/URIDataSource' @@ -10580,6 +10586,7 @@ components: - $ref: '#/components/schemas/RowsDataSource' title: RowsDataSource title: URIDataSource | RowsDataSource + description: Data source configuration for the dataset discriminator: propertyName: type mapping: @@ -10604,6 +10611,7 @@ components: type: string const: rows title: Type + description: The type of data source. default: rows rows: items: @@ -10611,6 +10619,7 @@ components: type: object type: array title: Rows + description: 'The dataset is stored in rows. E.g. [{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}]' type: object required: - rows @@ -10622,10 +10631,12 @@ components: type: string const: uri title: Type + description: The type of data source. default: uri uri: type: string title: Uri + description: The dataset can be obtained from a URI. E.g. "https://mywebsite.com/mydata.jsonl", "lsfs://mydata.jsonl", "data:csv;base64,{base64_content}" type: object required: - uri @@ -10638,6 +10649,7 @@ components: $ref: '#/components/schemas/Dataset' type: array title: Data + description: List of datasets type: object required: - data @@ -11635,27 +11647,37 @@ components: properties: purpose: $ref: '#/components/schemas/DatasetPurpose' + description: The purpose of the dataset. source: - anyOf: + oneOf: - $ref: '#/components/schemas/URIDataSource' title: URIDataSource - $ref: '#/components/schemas/RowsDataSource' title: RowsDataSource title: URIDataSource | RowsDataSource + description: The data source of the dataset. 
+ discriminator: + propertyName: type + mapping: + rows: '#/components/schemas/RowsDataSource' + uri: '#/components/schemas/URIDataSource' metadata: anyOf: - additionalProperties: true type: object - type: 'null' + description: The metadata for the dataset. dataset_id: anyOf: - type: string - type: 'null' + description: The ID of the dataset. If not provided, an ID will be generated. type: object required: - purpose - source title: RegisterDatasetRequest + description: Request model for registering a dataset. RegisterBenchmarkRequest: properties: benchmark_id: @@ -13488,6 +13510,28 @@ components: - items title: ConversationItemCreateRequest type: object + GetDatasetRequest: + description: Request model for getting a dataset by ID. + properties: + dataset_id: + description: The ID of the dataset to get. + title: Dataset Id + type: string + required: + - dataset_id + title: GetDatasetRequest + type: object + UnregisterDatasetRequest: + description: Request model for unregistering a dataset. + properties: + dataset_id: + description: The ID of the dataset to unregister. + title: Dataset Id + type: string + required: + - dataset_id + title: UnregisterDatasetRequest + type: object Api: description: Enumeration of all available APIs in the Llama Stack system. enum: diff --git a/src/llama_stack/core/routing_tables/datasets.py b/src/llama_stack/core/routing_tables/datasets.py index 62fd07b13..43aa0692c 100644 --- a/src/llama_stack/core/routing_tables/datasets.py +++ b/src/llama_stack/core/routing_tables/datasets.py @@ -5,7 +5,6 @@ # the root directory of this source tree. 
import uuid -from typing import Any from llama_stack.core.datatypes import ( DatasetWithOwner, @@ -14,15 +13,18 @@ from llama_stack.log import get_logger from llama_stack_api import ( Dataset, DatasetNotFoundError, - DatasetPurpose, - Datasets, DatasetType, - DataSource, ListDatasetsResponse, ResourceType, RowsDataSource, URIDataSource, ) +from llama_stack_api.datasets.api import ( + Datasets, + GetDatasetRequest, + RegisterDatasetRequest, + UnregisterDatasetRequest, +) from .common import CommonRoutingTableImpl @@ -33,19 +35,17 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets): async def list_datasets(self) -> ListDatasetsResponse: return ListDatasetsResponse(data=await self.get_all_with_type(ResourceType.dataset.value)) - async def get_dataset(self, dataset_id: str) -> Dataset: - dataset = await self.get_object_by_identifier("dataset", dataset_id) + async def get_dataset(self, request: GetDatasetRequest) -> Dataset: + dataset = await self.get_object_by_identifier("dataset", request.dataset_id) if dataset is None: - raise DatasetNotFoundError(dataset_id) + raise DatasetNotFoundError(request.dataset_id) return dataset - async def register_dataset( - self, - purpose: DatasetPurpose, - source: DataSource, - metadata: dict[str, Any] | None = None, - dataset_id: str | None = None, - ) -> Dataset: + async def register_dataset(self, request: RegisterDatasetRequest) -> Dataset: + purpose = request.purpose + source = request.source + metadata = request.metadata + dataset_id = request.dataset_id if isinstance(source, dict): if source["type"] == "uri": source = URIDataSource.parse_obj(source) @@ -86,6 +86,6 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets): await self.register_object(dataset) return dataset - async def unregister_dataset(self, dataset_id: str) -> None: - dataset = await self.get_dataset(dataset_id) + async def unregister_dataset(self, request: UnregisterDatasetRequest) -> None: + dataset = await 
self.get_dataset(GetDatasetRequest(dataset_id=request.dataset_id)) await self.unregister_object(dataset) diff --git a/src/llama_stack/core/server/fastapi_router_registry.py b/src/llama_stack/core/server/fastapi_router_registry.py index 4ca1a9837..12dbf2c75 100644 --- a/src/llama_stack/core/server/fastapi_router_registry.py +++ b/src/llama_stack/core/server/fastapi_router_registry.py @@ -17,7 +17,7 @@ from fastapi import APIRouter from fastapi.routing import APIRoute from starlette.routing import Route -from llama_stack_api import batches, benchmarks +from llama_stack_api import batches, benchmarks, datasets # Router factories for APIs that have FastAPI routers # Add new APIs here as they are migrated to the router system @@ -26,6 +26,7 @@ from llama_stack_api.datatypes import Api _ROUTER_FACTORIES: dict[str, Callable[[Any], APIRouter]] = { "batches": batches.fastapi_routes.create_router, "benchmarks": benchmarks.fastapi_routes.create_router, + "datasets": datasets.fastapi_routes.create_router, } diff --git a/src/llama_stack_api/datasets.py b/src/llama_stack_api/datasets.py deleted file mode 100644 index 6d707aa8e..000000000 --- a/src/llama_stack_api/datasets.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from enum import Enum, StrEnum -from typing import Annotated, Any, Literal, Protocol - -from pydantic import BaseModel, Field - -from llama_stack_api.resource import Resource, ResourceType -from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod -from llama_stack_api.version import LLAMA_STACK_API_V1BETA - - -class DatasetPurpose(StrEnum): - """ - Purpose of the dataset. Each purpose has a required input data schema. - - :cvar post-training/messages: The dataset contains messages used for post-training. 
- { - "messages": [ - {"role": "user", "content": "Hello, world!"}, - {"role": "assistant", "content": "Hello, world!"}, - ] - } - :cvar eval/question-answer: The dataset contains a question column and an answer column. - { - "question": "What is the capital of France?", - "answer": "Paris" - } - :cvar eval/messages-answer: The dataset contains a messages column with list of messages and an answer column. - { - "messages": [ - {"role": "user", "content": "Hello, my name is John Doe."}, - {"role": "assistant", "content": "Hello, John Doe. How can I help you today?"}, - {"role": "user", "content": "What's my name?"}, - ], - "answer": "John Doe" - } - """ - - post_training_messages = "post-training/messages" - eval_question_answer = "eval/question-answer" - eval_messages_answer = "eval/messages-answer" - - # TODO: add more schemas here - - -class DatasetType(Enum): - """ - Type of the dataset source. - :cvar uri: The dataset can be obtained from a URI. - :cvar rows: The dataset is stored in rows. - """ - - uri = "uri" - rows = "rows" - - -@json_schema_type -class URIDataSource(BaseModel): - """A dataset that can be obtained from a URI. - :param uri: The dataset can be obtained from a URI. E.g. - - "https://mywebsite.com/mydata.jsonl" - - "lsfs://mydata.jsonl" - - "data:csv;base64,{base64_content}" - """ - - type: Literal["uri"] = "uri" - uri: str - - -@json_schema_type -class RowsDataSource(BaseModel): - """A dataset stored in rows. - :param rows: The dataset is stored in rows. E.g. - - [ - {"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]} - ] - """ - - type: Literal["rows"] = "rows" - rows: list[dict[str, Any]] - - -DataSource = Annotated[ - URIDataSource | RowsDataSource, - Field(discriminator="type"), -] -register_schema(DataSource, name="DataSource") - - -class CommonDatasetFields(BaseModel): - """ - Common fields for a dataset. 
- - :param purpose: Purpose of the dataset indicating its intended use - :param source: Data source configuration for the dataset - :param metadata: Additional metadata for the dataset - """ - - purpose: DatasetPurpose - source: DataSource - metadata: dict[str, Any] = Field( - default_factory=dict, - description="Any additional metadata for this dataset", - ) - - -@json_schema_type -class Dataset(CommonDatasetFields, Resource): - """Dataset resource for storing and accessing training or evaluation data. - - :param type: Type of resource, always 'dataset' for datasets - """ - - type: Literal[ResourceType.dataset] = ResourceType.dataset - - @property - def dataset_id(self) -> str: - return self.identifier - - @property - def provider_dataset_id(self) -> str | None: - return self.provider_resource_id - - -class DatasetInput(CommonDatasetFields, BaseModel): - """Input parameters for dataset operations. - - :param dataset_id: Unique identifier for the dataset - """ - - dataset_id: str - - -@json_schema_type -class ListDatasetsResponse(BaseModel): - """Response from listing datasets. - - :param data: List of datasets - """ - - data: list[Dataset] - - -class Datasets(Protocol): - @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA, deprecated=True) - async def register_dataset( - self, - purpose: DatasetPurpose, - source: DataSource, - metadata: dict[str, Any] | None = None, - dataset_id: str | None = None, - ) -> Dataset: - """ - Register a new dataset. - - :param purpose: The purpose of the dataset. - One of: - - "post-training/messages": The dataset contains a messages column with list of messages for post-training. - { - "messages": [ - {"role": "user", "content": "Hello, world!"}, - {"role": "assistant", "content": "Hello, world!"}, - ] - } - - "eval/question-answer": The dataset contains a question column and an answer column for evaluation. 
- { - "question": "What is the capital of France?", - "answer": "Paris" - } - - "eval/messages-answer": The dataset contains a messages column with list of messages and an answer column for evaluation. - { - "messages": [ - {"role": "user", "content": "Hello, my name is John Doe."}, - {"role": "assistant", "content": "Hello, John Doe. How can I help you today?"}, - {"role": "user", "content": "What's my name?"}, - ], - "answer": "John Doe" - } - :param source: The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. Examples: - - { - "type": "uri", - "uri": "https://mywebsite.com/mydata.jsonl" - } - - { - "type": "uri", - "uri": "lsfs://mydata.jsonl" - } - - { - "type": "uri", - "uri": "data:csv;base64,{base64_content}" - } - - { - "type": "uri", - "uri": "huggingface://llamastack/simpleqa?split=train" - } - - { - "type": "rows", - "rows": [ - { - "messages": [ - {"role": "user", "content": "Hello, world!"}, - {"role": "assistant", "content": "Hello, world!"}, - ] - } - ] - } - :param metadata: The metadata for the dataset. - - E.g. {"description": "My dataset"}. - :param dataset_id: The ID of the dataset. If not provided, an ID will be generated. - :returns: A Dataset. - """ - ... - - @webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA) - async def get_dataset( - self, - dataset_id: str, - ) -> Dataset: - """Get a dataset by its ID. - - :param dataset_id: The ID of the dataset to get. - :returns: A Dataset. - """ - ... - - @webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1BETA) - async def list_datasets(self) -> ListDatasetsResponse: - """List all datasets. - - :returns: A ListDatasetsResponse. - """ - ... - - @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA, deprecated=True) - async def unregister_dataset( - self, - dataset_id: str, - ) -> None: - """Unregister a dataset by its ID. 
- - :param dataset_id: The ID of the dataset to unregister. - """ - ... diff --git a/src/llama_stack_api/datasets/__init__.py b/src/llama_stack_api/datasets/__init__.py new file mode 100644 index 000000000..cff53476e --- /dev/null +++ b/src/llama_stack_api/datasets/__init__.py @@ -0,0 +1,61 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +"""Datasets API protocol and models. + +This module contains the Datasets protocol definition. +Pydantic models are defined in llama_stack_api.datasets.models. +The FastAPI router is defined in llama_stack_api.datasets.fastapi_routes. +""" + +# Import fastapi_routes for router factory access +from . import fastapi_routes + +# Import new protocol for FastAPI router +from .api import Datasets + +# Import models for re-export +from .models import ( + CommonDatasetFields, + Dataset, + DatasetPurpose, + DatasetType, + DataSource, + GetDatasetRequest, + ListDatasetsResponse, + RegisterDatasetRequest, + RowsDataSource, + UnregisterDatasetRequest, + URIDataSource, +) + + +# Define DatasetInput for backward compatibility +class DatasetInput(CommonDatasetFields): + """Input parameters for dataset operations. + + :param dataset_id: Unique identifier for the dataset + """ + + dataset_id: str + + +__all__ = [ + "Datasets", + "Dataset", + "CommonDatasetFields", + "DatasetPurpose", + "DataSource", + "DatasetInput", + "DatasetType", + "RowsDataSource", + "URIDataSource", + "ListDatasetsResponse", + "RegisterDatasetRequest", + "GetDatasetRequest", + "UnregisterDatasetRequest", + "fastapi_routes", +] diff --git a/src/llama_stack_api/datasets/api.py b/src/llama_stack_api/datasets/api.py new file mode 100644 index 000000000..981b438f0 --- /dev/null +++ b/src/llama_stack_api/datasets/api.py @@ -0,0 +1,35 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +"""Datasets API protocol definition. + +This module contains the Datasets protocol definition. +Pydantic models are defined in llama_stack_api.datasets.models. +The FastAPI router is defined in llama_stack_api.datasets.fastapi_routes. +""" + +from typing import Protocol, runtime_checkable + +from .models import ( + Dataset, + GetDatasetRequest, + ListDatasetsResponse, + RegisterDatasetRequest, + UnregisterDatasetRequest, +) + + +@runtime_checkable +class Datasets(Protocol): + """Protocol for dataset management operations.""" + + async def register_dataset(self, request: RegisterDatasetRequest) -> Dataset: ... + + async def get_dataset(self, request: GetDatasetRequest) -> Dataset: ... + + async def list_datasets(self) -> ListDatasetsResponse: ... + + async def unregister_dataset(self, request: UnregisterDatasetRequest) -> None: ... diff --git a/src/llama_stack_api/datasets/fastapi_routes.py b/src/llama_stack_api/datasets/fastapi_routes.py new file mode 100644 index 000000000..07a32a59f --- /dev/null +++ b/src/llama_stack_api/datasets/fastapi_routes.py @@ -0,0 +1,104 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +"""FastAPI router for the Datasets API. + +This module defines the FastAPI router for the Datasets API using standard +FastAPI route decorators. 
+""" + +from typing import Annotated + +from fastapi import APIRouter, Body, Depends + +from llama_stack_api.router_utils import create_path_dependency, standard_responses +from llama_stack_api.version import LLAMA_STACK_API_V1BETA + +from .api import Datasets +from .models import ( + Dataset, + GetDatasetRequest, + ListDatasetsResponse, + RegisterDatasetRequest, + UnregisterDatasetRequest, +) + +# Path parameter dependencies for single-field models +get_dataset_request = create_path_dependency(GetDatasetRequest) +unregister_dataset_request = create_path_dependency(UnregisterDatasetRequest) + + +def create_router(impl: Datasets) -> APIRouter: + """Create a FastAPI router for the Datasets API. + + Args: + impl: The Datasets implementation instance + + Returns: + APIRouter configured for the Datasets API + """ + router = APIRouter( + prefix=f"/{LLAMA_STACK_API_V1BETA}", + tags=["Datasets"], + responses=standard_responses, + ) + + @router.post( + "/datasets", + response_model=Dataset, + summary="Register a new dataset.", + description="Register a new dataset.", + responses={ + 200: {"description": "The registered dataset object."}, + }, + deprecated=True, + ) + async def register_dataset( + request: Annotated[RegisterDatasetRequest, Body(...)], + ) -> Dataset: + return await impl.register_dataset(request) + + @router.get( + "/datasets/{dataset_id:path}", + response_model=Dataset, + summary="Get a dataset by its ID.", + description="Get a dataset by its ID.", + responses={ + 200: {"description": "The dataset object."}, + }, + ) + async def get_dataset( + request: Annotated[GetDatasetRequest, Depends(get_dataset_request)], + ) -> Dataset: + return await impl.get_dataset(request) + + @router.get( + "/datasets", + response_model=ListDatasetsResponse, + summary="List all datasets.", + description="List all datasets.", + responses={ + 200: {"description": "A list of dataset objects."}, + }, + ) + async def list_datasets() -> ListDatasetsResponse: + return await 
impl.list_datasets()
+    @router.delete(
+        "/datasets/{dataset_id:path}",
+        # Success carries no body: send and document 204 No Content, as the OpenAPI spec lists for this route.
+        status_code=204,
+        summary="Unregister a dataset by its ID.",
+        description="Unregister a dataset by its ID.",
+        responses={204: {"description": "The dataset was successfully unregistered."}},
+        deprecated=True,
+    )
+    async def unregister_dataset(
+        request: Annotated[UnregisterDatasetRequest, Depends(unregister_dataset_request)],
+    ) -> None:
+        return await impl.unregister_dataset(request)
+
+    return router
diff --git a/src/llama_stack_api/datasets/models.py b/src/llama_stack_api/datasets/models.py
new file mode 100644
index 000000000..42527d095
--- /dev/null
+++ b/src/llama_stack_api/datasets/models.py
@@ -0,0 +1,152 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""Pydantic models for Datasets API requests and responses.
+
+This module defines the request and response models for the Datasets API
+using Pydantic with Field descriptions for OpenAPI schema generation.
+"""
+
+from enum import Enum, StrEnum
+from typing import Annotated, Any, Literal
+
+from pydantic import BaseModel, Field
+
+from llama_stack_api.resource import Resource, ResourceType
+from llama_stack_api.schema_utils import json_schema_type, register_schema
+
+
+class DatasetPurpose(StrEnum):
+    """Purpose of the dataset.
Each purpose has a required input data schema.""" + + post_training_messages = "post-training/messages" + """The dataset contains messages used for post-training.""" + eval_question_answer = "eval/question-answer" + """The dataset contains a question column and an answer column.""" + eval_messages_answer = "eval/messages-answer" + """The dataset contains a messages column with list of messages and an answer column.""" + + +class DatasetType(Enum): + """Type of the dataset source.""" + + uri = "uri" + """The dataset can be obtained from a URI.""" + rows = "rows" + """The dataset is stored in rows.""" + + +@json_schema_type +class URIDataSource(BaseModel): + """A dataset that can be obtained from a URI.""" + + type: Literal["uri"] = Field(default="uri", description="The type of data source.") + uri: str = Field( + ..., + description='The dataset can be obtained from a URI. E.g. "https://mywebsite.com/mydata.jsonl", "lsfs://mydata.jsonl", "data:csv;base64,{base64_content}"', + ) + + +@json_schema_type +class RowsDataSource(BaseModel): + """A dataset stored in rows.""" + + type: Literal["rows"] = Field(default="rows", description="The type of data source.") + rows: list[dict[str, Any]] = Field( + ..., + description='The dataset is stored in rows. E.g. 
[{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}]', + ) + + +DataSource = Annotated[ + URIDataSource | RowsDataSource, + Field(discriminator="type"), +] +register_schema(DataSource, name="DataSource") + + +class CommonDatasetFields(BaseModel): + """Common fields for a dataset.""" + + purpose: DatasetPurpose = Field(..., description="Purpose of the dataset indicating its intended use") + source: DataSource = Field(..., description="Data source configuration for the dataset") + metadata: dict[str, Any] = Field( + default_factory=dict, + description="Any additional metadata for this dataset", + ) + + +@json_schema_type +class Dataset(CommonDatasetFields, Resource): + """Dataset resource for storing and accessing training or evaluation data.""" + + type: Literal[ResourceType.dataset] = Field( + default=ResourceType.dataset, + description="Type of resource, always 'dataset' for datasets", + ) + + @property + def dataset_id(self) -> str: + return self.identifier + + @property + def provider_dataset_id(self) -> str | None: + return self.provider_resource_id + + +@json_schema_type +class ListDatasetsResponse(BaseModel): + """Response from listing datasets.""" + + data: list[Dataset] = Field(..., description="List of datasets") + + +# Request models for each endpoint + + +@json_schema_type +class RegisterDatasetRequest(BaseModel): + """Request model for registering a dataset.""" + + purpose: DatasetPurpose = Field(..., description="The purpose of the dataset.") + source: DataSource = Field(..., description="The data source of the dataset.") + metadata: dict[str, Any] | None = Field( + default=None, + description="The metadata for the dataset.", + ) + dataset_id: str | None = Field( + default=None, + description="The ID of the dataset. 
If not provided, an ID will be generated.", + ) + + +@json_schema_type +class GetDatasetRequest(BaseModel): + """Request model for getting a dataset by ID.""" + + dataset_id: str = Field(..., description="The ID of the dataset to get.") + + +@json_schema_type +class UnregisterDatasetRequest(BaseModel): + """Request model for unregistering a dataset.""" + + dataset_id: str = Field(..., description="The ID of the dataset to unregister.") + + +__all__ = [ + "CommonDatasetFields", + "Dataset", + "DatasetPurpose", + "DatasetType", + "DataSource", + "RowsDataSource", + "URIDataSource", + "ListDatasetsResponse", + "RegisterDatasetRequest", + "GetDatasetRequest", + "UnregisterDatasetRequest", +] diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py index 676d786c2..e31d1af79 100644 --- a/tests/unit/distribution/routers/test_routing_tables.py +++ b/tests/unit/distribution/routers/test_routing_tables.py @@ -35,6 +35,10 @@ from llama_stack_api import ( UnregisterBenchmarkRequest, URIDataSource, ) +from llama_stack_api.datasets import ( + RegisterDatasetRequest, + UnregisterDatasetRequest, +) class Impl: @@ -261,10 +265,18 @@ async def test_datasets_routing_table(cached_disk_dist_registry): # Register multiple datasets and verify listing await table.register_dataset( - dataset_id="test-dataset", purpose=DatasetPurpose.eval_messages_answer, source=URIDataSource(uri="test-uri") + RegisterDatasetRequest( + dataset_id="test-dataset", + purpose=DatasetPurpose.eval_messages_answer, + source=URIDataSource(uri="test-uri"), + ) ) await table.register_dataset( - dataset_id="test-dataset-2", purpose=DatasetPurpose.eval_messages_answer, source=URIDataSource(uri="test-uri-2") + RegisterDatasetRequest( + dataset_id="test-dataset-2", + purpose=DatasetPurpose.eval_messages_answer, + source=URIDataSource(uri="test-uri-2"), + ) ) datasets = await table.list_datasets() @@ -273,8 +285,8 @@ async def 
test_datasets_routing_table(cached_disk_dist_registry): assert "test-dataset" in dataset_ids assert "test-dataset-2" in dataset_ids - await table.unregister_dataset(dataset_id="test-dataset") - await table.unregister_dataset(dataset_id="test-dataset-2") + await table.unregister_dataset(UnregisterDatasetRequest(dataset_id="test-dataset")) + await table.unregister_dataset(UnregisterDatasetRequest(dataset_id="test-dataset-2")) datasets = await table.list_datasets() assert len(datasets.data) == 0