diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 0e5996e21..ae6a01e15 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -40,75 +40,7 @@ } ], "paths": { - "/v1/datasetio/rows": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/PaginatedRowsResult" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "DatasetIO" - ], - "description": "Get a paginated list of rows from a dataset.", - "parameters": [ - { - "name": "dataset_id", - "in": "query", - "description": "The ID of the dataset to get the rows from.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "rows_in_page", - "in": "query", - "description": "The number of rows to get per page.", - "required": true, - "schema": { - "type": "integer" - } - }, - { - "name": "page_token", - "in": "query", - "description": "The token to get the next page of rows.", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "filter_condition", - "in": "query", - "description": "(Optional) A condition to filter the rows by.", - "required": false, - "schema": { - "type": "string" - } - } - ] - }, + "/v1/datasets/{dataset_id}/rows": { "post": { "responses": { "200": { @@ -131,7 +63,16 @@ "DatasetIO" ], "description": "", - "parameters": [], + "parameters": [ + { + "name": "dataset_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -2194,6 +2135,76 @@ } } }, + "/v1/datasets/{dataset_id}/iterrows": { + "get": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PaginatedRowsResult" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "DatasetIO" + ], + "description": "Get a paginated list of rows from a dataset.", + "parameters": [ + { + "name": "dataset_id", + "in": "path", + "description": "The ID of the dataset to get the rows from.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "rows_in_page", + "in": "query", + "description": "The number of rows to get per page.", + "required": true, + "schema": { + "type": "integer" + } + }, + { + "name": "page_token", + "in": "query", + "description": "The token to get the next page of rows.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "filter_condition", + "in": "query", + "description": "(Optional) A condition to filter the rows by.", + "required": false, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": { "get": { "responses": { @@ -3789,9 +3800,6 @@ "AppendRowsRequest": { "type": "object", "properties": { - "dataset_id": { - "type": "string" - }, "rows": { "type": "array", "items": { @@ -3823,7 +3831,6 @@ }, "additionalProperties": false, "required": [ - "dataset_id", "rows" ], "title": "AppendRowsRequest" @@ -6794,9 +6801,6 @@ { "$ref": "#/components/schemas/URIDataSource" }, - { - "$ref": "#/components/schemas/HuggingfaceDataSource" - }, { "$ref": "#/components/schemas/RowsDataSource" } @@ -6805,7 +6809,6 @@ "propertyName": "type", "mapping": { "uri": "#/components/schemas/URIDataSource", - "huggingface": "#/components/schemas/HuggingfaceDataSource", "rows": "#/components/schemas/RowsDataSource" } } @@ -6878,65 +6881,6 @@ ], "title": "Dataset" }, - "HuggingfaceDataSource": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "huggingface", - "default": "huggingface", - "description": "The type of the data source." - }, - "huggingface": { - "type": "object", - "properties": { - "path": { - "type": "string", - "description": "The path to the dataset in Huggingface. E.g. - \"llamastack/simpleqa\"" - }, - "params": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The parameters for the dataset." - } - }, - "additionalProperties": false, - "required": [ - "path", - "params" - ], - "description": "The fields for a Huggingface dataset." - } - }, - "additionalProperties": false, - "required": [ - "type", - "huggingface" - ], - "title": "HuggingfaceDataSource", - "description": "A dataset stored in Huggingface." - }, "RowsDataSource": { "type": "object", "properties": { @@ -7070,55 +7014,6 @@ ], "title": "ModelType" }, - "PaginatedRowsResult": { - "type": "object", - "properties": { - "rows": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "description": "The rows in the current page." - }, - "total_count": { - "type": "integer", - "description": "The total number of rows in the dataset." - }, - "next_page_token": { - "type": "string", - "description": "The token to get the next page of rows." - } - }, - "additionalProperties": false, - "required": [ - "rows", - "total_count" - ], - "title": "PaginatedRowsResult", - "description": "A paginated list of rows from a dataset." - }, "AgentTurnInputType": { "type": "object", "properties": { @@ -8153,6 +8048,55 @@ ], "title": "ToolInvocationResult" }, + "PaginatedRowsResult": { + "type": "object", + "properties": { + "rows": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "The rows in the current page." + }, + "total_count": { + "type": "integer", + "description": "The total number of rows in the dataset." + }, + "next_page_token": { + "type": "string", + "description": "The token to get the next page of rows." + } + }, + "additionalProperties": false, + "required": [ + "rows", + "total_count" + ], + "title": "PaginatedRowsResult", + "description": "A paginated list of rows from a dataset." + }, "ListAgentSessionsResponse": { "type": "object", "properties": { @@ -9431,7 +9375,7 @@ }, "source": { "$ref": "#/components/schemas/DataSource", - "description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"huggingface\", \"huggingface\": { \"dataset_path\": \"tatsu-lab/alpaca\", \"params\": { \"split\": \"train\" } } } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }" + "description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"data:csv;base64,{base64_content}\" } - { \"type\": \"uri\", \"uri\": \"huggingface://llamastack/simpleqa?split=train\" } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }" }, "metadata": { "type": "object", @@ -9461,7 +9405,7 @@ }, "dataset_id": { "type": "string", - "description": "The ID of the dataset. If not provided, a random ID will be generated." + "description": "The ID of the dataset. If not provided, an ID will be generated." } }, "additionalProperties": false, diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 1e6fc77cc..45d15407c 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -10,56 +10,7 @@ info: servers: - url: http://any-hosted-llama-stack.com paths: - /v1/datasetio/rows: - get: - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: '#/components/schemas/PaginatedRowsResult' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - DatasetIO - description: >- - Get a paginated list of rows from a dataset. - parameters: - - name: dataset_id - in: query - description: >- - The ID of the dataset to get the rows from. - required: true - schema: - type: string - - name: rows_in_page - in: query - description: The number of rows to get per page. - required: true - schema: - type: integer - - name: page_token - in: query - description: The token to get the next page of rows. - required: false - schema: - type: string - - name: filter_condition - in: query - description: >- - (Optional) A condition to filter the rows by. - required: false - schema: - type: string + /v1/datasets/{dataset_id}/rows: post: responses: '200': @@ -77,7 +28,12 @@ paths: tags: - DatasetIO description: '' - parameters: [] + parameters: + - name: dataset_id + in: path + required: true + schema: + type: string requestBody: content: application/json: @@ -1473,6 +1429,56 @@ paths: schema: $ref: '#/components/schemas/InvokeToolRequest' required: true + /v1/datasets/{dataset_id}/iterrows: + get: + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/PaginatedRowsResult' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - DatasetIO + description: >- + Get a paginated list of rows from a dataset. + parameters: + - name: dataset_id + in: path + description: >- + The ID of the dataset to get the rows from. + required: true + schema: + type: string + - name: rows_in_page + in: query + description: The number of rows to get per page. + required: true + schema: + type: integer + - name: page_token + in: query + description: The token to get the next page of rows. + required: false + schema: + type: string + - name: filter_condition + in: query + description: >- + (Optional) A condition to filter the rows by. + required: false + schema: + type: string /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}: get: responses: @@ -2583,8 +2589,6 @@ components: AppendRowsRequest: type: object properties: - dataset_id: - type: string rows: type: array items: @@ -2599,7 +2603,6 @@ components: - type: object additionalProperties: false required: - - dataset_id - rows title: AppendRowsRequest CompletionMessage: @@ -4705,13 +4708,11 @@ components: DataSource: oneOf: - $ref: '#/components/schemas/URIDataSource' - - $ref: '#/components/schemas/HuggingfaceDataSource' - $ref: '#/components/schemas/RowsDataSource' discriminator: propertyName: type mapping: uri: '#/components/schemas/URIDataSource' - huggingface: '#/components/schemas/HuggingfaceDataSource' rows: '#/components/schemas/RowsDataSource' Dataset: type: object @@ -4757,43 +4758,6 @@ components: - source - metadata title: Dataset - HuggingfaceDataSource: - type: object - properties: - type: - type: string - const: huggingface - default: huggingface - description: The type of the data source. - huggingface: - type: object - properties: - path: - type: string - description: >- - The path to the dataset in Huggingface. E.g. - "llamastack/simpleqa" - params: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The parameters for the dataset. - additionalProperties: false - required: - - path - - params - description: The fields for a Huggingface dataset. - additionalProperties: false - required: - - type - - huggingface - title: HuggingfaceDataSource - description: A dataset stored in Huggingface. RowsDataSource: type: object properties: @@ -4883,34 +4847,6 @@ components: - llm - embedding title: ModelType - PaginatedRowsResult: - type: object - properties: - rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows in the current page. - total_count: - type: integer - description: The total number of rows in the dataset. - next_page_token: - type: string - description: The token to get the next page of rows. - additionalProperties: false - required: - - rows - - total_count - title: PaginatedRowsResult - description: A paginated list of rows from a dataset. AgentTurnInputType: type: object properties: @@ -5557,6 +5493,34 @@ components: required: - content title: ToolInvocationResult + PaginatedRowsResult: + type: object + properties: + rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The rows in the current page. + total_count: + type: integer + description: The total number of rows in the dataset. + next_page_token: + type: string + description: The token to get the next page of rows. + additionalProperties: false + required: + - rows + - total_count + title: PaginatedRowsResult + description: A paginated list of rows from a dataset. ListAgentSessionsResponse: type: object properties: @@ -6401,11 +6365,11 @@ components: $ref: '#/components/schemas/DataSource' description: >- The data source of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl" - } - { "type": "uri", "uri": "lsfs://mydata.jsonl" } - { "type": "huggingface", - "huggingface": { "dataset_path": "tatsu-lab/alpaca", "params": { "split": - "train" } } } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", - "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, - world!"}, ] } ] } + } - { "type": "uri", "uri": "lsfs://mydata.jsonl" } - { "type": "uri", + "uri": "data:csv;base64,{base64_content}" } - { "type": "uri", "uri": + "huggingface://llamastack/simpleqa?split=train" } - { "type": "rows", + "rows": [ { "messages": [ {"role": "user", "content": "Hello, world!"}, + {"role": "assistant", "content": "Hello, world!"}, ] } ] } metadata: type: object additionalProperties: @@ -6421,7 +6385,7 @@ components: dataset_id: type: string description: >- - The ID of the dataset. If not provided, a random ID will be generated. + The ID of the dataset. If not provided, an ID will be generated. additionalProperties: false required: - purpose diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py index c25b861c3..9fe3cd1e0 100644 --- a/llama_stack/apis/datasets/datasets.py +++ b/llama_stack/apis/datasets/datasets.py @@ -193,7 +193,7 @@ class Datasets(Protocol): } :param metadata: The metadata for the dataset. - E.g. {"description": "My dataset"} - :param dataset_id: The ID of the dataset. If not provided, a random ID will be generated. + :param dataset_id: The ID of the dataset. If not provided, an ID will be generated. """ ...