mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-23 04:53:14 +00:00
feat(api): (1/n) datasets api clean up (#1573)
## PR Stack - https://github.com/meta-llama/llama-stack/pull/1573 - https://github.com/meta-llama/llama-stack/pull/1625 - https://github.com/meta-llama/llama-stack/pull/1656 - https://github.com/meta-llama/llama-stack/pull/1657 - https://github.com/meta-llama/llama-stack/pull/1658 - https://github.com/meta-llama/llama-stack/pull/1659 - https://github.com/meta-llama/llama-stack/pull/1660 **Client SDK** - https://github.com/meta-llama/llama-stack-client-python/pull/203 **CI** -1391130488
<img width="1042" alt="image" src="https://github.com/user-attachments/assets/69636067-376d-436b-9204-896e2dd490ca" /> -- the test_rag_agent_with_attachments is flaky and not related to this PR ## Doc <img width="789" alt="image" src="https://github.com/user-attachments/assets/b88390f3-73d6-4483-b09a-a192064e32d9" /> ## Client Usage ```python client.datasets.register( source={ "type": "uri", "uri": "lsfs://mydata.jsonl", }, schema="jsonl_messages", # optional dataset_id="my_first_train_data" ) # quick prototype debugging client.datasets.register( source={ "type": "rows", "rows": [ {"messages": [...]}, ], }, schema="jsonl_messages", ) ``` ## Test Plan - CI:1387805545
``` LLAMA_STACK_CONFIG=fireworks pytest -v tests/integration/datasets/test_datasets.py ``` ``` LLAMA_STACK_CONFIG=fireworks pytest -v tests/integration/scoring/test_scoring.py ``` ``` pytest -v -s --nbval-lax ./docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb ```
This commit is contained in:
parent
3b35a39b8b
commit
5287b437ae
29 changed files with 2593 additions and 2296 deletions
499
docs/_static/llama-stack-spec.yaml
vendored
499
docs/_static/llama-stack-spec.yaml
vendored
|
@ -10,56 +10,7 @@ info:
|
|||
servers:
|
||||
- url: http://any-hosted-llama-stack.com
|
||||
paths:
|
||||
/v1/datasetio/rows:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/PaginatedRowsResult'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- DatasetIO
|
||||
description: >-
|
||||
Get a paginated list of rows from a dataset.
|
||||
parameters:
|
||||
- name: dataset_id
|
||||
in: query
|
||||
description: >-
|
||||
The ID of the dataset to get the rows from.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: rows_in_page
|
||||
in: query
|
||||
description: The number of rows to get per page.
|
||||
required: true
|
||||
schema:
|
||||
type: integer
|
||||
- name: page_token
|
||||
in: query
|
||||
description: The token to get the next page of rows.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
- name: filter_condition
|
||||
in: query
|
||||
description: >-
|
||||
(Optional) A condition to filter the rows by.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
/v1/datasetio/append-rows/{dataset_id}:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -77,7 +28,12 @@ paths:
|
|||
tags:
|
||||
- DatasetIO
|
||||
description: ''
|
||||
parameters: []
|
||||
parameters:
|
||||
- name: dataset_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
|
@ -394,7 +350,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
- Files
|
||||
description: List all buckets.
|
||||
parameters:
|
||||
- name: bucket
|
||||
|
@ -421,7 +377,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
- Files
|
||||
description: >-
|
||||
Create a new upload session for a file identified by a bucket and key.
|
||||
parameters: []
|
||||
|
@ -580,7 +536,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
- Files
|
||||
description: >-
|
||||
Get a file info identified by a bucket and key.
|
||||
parameters:
|
||||
|
@ -616,7 +572,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
- Files
|
||||
description: >-
|
||||
Delete a file identified by a bucket and key.
|
||||
parameters:
|
||||
|
@ -1268,7 +1224,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
- Files
|
||||
description: >-
|
||||
Returns information about an existing upload session
|
||||
parameters:
|
||||
|
@ -1299,7 +1255,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
- Files
|
||||
description: >-
|
||||
Upload file content to an existing upload session. On the server, request
|
||||
body will have the raw bytes that are uploaded.
|
||||
|
@ -1501,6 +1457,50 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/InvokeToolRequest'
|
||||
required: true
|
||||
/v1/datasetio/iterrows/{dataset_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/IterrowsResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- DatasetIO
|
||||
description: >-
|
||||
Get a paginated list of rows from a dataset. Uses cursor-based pagination.
|
||||
parameters:
|
||||
- name: dataset_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the dataset to get the rows from.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: start_index
|
||||
in: query
|
||||
description: >-
|
||||
Index into dataset for the first row to get. Get all rows if None.
|
||||
required: false
|
||||
schema:
|
||||
type: integer
|
||||
- name: limit
|
||||
in: query
|
||||
description: The number of rows to get.
|
||||
required: false
|
||||
schema:
|
||||
type: integer
|
||||
/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
|
||||
get:
|
||||
responses:
|
||||
|
@ -1710,6 +1710,10 @@ paths:
|
|||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Dataset'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
|
@ -1722,7 +1726,7 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Datasets
|
||||
description: ''
|
||||
description: Register a new dataset.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
@ -1750,7 +1754,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
- Files
|
||||
description: List all files in a bucket.
|
||||
parameters:
|
||||
- name: bucket
|
||||
|
@ -2607,8 +2611,6 @@ components:
|
|||
AppendRowsRequest:
|
||||
type: object
|
||||
properties:
|
||||
dataset_id:
|
||||
type: string
|
||||
rows:
|
||||
type: array
|
||||
items:
|
||||
|
@ -2623,7 +2625,6 @@ components:
|
|||
- type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- dataset_id
|
||||
- rows
|
||||
title: AppendRowsRequest
|
||||
CompletionMessage:
|
||||
|
@ -4726,6 +4727,148 @@ components:
|
|||
- scoring_functions
|
||||
- metadata
|
||||
title: Benchmark
|
||||
DataSource:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/URIDataSource'
|
||||
- $ref: '#/components/schemas/RowsDataSource'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
uri: '#/components/schemas/URIDataSource'
|
||||
rows: '#/components/schemas/RowsDataSource'
|
||||
Dataset:
|
||||
type: object
|
||||
properties:
|
||||
identifier:
|
||||
type: string
|
||||
provider_resource_id:
|
||||
type: string
|
||||
provider_id:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
const: dataset
|
||||
default: dataset
|
||||
purpose:
|
||||
type: string
|
||||
enum:
|
||||
- post-training/messages
|
||||
- eval/question-answer
|
||||
- eval/messages-answer
|
||||
title: DatasetPurpose
|
||||
description: >-
|
||||
Purpose of the dataset. Each purpose has a required input data schema.
|
||||
source:
|
||||
$ref: '#/components/schemas/DataSource'
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- identifier
|
||||
- provider_resource_id
|
||||
- provider_id
|
||||
- type
|
||||
- purpose
|
||||
- source
|
||||
- metadata
|
||||
title: Dataset
|
||||
RowsDataSource:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: rows
|
||||
default: rows
|
||||
rows:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user",
|
||||
"content": "Hello, world!"}, {"role": "assistant", "content": "Hello,
|
||||
world!"}]} ]
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- rows
|
||||
title: RowsDataSource
|
||||
description: A dataset stored in rows.
|
||||
URIDataSource:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: uri
|
||||
default: uri
|
||||
uri:
|
||||
type: string
|
||||
description: >-
|
||||
The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl"
|
||||
- "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}"
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- uri
|
||||
title: URIDataSource
|
||||
description: >-
|
||||
A dataset that can be obtained from a URI.
|
||||
Model:
|
||||
type: object
|
||||
properties:
|
||||
identifier:
|
||||
type: string
|
||||
provider_resource_id:
|
||||
type: string
|
||||
provider_id:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
const: model
|
||||
default: model
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
model_type:
|
||||
$ref: '#/components/schemas/ModelType'
|
||||
default: llm
|
||||
additionalProperties: false
|
||||
required:
|
||||
- identifier
|
||||
- provider_resource_id
|
||||
- provider_id
|
||||
- type
|
||||
- metadata
|
||||
- model_type
|
||||
title: Model
|
||||
ModelType:
|
||||
type: string
|
||||
enum:
|
||||
- llm
|
||||
- embedding
|
||||
title: ModelType
|
||||
AgentTurnInputType:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -4781,45 +4924,6 @@ components:
|
|||
required:
|
||||
- type
|
||||
title: CompletionInputType
|
||||
Dataset:
|
||||
type: object
|
||||
properties:
|
||||
identifier:
|
||||
type: string
|
||||
provider_resource_id:
|
||||
type: string
|
||||
provider_id:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
const: dataset
|
||||
default: dataset
|
||||
dataset_schema:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/ParamType'
|
||||
url:
|
||||
$ref: '#/components/schemas/URL'
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- identifier
|
||||
- provider_resource_id
|
||||
- provider_id
|
||||
- type
|
||||
- dataset_schema
|
||||
- url
|
||||
- metadata
|
||||
title: Dataset
|
||||
JsonType:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -4878,97 +4982,6 @@ components:
|
|||
chat_completion_input: '#/components/schemas/ChatCompletionInputType'
|
||||
completion_input: '#/components/schemas/CompletionInputType'
|
||||
agent_turn_input: '#/components/schemas/AgentTurnInputType'
|
||||
StringType:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: string
|
||||
default: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
title: StringType
|
||||
UnionType:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: union
|
||||
default: union
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
title: UnionType
|
||||
Model:
|
||||
type: object
|
||||
properties:
|
||||
identifier:
|
||||
type: string
|
||||
provider_resource_id:
|
||||
type: string
|
||||
provider_id:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
const: model
|
||||
default: model
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
model_type:
|
||||
$ref: '#/components/schemas/ModelType'
|
||||
default: llm
|
||||
additionalProperties: false
|
||||
required:
|
||||
- identifier
|
||||
- provider_resource_id
|
||||
- provider_id
|
||||
- type
|
||||
- metadata
|
||||
- model_type
|
||||
title: Model
|
||||
ModelType:
|
||||
type: string
|
||||
enum:
|
||||
- llm
|
||||
- embedding
|
||||
title: ModelType
|
||||
PaginatedRowsResult:
|
||||
type: object
|
||||
properties:
|
||||
rows:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: The rows in the current page.
|
||||
total_count:
|
||||
type: integer
|
||||
description: The total number of rows in the dataset.
|
||||
next_page_token:
|
||||
type: string
|
||||
description: The token to get the next page of rows.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- rows
|
||||
- total_count
|
||||
title: PaginatedRowsResult
|
||||
description: A paginated list of rows from a dataset.
|
||||
ScoringFn:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -5007,6 +5020,28 @@ components:
|
|||
- metadata
|
||||
- return_type
|
||||
title: ScoringFn
|
||||
StringType:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: string
|
||||
default: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
title: StringType
|
||||
UnionType:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: union
|
||||
default: union
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
title: UnionType
|
||||
Shield:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -5506,6 +5541,32 @@ components:
|
|||
required:
|
||||
- content
|
||||
title: ToolInvocationResult
|
||||
IterrowsResponse:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: The rows in the current page.
|
||||
next_start_index:
|
||||
type: integer
|
||||
description: >-
|
||||
Index into dataset for the first row in the next page. None if there are
|
||||
no more rows.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
title: IterrowsResponse
|
||||
description: A paginated list of rows from a dataset.
|
||||
ListAgentSessionsResponse:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -6313,18 +6374,35 @@ components:
|
|||
RegisterDatasetRequest:
|
||||
type: object
|
||||
properties:
|
||||
dataset_id:
|
||||
type: string
|
||||
dataset_schema:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/ParamType'
|
||||
url:
|
||||
$ref: '#/components/schemas/URL'
|
||||
provider_dataset_id:
|
||||
type: string
|
||||
provider_id:
|
||||
purpose:
|
||||
type: string
|
||||
enum:
|
||||
- post-training/messages
|
||||
- eval/question-answer
|
||||
- eval/messages-answer
|
||||
description: >-
|
||||
The purpose of the dataset. One of - "post-training/messages": The dataset
|
||||
contains a messages column with list of messages for post-training. {
|
||||
"messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
|
||||
"content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
|
||||
contains a question column and an answer column for evaluation. { "question":
|
||||
"What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
|
||||
The dataset contains a messages column with list of messages and an answer
|
||||
column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
|
||||
my name is John Doe."}, {"role": "assistant", "content": "Hello, John
|
||||
Doe. How can I help you today?"}, {"role": "user", "content": "What's
|
||||
my name?"}, ], "answer": "John Doe" }
|
||||
source:
|
||||
$ref: '#/components/schemas/DataSource'
|
||||
description: >-
|
||||
The data source of the dataset. Ensure that the data source schema is
|
||||
compatible with the purpose of the dataset. Examples: - { "type": "uri",
|
||||
"uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
|
||||
"lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
|
||||
} - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
|
||||
} - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
|
||||
"Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
|
||||
} ] }
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
|
@ -6335,11 +6413,16 @@ components:
|
|||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
The metadata for the dataset. - E.g. {"description": "My dataset"}
|
||||
dataset_id:
|
||||
type: string
|
||||
description: >-
|
||||
The ID of the dataset. If not provided, an ID will be generated.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- dataset_id
|
||||
- dataset_schema
|
||||
- url
|
||||
- purpose
|
||||
- source
|
||||
title: RegisterDatasetRequest
|
||||
RegisterModelRequest:
|
||||
type: object
|
||||
|
@ -6855,7 +6938,7 @@ tags:
|
|||
- name: Eval
|
||||
x-displayName: >-
|
||||
Llama Stack Evaluation API for running evaluations on model and agent candidates.
|
||||
- name: Files (Coming Soon)
|
||||
- name: Files
|
||||
- name: Inference
|
||||
description: >-
|
||||
This API provides the raw interface to the underlying models. Two kinds of models
|
||||
|
@ -6893,7 +6976,7 @@ x-tagGroups:
|
|||
- DatasetIO
|
||||
- Datasets
|
||||
- Eval
|
||||
- Files (Coming Soon)
|
||||
- Files
|
||||
- Inference
|
||||
- Inspect
|
||||
- Models
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue