forked from phoenix-oss/llama-stack-mirror
update
This commit is contained in:
parent
02aa9a1e85
commit
0e47c65051
3 changed files with 294 additions and 82 deletions
185
docs/_static/llama-stack-spec.html
vendored
185
docs/_static/llama-stack-spec.html
vendored
|
@ -6838,6 +6838,27 @@
|
||||||
],
|
],
|
||||||
"title": "Benchmark"
|
"title": "Benchmark"
|
||||||
},
|
},
|
||||||
|
"DataReference": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/URIDataReference"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/HuggingfaceDataReference"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/RowsDataReference"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"discriminator": {
|
||||||
|
"propertyName": "type",
|
||||||
|
"mapping": {
|
||||||
|
"uri": "#/components/schemas/URIDataReference",
|
||||||
|
"huggingface": "#/components/schemas/HuggingfaceDataReference",
|
||||||
|
"rows": "#/components/schemas/RowsDataReference"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"Dataset": {
|
"Dataset": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -6856,10 +6877,15 @@
|
||||||
"default": "dataset"
|
"default": "dataset"
|
||||||
},
|
},
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/components/schemas/Schema"
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"jsonl_messages"
|
||||||
|
],
|
||||||
|
"title": "Schema",
|
||||||
|
"description": "Schema of the dataset. Each type has a different column format."
|
||||||
},
|
},
|
||||||
"uri": {
|
"data_reference": {
|
||||||
"type": "string"
|
"$ref": "#/components/schemas/DataReference"
|
||||||
},
|
},
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -6894,18 +6920,118 @@
|
||||||
"provider_id",
|
"provider_id",
|
||||||
"type",
|
"type",
|
||||||
"schema",
|
"schema",
|
||||||
"uri",
|
"data_reference",
|
||||||
"metadata"
|
"metadata"
|
||||||
],
|
],
|
||||||
"title": "Dataset"
|
"title": "Dataset"
|
||||||
},
|
},
|
||||||
"Schema": {
|
"HuggingfaceDataReference": {
|
||||||
"type": "string",
|
"type": "object",
|
||||||
"enum": [
|
"properties": {
|
||||||
"jsonl_messages"
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "huggingface",
|
||||||
|
"default": "huggingface"
|
||||||
|
},
|
||||||
|
"dataset_path": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"params": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"type": "null"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "array"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "object"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"type",
|
||||||
|
"dataset_path",
|
||||||
|
"params"
|
||||||
],
|
],
|
||||||
"title": "Schema",
|
"title": "HuggingfaceDataReference"
|
||||||
"description": "Schema of the dataset. Each type has a different column format."
|
},
|
||||||
|
"RowsDataReference": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "rows",
|
||||||
|
"default": "rows"
|
||||||
|
},
|
||||||
|
"rows": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"type": "null"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "array"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "object"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"type",
|
||||||
|
"rows"
|
||||||
|
],
|
||||||
|
"title": "RowsDataReference"
|
||||||
|
},
|
||||||
|
"URIDataReference": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "uri",
|
||||||
|
"default": "uri"
|
||||||
|
},
|
||||||
|
"uri": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"type",
|
||||||
|
"uri"
|
||||||
|
],
|
||||||
|
"title": "URIDataReference"
|
||||||
},
|
},
|
||||||
"Model": {
|
"Model": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -9255,38 +9381,15 @@
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/components/schemas/Schema",
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"jsonl_messages"
|
||||||
|
],
|
||||||
"description": "The schema format of the dataset. One of - jsonl_messages: The dataset is a JSONL file with messages in column format"
|
"description": "The schema format of the dataset. One of - jsonl_messages: The dataset is a JSONL file with messages in column format"
|
||||||
},
|
},
|
||||||
"uri": {
|
"data_reference": {
|
||||||
"type": "string",
|
"$ref": "#/components/schemas/DataReference",
|
||||||
"description": "The URI of the dataset. Examples: - file://mydata.jsonl - s3://mybucket/myfile.jsonl - https://mywebsite.com/myfile.jsonl - huggingface://tatsu-lab/alpaca"
|
"description": "The data reference of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"huggingface\", \"dataset_path\": \"tatsu-lab/alpaca\", \"params\": { \"split\": \"train\" } } - { \"type\": \"rows\", \"rows\": [{\"message\": \"Hello, world!\"}] }"
|
||||||
},
|
|
||||||
"uri_params": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"type": "null"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "boolean"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "array"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "object"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"description": "The parameters for the URI. - E.g. If URL is a huggingface dataset, parameters could be uri_params={\"split\": \"train\"}"
|
|
||||||
},
|
},
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -9322,7 +9425,7 @@
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"schema",
|
"schema",
|
||||||
"uri"
|
"data_reference"
|
||||||
],
|
],
|
||||||
"title": "RegisterDatasetRequest"
|
"title": "RegisterDatasetRequest"
|
||||||
},
|
},
|
||||||
|
|
122
docs/_static/llama-stack-spec.yaml
vendored
122
docs/_static/llama-stack-spec.yaml
vendored
|
@ -4731,6 +4731,17 @@ components:
|
||||||
- scoring_functions
|
- scoring_functions
|
||||||
- metadata
|
- metadata
|
||||||
title: Benchmark
|
title: Benchmark
|
||||||
|
DataReference:
|
||||||
|
oneOf:
|
||||||
|
- $ref: '#/components/schemas/URIDataReference'
|
||||||
|
- $ref: '#/components/schemas/HuggingfaceDataReference'
|
||||||
|
- $ref: '#/components/schemas/RowsDataReference'
|
||||||
|
discriminator:
|
||||||
|
propertyName: type
|
||||||
|
mapping:
|
||||||
|
uri: '#/components/schemas/URIDataReference'
|
||||||
|
huggingface: '#/components/schemas/HuggingfaceDataReference'
|
||||||
|
rows: '#/components/schemas/RowsDataReference'
|
||||||
Dataset:
|
Dataset:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -4745,9 +4756,14 @@ components:
|
||||||
const: dataset
|
const: dataset
|
||||||
default: dataset
|
default: dataset
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/Schema'
|
|
||||||
uri:
|
|
||||||
type: string
|
type: string
|
||||||
|
enum:
|
||||||
|
- jsonl_messages
|
||||||
|
title: Schema
|
||||||
|
description: >-
|
||||||
|
Schema of the dataset. Each type has a different column format.
|
||||||
|
data_reference:
|
||||||
|
$ref: '#/components/schemas/DataReference'
|
||||||
metadata:
|
metadata:
|
||||||
type: object
|
type: object
|
||||||
additionalProperties:
|
additionalProperties:
|
||||||
|
@ -4765,16 +4781,72 @@ components:
|
||||||
- provider_id
|
- provider_id
|
||||||
- type
|
- type
|
||||||
- schema
|
- schema
|
||||||
- uri
|
- data_reference
|
||||||
- metadata
|
- metadata
|
||||||
title: Dataset
|
title: Dataset
|
||||||
Schema:
|
HuggingfaceDataReference:
|
||||||
type: string
|
type: object
|
||||||
enum:
|
properties:
|
||||||
- jsonl_messages
|
type:
|
||||||
title: Schema
|
type: string
|
||||||
description: >-
|
const: huggingface
|
||||||
Schema of the dataset. Each type has a different column format.
|
default: huggingface
|
||||||
|
dataset_path:
|
||||||
|
type: string
|
||||||
|
params:
|
||||||
|
type: object
|
||||||
|
additionalProperties:
|
||||||
|
oneOf:
|
||||||
|
- type: 'null'
|
||||||
|
- type: boolean
|
||||||
|
- type: number
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
- type: object
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- type
|
||||||
|
- dataset_path
|
||||||
|
- params
|
||||||
|
title: HuggingfaceDataReference
|
||||||
|
RowsDataReference:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
const: rows
|
||||||
|
default: rows
|
||||||
|
rows:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: object
|
||||||
|
additionalProperties:
|
||||||
|
oneOf:
|
||||||
|
- type: 'null'
|
||||||
|
- type: boolean
|
||||||
|
- type: number
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
- type: object
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- type
|
||||||
|
- rows
|
||||||
|
title: RowsDataReference
|
||||||
|
URIDataReference:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
const: uri
|
||||||
|
default: uri
|
||||||
|
uri:
|
||||||
|
type: string
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- type
|
||||||
|
- uri
|
||||||
|
title: URIDataReference
|
||||||
Model:
|
Model:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -6272,28 +6344,20 @@ components:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/Schema'
|
type: string
|
||||||
|
enum:
|
||||||
|
- jsonl_messages
|
||||||
description: >-
|
description: >-
|
||||||
The schema format of the dataset. One of - jsonl_messages: The dataset
|
The schema format of the dataset. One of - jsonl_messages: The dataset
|
||||||
is a JSONL file with messages in column format
|
is a JSONL file with messages in column format
|
||||||
uri:
|
data_reference:
|
||||||
type: string
|
$ref: '#/components/schemas/DataReference'
|
||||||
description: >-
|
description: >-
|
||||||
The URI of the dataset. Examples: - file://mydata.jsonl - s3://mybucket/myfile.jsonl
|
The data reference of the dataset. Examples: - { "type": "uri", "uri":
|
||||||
- https://mywebsite.com/myfile.jsonl - huggingface://tatsu-lab/alpaca
|
"https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": "lsfs://mydata.jsonl"
|
||||||
uri_params:
|
} - { "type": "huggingface", "dataset_path": "tatsu-lab/alpaca", "params":
|
||||||
type: object
|
{ "split": "train" } } - { "type": "rows", "rows": [{"message": "Hello,
|
||||||
additionalProperties:
|
world!"}] }
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: >-
|
|
||||||
The parameters for the URI. - E.g. If URL is a huggingface dataset, parameters
|
|
||||||
could be uri_params={"split": "train"}
|
|
||||||
metadata:
|
metadata:
|
||||||
type: object
|
type: object
|
||||||
additionalProperties:
|
additionalProperties:
|
||||||
|
@ -6313,7 +6377,7 @@ components:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- schema
|
- schema
|
||||||
- uri
|
- data_reference
|
||||||
title: RegisterDatasetRequest
|
title: RegisterDatasetRequest
|
||||||
RegisterModelRequest:
|
RegisterModelRequest:
|
||||||
type: object
|
type: object
|
||||||
|
|
|
@ -5,12 +5,12 @@
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Any, Dict, List, Literal, Optional, Protocol
|
from typing import Any, Dict, List, Literal, Optional, Protocol, Annotated, Union
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from llama_stack.apis.resource import Resource, ResourceType
|
from llama_stack.apis.resource import Resource, ResourceType
|
||||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
from llama_stack.schema_utils import json_schema_type, webmethod, register_schema
|
||||||
|
|
||||||
|
|
||||||
class Schema(Enum):
|
class Schema(Enum):
|
||||||
|
@ -29,9 +29,42 @@ class Schema(Enum):
|
||||||
# TODO: add more schemas here
|
# TODO: add more schemas here
|
||||||
|
|
||||||
|
|
||||||
|
class DatasetType(Enum):
|
||||||
|
huggingface = "huggingface"
|
||||||
|
uri = "uri"
|
||||||
|
rows = "rows"
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
|
||||||
|
class URIDataReference(BaseModel):
|
||||||
|
type: Literal["uri"] = "uri"
|
||||||
|
uri: str
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
|
||||||
|
class HuggingfaceDataReference(BaseModel):
|
||||||
|
type: Literal["huggingface"] = "huggingface"
|
||||||
|
dataset_path: str
|
||||||
|
params: Dict[str, Any]
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
|
||||||
|
class RowsDataReference(BaseModel):
|
||||||
|
type: Literal["rows"] = "rows"
|
||||||
|
rows: List[Dict[str, Any]]
|
||||||
|
|
||||||
|
|
||||||
|
DataReference = register_schema(
|
||||||
|
Annotated[
|
||||||
|
Union[URIDataReference, HuggingfaceDataReference, RowsDataReference],
|
||||||
|
Field(discriminator="type"),
|
||||||
|
],
|
||||||
|
name="DataReference",
|
||||||
|
)
|
||||||
|
|
||||||
class CommonDatasetFields(BaseModel):
|
class CommonDatasetFields(BaseModel):
|
||||||
schema: Schema
|
schema: Schema
|
||||||
uri: str
|
data_reference: DataReference
|
||||||
metadata: Dict[str, Any] = Field(
|
metadata: Dict[str, Any] = Field(
|
||||||
default_factory=dict,
|
default_factory=dict,
|
||||||
description="Any additional metadata for this dataset",
|
description="Any additional metadata for this dataset",
|
||||||
|
@ -66,8 +99,7 @@ class Datasets(Protocol):
|
||||||
async def register_dataset(
|
async def register_dataset(
|
||||||
self,
|
self,
|
||||||
schema: Schema,
|
schema: Schema,
|
||||||
uri: str,
|
data_reference: DataReference,
|
||||||
uri_params: Optional[Dict[str, Any]] = None,
|
|
||||||
metadata: Optional[Dict[str, Any]] = None,
|
metadata: Optional[Dict[str, Any]] = None,
|
||||||
dataset_id: Optional[str] = None,
|
dataset_id: Optional[str] = None,
|
||||||
) -> Dataset:
|
) -> Dataset:
|
||||||
|
@ -76,13 +108,26 @@ class Datasets(Protocol):
|
||||||
|
|
||||||
:param schema: The schema format of the dataset. One of
|
:param schema: The schema format of the dataset. One of
|
||||||
- jsonl_messages: The dataset is a JSONL file with messages in column format
|
- jsonl_messages: The dataset is a JSONL file with messages in column format
|
||||||
:param uri: The URI of the dataset. Examples:
|
:param data_reference: The data reference of the dataset. Examples:
|
||||||
- file://mydata.jsonl
|
- {
|
||||||
- s3://mybucket/myfile.jsonl
|
"type": "uri",
|
||||||
- https://mywebsite.com/myfile.jsonl
|
"uri": "https://mywebsite.com/mydata.jsonl"
|
||||||
- huggingface://tatsu-lab/alpaca
|
}
|
||||||
:param uri_params: The parameters for the URI.
|
- {
|
||||||
- E.g. If URL is a huggingface dataset, parameters could be uri_params={"split": "train"}
|
"type": "uri",
|
||||||
|
"uri": "lsfs://mydata.jsonl"
|
||||||
|
}
|
||||||
|
- {
|
||||||
|
"type": "huggingface",
|
||||||
|
"dataset_path": "tatsu-lab/alpaca",
|
||||||
|
"params": {
|
||||||
|
"split": "train"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
- {
|
||||||
|
"type": "rows",
|
||||||
|
"rows": [{"message": "Hello, world!"}]
|
||||||
|
}
|
||||||
:param metadata: The metadata for the dataset.
|
:param metadata: The metadata for the dataset.
|
||||||
- E.g. {"description": "My dataset"}
|
- E.g. {"description": "My dataset"}
|
||||||
:param dataset_id: The ID of the dataset. If not provided, a random ID will be generated.
|
:param dataset_id: The ID of the dataset. If not provided, a random ID will be generated.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue