diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index a698c2c9c..7ba26acb7 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -2518,7 +2518,7 @@ "tags": [ "Datasets" ], - "description": "Register a new dataset through a file or", + "description": "Register a new dataset.", "parameters": [], "requestBody": { "content": { @@ -7144,24 +7144,24 @@ ], "title": "Benchmark" }, - "DataReference": { + "DataSource": { "oneOf": [ { - "$ref": "#/components/schemas/URIDataReference" + "$ref": "#/components/schemas/URIDataSource" }, { - "$ref": "#/components/schemas/HuggingfaceDataReference" + "$ref": "#/components/schemas/HuggingfaceDataSource" }, { - "$ref": "#/components/schemas/RowsDataReference" + "$ref": "#/components/schemas/RowsDataSource" } ], "discriminator": { "propertyName": "type", "mapping": { - "uri": "#/components/schemas/URIDataReference", - "huggingface": "#/components/schemas/HuggingfaceDataReference", - "rows": "#/components/schemas/RowsDataReference" + "uri": "#/components/schemas/URIDataSource", + "huggingface": "#/components/schemas/HuggingfaceDataSource", + "rows": "#/components/schemas/RowsDataSource" } } }, @@ -7185,13 +7185,13 @@ "schema": { "type": "string", "enum": [ - "jsonl_messages" + "messages" ], "title": "Schema", "description": "Schema of the dataset. Each type has a different column format." }, - "data_reference": { - "$ref": "#/components/schemas/DataReference" + "data_source": { + "$ref": "#/components/schemas/DataSource" }, "metadata": { "type": "object", @@ -7226,12 +7226,12 @@ "provider_id", "type", "schema", - "data_reference", + "data_source", "metadata" ], "title": "Dataset" }, - "HuggingfaceDataReference": { + "HuggingfaceDataSource": { "type": "object", "properties": { "type": { @@ -7274,9 +7274,9 @@ "dataset_path", "params" ], - "title": "HuggingfaceDataReference" + "title": "HuggingfaceDataSource" }, - "RowsDataReference": { + "RowsDataSource": { "type": "object", "properties": { "type": { @@ -7318,9 +7318,9 @@ "type", "rows" ], - "title": "RowsDataReference" + "title": "RowsDataSource" }, - "URIDataReference": { + "URIDataSource": { "type": "object", "properties": { "type": { @@ -7337,7 +7337,7 @@ "type", "uri" ], - "title": "URIDataReference" + "title": "URIDataSource" }, "Model": { "type": "object", @@ -9506,13 +9506,13 @@ "schema": { "type": "string", "enum": [ - "jsonl_messages" + "messages" ], - "description": "The schema format of the dataset. One of - jsonl_messages: The dataset is a JSONL file with messages in column format" + "description": "The schema format of the dataset. One of - messages: The dataset contains a messages column with list of messages for post-training." }, - "data_reference": { - "$ref": "#/components/schemas/DataReference", - "description": "The data reference of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"huggingface\", \"dataset_path\": \"tatsu-lab/alpaca\", \"params\": { \"split\": \"train\" } } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }" + "data_source": { + "$ref": "#/components/schemas/DataSource", + "description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"huggingface\", \"dataset_path\": \"tatsu-lab/alpaca\", \"params\": { \"split\": \"train\" } } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }" }, "metadata": { "type": "object", @@ -9548,7 +9548,7 @@ "additionalProperties": false, "required": [ "schema", - "data_reference" + "data_source" ], "title": "RegisterDatasetRequest" }, @@ -9602,29 +9602,9 @@ "RegisterScoringFunctionRequest": { "type": "object", "properties": { - "scoring_fn_type": { - "type": "string", - "enum": [ - "custom_llm_as_judge", - "regex_parser", - "regex_parser_math_response", - "equality", - "subset_of", - "factuality", - "faithfulness", - "answer_correctness", - "answer_relevancy", - "answer_similarity", - "context_entity_recall", - "context_precision", - "context_recall", - "context_relevancy" - ], - "description": "The type of scoring function to register." - }, - "params": { + "fn": { "$ref": "#/components/schemas/ScoringFnParams", - "description": "The parameters for the scoring function." + "description": "The type and parameters for the scoring function." }, "scoring_fn_id": { "type": "string", @@ -9659,8 +9639,7 @@ }, "additionalProperties": false, "required": [ - "scoring_fn_type", - "params" + "fn" ], "title": "RegisterScoringFunctionRequest" }, diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 2fe35cc2c..e37c49100 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -1698,7 +1698,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Datasets - description: Register a new dataset through a file or + description: Register a new dataset. parameters: [] requestBody: content: @@ -4974,17 +4974,17 @@ components: - scoring_functions - metadata title: Benchmark - DataReference: + DataSource: oneOf: - - $ref: '#/components/schemas/URIDataReference' - - $ref: '#/components/schemas/HuggingfaceDataReference' - - $ref: '#/components/schemas/RowsDataReference' + - $ref: '#/components/schemas/URIDataSource' + - $ref: '#/components/schemas/HuggingfaceDataSource' + - $ref: '#/components/schemas/RowsDataSource' discriminator: propertyName: type mapping: - uri: '#/components/schemas/URIDataReference' - huggingface: '#/components/schemas/HuggingfaceDataReference' - rows: '#/components/schemas/RowsDataReference' + uri: '#/components/schemas/URIDataSource' + huggingface: '#/components/schemas/HuggingfaceDataSource' + rows: '#/components/schemas/RowsDataSource' Dataset: type: object properties: @@ -5001,12 +5001,12 @@ components: schema: type: string enum: - - jsonl_messages + - messages title: Schema description: >- Schema of the dataset. Each type has a different column format. - data_reference: - $ref: '#/components/schemas/DataReference' + data_source: + $ref: '#/components/schemas/DataSource' metadata: type: object additionalProperties: @@ -5024,10 +5024,10 @@ components: - provider_id - type - schema - - data_reference + - data_source - metadata title: Dataset - HuggingfaceDataReference: + HuggingfaceDataSource: type: object properties: type: @@ -5051,8 +5051,8 @@ components: - type - dataset_path - params - title: HuggingfaceDataReference - RowsDataReference: + title: HuggingfaceDataSource + RowsDataSource: type: object properties: type: @@ -5075,8 +5075,8 @@ components: required: - type - rows - title: RowsDataReference - URIDataReference: + title: RowsDataSource + URIDataSource: type: object properties: type: @@ -5089,7 +5089,7 @@ components: required: - type - uri - title: URIDataReference + title: URIDataSource Model: type: object properties: @@ -6472,19 +6472,19 @@ components: schema: type: string enum: - - jsonl_messages + - messages description: >- - The schema format of the dataset. One of - jsonl_messages: The dataset - is a JSONL file with messages in column format - data_reference: - $ref: '#/components/schemas/DataReference' + The schema format of the dataset. One of - messages: The dataset contains + a messages column with list of messages for post-training. + data_source: + $ref: '#/components/schemas/DataSource' description: >- - The data reference of the dataset. Examples: - { "type": "uri", "uri": - "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": "lsfs://mydata.jsonl" - } - { "type": "huggingface", "dataset_path": "tatsu-lab/alpaca", "params": - { "split": "train" } } - { "type": "rows", "rows": [ { "messages": [ {"role": - "user", "content": "Hello, world!"}, {"role": "assistant", "content": - "Hello, world!"}, ] } ] } + The data source of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl" + } - { "type": "uri", "uri": "lsfs://mydata.jsonl" } - { "type": "huggingface", + "dataset_path": "tatsu-lab/alpaca", "params": { "split": "train" } } - + { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": + "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] + } ] } metadata: type: object additionalProperties: @@ -6504,7 +6504,7 @@ components: additionalProperties: false required: - schema - - data_reference + - data_source title: RegisterDatasetRequest RegisterModelRequest: type: object @@ -6534,28 +6534,10 @@ components: RegisterScoringFunctionRequest: type: object properties: - scoring_fn_type: - type: string - enum: - - custom_llm_as_judge - - regex_parser - - regex_parser_math_response - - equality - - subset_of - - factuality - - faithfulness - - answer_correctness - - answer_relevancy - - answer_similarity - - context_entity_recall - - context_precision - - context_recall - - context_relevancy - description: >- - The type of scoring function to register. - params: + fn: $ref: '#/components/schemas/ScoringFnParams' - description: The parameters for the scoring function. + description: >- + The type and parameters for the scoring function. scoring_fn_id: type: string description: >- @@ -6576,8 +6558,7 @@ components: - E.g. {"description": "This scoring function is used for ..."} additionalProperties: false required: - - scoring_fn_type - - params + - fn title: RegisterScoringFunctionRequest RegisterShieldRequest: type: object diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py index f20edca31..4b3ce3e6f 100644 --- a/llama_stack/apis/datasets/datasets.py +++ b/llama_stack/apis/datasets/datasets.py @@ -16,7 +16,7 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho class Schema(Enum): """ Schema of the dataset. Each type has a different column format. - :cvar jsonl_messages: The dataset is a JSONL file with messages. Examples: + :cvar messages: The dataset contains messages used for post-training. Examples: { "messages": [ {"role": "user", "content": "Hello, world!"}, @@ -25,7 +25,7 @@ class Schema(Enum): } """ - jsonl_messages = "jsonl_messages" + messages = "messages" # TODO: add more schemas here @@ -36,36 +36,36 @@ class DatasetType(Enum): @json_schema_type -class URIDataReference(BaseModel): +class URIDataSource(BaseModel): type: Literal["uri"] = "uri" uri: str @json_schema_type -class HuggingfaceDataReference(BaseModel): +class HuggingfaceDataSource(BaseModel): type: Literal["huggingface"] = "huggingface" dataset_path: str params: Dict[str, Any] @json_schema_type -class RowsDataReference(BaseModel): +class RowsDataSource(BaseModel): type: Literal["rows"] = "rows" rows: List[Dict[str, Any]] -DataReference = register_schema( +DataSource = register_schema( Annotated[ - Union[URIDataReference, HuggingfaceDataReference, RowsDataReference], + Union[URIDataSource, HuggingfaceDataSource, RowsDataSource], Field(discriminator="type"), ], - name="DataReference", + name="DataSource", ) class CommonDatasetFields(BaseModel): schema: Schema - data_reference: DataReference + data_source: DataSource metadata: Dict[str, Any] = Field( default_factory=dict, description="Any additional metadata for this dataset", @@ -100,16 +100,16 @@ class Datasets(Protocol): async def register_dataset( self, schema: Schema, - data_reference: DataReference, + data_source: DataSource, metadata: Optional[Dict[str, Any]] = None, dataset_id: Optional[str] = None, ) -> Dataset: """ - Register a new dataset through a file or + Register a new dataset. :param schema: The schema format of the dataset. One of - - jsonl_messages: The dataset is a JSONL file with messages in column format - :param data_reference: The data reference of the dataset. Examples: + - messages: The dataset contains a messages column with list of messages for post-training. + :param data_source: The data source of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl" diff --git a/llama_stack/apis/scoring_functions/scoring_functions.py b/llama_stack/apis/scoring_functions/scoring_functions.py index 37c2fae84..243dc37e9 100644 --- a/llama_stack/apis/scoring_functions/scoring_functions.py +++ b/llama_stack/apis/scoring_functions/scoring_functions.py @@ -12,8 +12,8 @@ from typing import ( Literal, Optional, Protocol, - Union, runtime_checkable, + Union, ) from pydantic import BaseModel, Field @@ -218,7 +218,9 @@ class CommonScoringFnFields(BaseModel): @json_schema_type class ScoringFn(CommonScoringFnFields, Resource): - type: Literal[ResourceType.scoring_function.value] = ResourceType.scoring_function.value + type: Literal[ResourceType.scoring_function.value] = ( + ResourceType.scoring_function.value + ) @property def scoring_fn_id(self) -> str: @@ -245,13 +247,14 @@ class ScoringFunctions(Protocol): async def list_scoring_functions(self) -> ListScoringFunctionsResponse: ... @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET") - async def get_scoring_function(self, scoring_fn_id: str, /) -> Optional[ScoringFn]: ... + async def get_scoring_function( + self, scoring_fn_id: str, / + ) -> Optional[ScoringFn]: ... @webmethod(route="/scoring-functions", method="POST") async def register_scoring_function( self, - scoring_fn_type: ScoringFunctionType, - params: ScoringFnParams = None, + fn: ScoringFnParams, scoring_fn_id: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, ) -> ScoringFn: @@ -259,8 +262,7 @@ class ScoringFunctions(Protocol): Register a new scoring function with given parameters. Only valid scoring function type that can be parameterized can be registered. - :param scoring_fn_type: The type of scoring function to register. - :param params: The parameters for the scoring function. + :param fn: The type and parameters for the scoring function. :param scoring_fn_id: (Optional) The ID of the scoring function to register. If not provided, a random ID will be generated. :param metadata: (Optional) Any additional metadata to be associated with the scoring function. - E.g. {"description": "This scoring function is used for ..."}