diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index a698c2c9c..7ba26acb7 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -2518,7 +2518,7 @@
"tags": [
"Datasets"
],
- "description": "Register a new dataset through a file or",
+ "description": "Register a new dataset.",
"parameters": [],
"requestBody": {
"content": {
@@ -7144,24 +7144,24 @@
],
"title": "Benchmark"
},
- "DataReference": {
+ "DataSource": {
"oneOf": [
{
- "$ref": "#/components/schemas/URIDataReference"
+ "$ref": "#/components/schemas/URIDataSource"
},
{
- "$ref": "#/components/schemas/HuggingfaceDataReference"
+ "$ref": "#/components/schemas/HuggingfaceDataSource"
},
{
- "$ref": "#/components/schemas/RowsDataReference"
+ "$ref": "#/components/schemas/RowsDataSource"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
- "uri": "#/components/schemas/URIDataReference",
- "huggingface": "#/components/schemas/HuggingfaceDataReference",
- "rows": "#/components/schemas/RowsDataReference"
+ "uri": "#/components/schemas/URIDataSource",
+ "huggingface": "#/components/schemas/HuggingfaceDataSource",
+ "rows": "#/components/schemas/RowsDataSource"
}
}
},
@@ -7185,13 +7185,13 @@
"schema": {
"type": "string",
"enum": [
- "jsonl_messages"
+ "messages"
],
"title": "Schema",
"description": "Schema of the dataset. Each type has a different column format."
},
- "data_reference": {
- "$ref": "#/components/schemas/DataReference"
+ "data_source": {
+ "$ref": "#/components/schemas/DataSource"
},
"metadata": {
"type": "object",
@@ -7226,12 +7226,12 @@
"provider_id",
"type",
"schema",
- "data_reference",
+ "data_source",
"metadata"
],
"title": "Dataset"
},
- "HuggingfaceDataReference": {
+ "HuggingfaceDataSource": {
"type": "object",
"properties": {
"type": {
@@ -7274,9 +7274,9 @@
"dataset_path",
"params"
],
- "title": "HuggingfaceDataReference"
+ "title": "HuggingfaceDataSource"
},
- "RowsDataReference": {
+ "RowsDataSource": {
"type": "object",
"properties": {
"type": {
@@ -7318,9 +7318,9 @@
"type",
"rows"
],
- "title": "RowsDataReference"
+ "title": "RowsDataSource"
},
- "URIDataReference": {
+ "URIDataSource": {
"type": "object",
"properties": {
"type": {
@@ -7337,7 +7337,7 @@
"type",
"uri"
],
- "title": "URIDataReference"
+ "title": "URIDataSource"
},
"Model": {
"type": "object",
@@ -9506,13 +9506,13 @@
"schema": {
"type": "string",
"enum": [
- "jsonl_messages"
+ "messages"
],
- "description": "The schema format of the dataset. One of - jsonl_messages: The dataset is a JSONL file with messages in column format"
+ "description": "The schema format of the dataset. One of - messages: The dataset contains a messages column with a list of messages for post-training."
},
- "data_reference": {
- "$ref": "#/components/schemas/DataReference",
- "description": "The data reference of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"huggingface\", \"dataset_path\": \"tatsu-lab/alpaca\", \"params\": { \"split\": \"train\" } } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }"
+ "data_source": {
+ "$ref": "#/components/schemas/DataSource",
+ "description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"huggingface\", \"dataset_path\": \"tatsu-lab/alpaca\", \"params\": { \"split\": \"train\" } } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }"
},
"metadata": {
"type": "object",
@@ -9548,7 +9548,7 @@
"additionalProperties": false,
"required": [
"schema",
- "data_reference"
+ "data_source"
],
"title": "RegisterDatasetRequest"
},
@@ -9602,29 +9602,9 @@
"RegisterScoringFunctionRequest": {
"type": "object",
"properties": {
- "scoring_fn_type": {
- "type": "string",
- "enum": [
- "custom_llm_as_judge",
- "regex_parser",
- "regex_parser_math_response",
- "equality",
- "subset_of",
- "factuality",
- "faithfulness",
- "answer_correctness",
- "answer_relevancy",
- "answer_similarity",
- "context_entity_recall",
- "context_precision",
- "context_recall",
- "context_relevancy"
- ],
- "description": "The type of scoring function to register."
- },
- "params": {
+ "fn": {
"$ref": "#/components/schemas/ScoringFnParams",
- "description": "The parameters for the scoring function."
+ "description": "The type and parameters for the scoring function."
},
"scoring_fn_id": {
"type": "string",
@@ -9659,8 +9639,7 @@
},
"additionalProperties": false,
"required": [
- "scoring_fn_type",
- "params"
+ "fn"
],
"title": "RegisterScoringFunctionRequest"
},
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 2fe35cc2c..e37c49100 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -1698,7 +1698,7 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Datasets
- description: Register a new dataset through a file or
+ description: Register a new dataset.
parameters: []
requestBody:
content:
@@ -4974,17 +4974,17 @@ components:
- scoring_functions
- metadata
title: Benchmark
- DataReference:
+ DataSource:
oneOf:
- - $ref: '#/components/schemas/URIDataReference'
- - $ref: '#/components/schemas/HuggingfaceDataReference'
- - $ref: '#/components/schemas/RowsDataReference'
+ - $ref: '#/components/schemas/URIDataSource'
+ - $ref: '#/components/schemas/HuggingfaceDataSource'
+ - $ref: '#/components/schemas/RowsDataSource'
discriminator:
propertyName: type
mapping:
- uri: '#/components/schemas/URIDataReference'
- huggingface: '#/components/schemas/HuggingfaceDataReference'
- rows: '#/components/schemas/RowsDataReference'
+ uri: '#/components/schemas/URIDataSource'
+ huggingface: '#/components/schemas/HuggingfaceDataSource'
+ rows: '#/components/schemas/RowsDataSource'
Dataset:
type: object
properties:
@@ -5001,12 +5001,12 @@ components:
schema:
type: string
enum:
- - jsonl_messages
+ - messages
title: Schema
description: >-
Schema of the dataset. Each type has a different column format.
- data_reference:
- $ref: '#/components/schemas/DataReference'
+ data_source:
+ $ref: '#/components/schemas/DataSource'
metadata:
type: object
additionalProperties:
@@ -5024,10 +5024,10 @@ components:
- provider_id
- type
- schema
- - data_reference
+ - data_source
- metadata
title: Dataset
- HuggingfaceDataReference:
+ HuggingfaceDataSource:
type: object
properties:
type:
@@ -5051,8 +5051,8 @@ components:
- type
- dataset_path
- params
- title: HuggingfaceDataReference
- RowsDataReference:
+ title: HuggingfaceDataSource
+ RowsDataSource:
type: object
properties:
type:
@@ -5075,8 +5075,8 @@ components:
required:
- type
- rows
- title: RowsDataReference
- URIDataReference:
+ title: RowsDataSource
+ URIDataSource:
type: object
properties:
type:
@@ -5089,7 +5089,7 @@ components:
required:
- type
- uri
- title: URIDataReference
+ title: URIDataSource
Model:
type: object
properties:
@@ -6472,19 +6472,19 @@ components:
schema:
type: string
enum:
- - jsonl_messages
+ - messages
description: >-
- The schema format of the dataset. One of - jsonl_messages: The dataset
- is a JSONL file with messages in column format
- data_reference:
- $ref: '#/components/schemas/DataReference'
+ The schema format of the dataset. One of - messages: The dataset contains
+ a messages column with a list of messages for post-training.
+ data_source:
+ $ref: '#/components/schemas/DataSource'
description: >-
- The data reference of the dataset. Examples: - { "type": "uri", "uri":
- "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": "lsfs://mydata.jsonl"
- } - { "type": "huggingface", "dataset_path": "tatsu-lab/alpaca", "params":
- { "split": "train" } } - { "type": "rows", "rows": [ { "messages": [ {"role":
- "user", "content": "Hello, world!"}, {"role": "assistant", "content":
- "Hello, world!"}, ] } ] }
+ The data source of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl"
+ } - { "type": "uri", "uri": "lsfs://mydata.jsonl" } - { "type": "huggingface",
+ "dataset_path": "tatsu-lab/alpaca", "params": { "split": "train" } } -
+ { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
+ "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
+ } ] }
metadata:
type: object
additionalProperties:
@@ -6504,7 +6504,7 @@ components:
additionalProperties: false
required:
- schema
- - data_reference
+ - data_source
title: RegisterDatasetRequest
RegisterModelRequest:
type: object
@@ -6534,28 +6534,10 @@ components:
RegisterScoringFunctionRequest:
type: object
properties:
- scoring_fn_type:
- type: string
- enum:
- - custom_llm_as_judge
- - regex_parser
- - regex_parser_math_response
- - equality
- - subset_of
- - factuality
- - faithfulness
- - answer_correctness
- - answer_relevancy
- - answer_similarity
- - context_entity_recall
- - context_precision
- - context_recall
- - context_relevancy
- description: >-
- The type of scoring function to register.
- params:
+ fn:
$ref: '#/components/schemas/ScoringFnParams'
- description: The parameters for the scoring function.
+ description: >-
+ The type and parameters for the scoring function.
scoring_fn_id:
type: string
description: >-
@@ -6576,8 +6558,7 @@ components:
- E.g. {"description": "This scoring function is used for ..."}
additionalProperties: false
required:
- - scoring_fn_type
- - params
+ - fn
title: RegisterScoringFunctionRequest
RegisterShieldRequest:
type: object
diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py
index f20edca31..4b3ce3e6f 100644
--- a/llama_stack/apis/datasets/datasets.py
+++ b/llama_stack/apis/datasets/datasets.py
@@ -16,7 +16,7 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
class Schema(Enum):
"""
Schema of the dataset. Each type has a different column format.
- :cvar jsonl_messages: The dataset is a JSONL file with messages. Examples:
+ :cvar messages: The dataset contains messages used for post-training. Examples:
{
"messages": [
{"role": "user", "content": "Hello, world!"},
@@ -25,7 +25,7 @@ class Schema(Enum):
}
"""
- jsonl_messages = "jsonl_messages"
+ messages = "messages"
# TODO: add more schemas here
@@ -36,36 +36,36 @@ class DatasetType(Enum):
@json_schema_type
-class URIDataReference(BaseModel):
+class URIDataSource(BaseModel):
type: Literal["uri"] = "uri"
uri: str
@json_schema_type
-class HuggingfaceDataReference(BaseModel):
+class HuggingfaceDataSource(BaseModel):
type: Literal["huggingface"] = "huggingface"
dataset_path: str
params: Dict[str, Any]
@json_schema_type
-class RowsDataReference(BaseModel):
+class RowsDataSource(BaseModel):
type: Literal["rows"] = "rows"
rows: List[Dict[str, Any]]
-DataReference = register_schema(
+DataSource = register_schema(
Annotated[
- Union[URIDataReference, HuggingfaceDataReference, RowsDataReference],
+ Union[URIDataSource, HuggingfaceDataSource, RowsDataSource],
Field(discriminator="type"),
],
- name="DataReference",
+ name="DataSource",
)
class CommonDatasetFields(BaseModel):
schema: Schema
- data_reference: DataReference
+ data_source: DataSource
metadata: Dict[str, Any] = Field(
default_factory=dict,
description="Any additional metadata for this dataset",
@@ -100,16 +100,16 @@ class Datasets(Protocol):
async def register_dataset(
self,
schema: Schema,
- data_reference: DataReference,
+ data_source: DataSource,
metadata: Optional[Dict[str, Any]] = None,
dataset_id: Optional[str] = None,
) -> Dataset:
"""
- Register a new dataset through a file or
+ Register a new dataset.
:param schema: The schema format of the dataset. One of
- - jsonl_messages: The dataset is a JSONL file with messages in column format
- :param data_reference: The data reference of the dataset. Examples:
+ - messages: The dataset contains a messages column with a list of messages for post-training.
+ :param data_source: The data source of the dataset. Examples:
- {
"type": "uri",
"uri": "https://mywebsite.com/mydata.jsonl"
diff --git a/llama_stack/apis/scoring_functions/scoring_functions.py b/llama_stack/apis/scoring_functions/scoring_functions.py
index 37c2fae84..243dc37e9 100644
--- a/llama_stack/apis/scoring_functions/scoring_functions.py
+++ b/llama_stack/apis/scoring_functions/scoring_functions.py
@@ -12,8 +12,8 @@ from typing import (
Literal,
Optional,
Protocol,
- Union,
runtime_checkable,
+ Union,
)
from pydantic import BaseModel, Field
@@ -218,7 +218,9 @@ class CommonScoringFnFields(BaseModel):
@json_schema_type
class ScoringFn(CommonScoringFnFields, Resource):
- type: Literal[ResourceType.scoring_function.value] = ResourceType.scoring_function.value
+ type: Literal[ResourceType.scoring_function.value] = (
+ ResourceType.scoring_function.value
+ )
@property
def scoring_fn_id(self) -> str:
@@ -245,13 +247,14 @@ class ScoringFunctions(Protocol):
async def list_scoring_functions(self) -> ListScoringFunctionsResponse: ...
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET")
- async def get_scoring_function(self, scoring_fn_id: str, /) -> Optional[ScoringFn]: ...
+ async def get_scoring_function(
+ self, scoring_fn_id: str, /
+ ) -> Optional[ScoringFn]: ...
@webmethod(route="/scoring-functions", method="POST")
async def register_scoring_function(
self,
- scoring_fn_type: ScoringFunctionType,
- params: ScoringFnParams = None,
+ fn: ScoringFnParams,
scoring_fn_id: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> ScoringFn:
@@ -259,8 +262,7 @@ class ScoringFunctions(Protocol):
Register a new scoring function with given parameters.
Only valid scoring function type that can be parameterized can be registered.
- :param scoring_fn_type: The type of scoring function to register.
- :param params: The parameters for the scoring function.
+ :param fn: The type and parameters for the scoring function.
:param scoring_fn_id: (Optional) The ID of the scoring function to register. If not provided, a random ID will be generated.
:param metadata: (Optional) Any additional metadata to be associated with the scoring function.
- E.g. {"description": "This scoring function is used for ..."}