Merge branch 'api_2' into api_3

This commit is contained in:
Xi Yan 2025-03-12 00:21:03 -07:00
commit d0e372058d
4 changed files with 83 additions and 121 deletions

View file

@ -2518,7 +2518,7 @@
"tags": [ "tags": [
"Datasets" "Datasets"
], ],
"description": "Register a new dataset through a file or", "description": "Register a new dataset.",
"parameters": [], "parameters": [],
"requestBody": { "requestBody": {
"content": { "content": {
@ -7144,24 +7144,24 @@
], ],
"title": "Benchmark" "title": "Benchmark"
}, },
"DataReference": { "DataSource": {
"oneOf": [ "oneOf": [
{ {
"$ref": "#/components/schemas/URIDataReference" "$ref": "#/components/schemas/URIDataSource"
}, },
{ {
"$ref": "#/components/schemas/HuggingfaceDataReference" "$ref": "#/components/schemas/HuggingfaceDataSource"
}, },
{ {
"$ref": "#/components/schemas/RowsDataReference" "$ref": "#/components/schemas/RowsDataSource"
} }
], ],
"discriminator": { "discriminator": {
"propertyName": "type", "propertyName": "type",
"mapping": { "mapping": {
"uri": "#/components/schemas/URIDataReference", "uri": "#/components/schemas/URIDataSource",
"huggingface": "#/components/schemas/HuggingfaceDataReference", "huggingface": "#/components/schemas/HuggingfaceDataSource",
"rows": "#/components/schemas/RowsDataReference" "rows": "#/components/schemas/RowsDataSource"
} }
} }
}, },
@ -7185,13 +7185,13 @@
"schema": { "schema": {
"type": "string", "type": "string",
"enum": [ "enum": [
"jsonl_messages" "messages"
], ],
"title": "Schema", "title": "Schema",
"description": "Schema of the dataset. Each type has a different column format." "description": "Schema of the dataset. Each type has a different column format."
}, },
"data_reference": { "data_source": {
"$ref": "#/components/schemas/DataReference" "$ref": "#/components/schemas/DataSource"
}, },
"metadata": { "metadata": {
"type": "object", "type": "object",
@ -7226,12 +7226,12 @@
"provider_id", "provider_id",
"type", "type",
"schema", "schema",
"data_reference", "data_source",
"metadata" "metadata"
], ],
"title": "Dataset" "title": "Dataset"
}, },
"HuggingfaceDataReference": { "HuggingfaceDataSource": {
"type": "object", "type": "object",
"properties": { "properties": {
"type": { "type": {
@ -7274,9 +7274,9 @@
"dataset_path", "dataset_path",
"params" "params"
], ],
"title": "HuggingfaceDataReference" "title": "HuggingfaceDataSource"
}, },
"RowsDataReference": { "RowsDataSource": {
"type": "object", "type": "object",
"properties": { "properties": {
"type": { "type": {
@ -7318,9 +7318,9 @@
"type", "type",
"rows" "rows"
], ],
"title": "RowsDataReference" "title": "RowsDataSource"
}, },
"URIDataReference": { "URIDataSource": {
"type": "object", "type": "object",
"properties": { "properties": {
"type": { "type": {
@ -7337,7 +7337,7 @@
"type", "type",
"uri" "uri"
], ],
"title": "URIDataReference" "title": "URIDataSource"
}, },
"Model": { "Model": {
"type": "object", "type": "object",
@ -9506,13 +9506,13 @@
"schema": { "schema": {
"type": "string", "type": "string",
"enum": [ "enum": [
"jsonl_messages" "messages"
], ],
"description": "The schema format of the dataset. One of - jsonl_messages: The dataset is a JSONL file with messages in column format" "description": "The schema format of the dataset. One of - messages: The dataset contains a messages column with list of messages for post-training."
}, },
"data_reference": { "data_source": {
"$ref": "#/components/schemas/DataReference", "$ref": "#/components/schemas/DataSource",
"description": "The data reference of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"huggingface\", \"dataset_path\": \"tatsu-lab/alpaca\", \"params\": { \"split\": \"train\" } } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }" "description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"huggingface\", \"dataset_path\": \"tatsu-lab/alpaca\", \"params\": { \"split\": \"train\" } } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }"
}, },
"metadata": { "metadata": {
"type": "object", "type": "object",
@ -9548,7 +9548,7 @@
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"schema", "schema",
"data_reference" "data_source"
], ],
"title": "RegisterDatasetRequest" "title": "RegisterDatasetRequest"
}, },
@ -9602,29 +9602,9 @@
"RegisterScoringFunctionRequest": { "RegisterScoringFunctionRequest": {
"type": "object", "type": "object",
"properties": { "properties": {
"scoring_fn_type": { "fn": {
"type": "string",
"enum": [
"custom_llm_as_judge",
"regex_parser",
"regex_parser_math_response",
"equality",
"subset_of",
"factuality",
"faithfulness",
"answer_correctness",
"answer_relevancy",
"answer_similarity",
"context_entity_recall",
"context_precision",
"context_recall",
"context_relevancy"
],
"description": "The type of scoring function to register."
},
"params": {
"$ref": "#/components/schemas/ScoringFnParams", "$ref": "#/components/schemas/ScoringFnParams",
"description": "The parameters for the scoring function." "description": "The type and parameters for the scoring function."
}, },
"scoring_fn_id": { "scoring_fn_id": {
"type": "string", "type": "string",
@ -9659,8 +9639,7 @@
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"scoring_fn_type", "fn"
"params"
], ],
"title": "RegisterScoringFunctionRequest" "title": "RegisterScoringFunctionRequest"
}, },

View file

@ -1698,7 +1698,7 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Datasets - Datasets
description: Register a new dataset through a file or description: Register a new dataset.
parameters: [] parameters: []
requestBody: requestBody:
content: content:
@ -4974,17 +4974,17 @@ components:
- scoring_functions - scoring_functions
- metadata - metadata
title: Benchmark title: Benchmark
DataReference: DataSource:
oneOf: oneOf:
- $ref: '#/components/schemas/URIDataReference' - $ref: '#/components/schemas/URIDataSource'
- $ref: '#/components/schemas/HuggingfaceDataReference' - $ref: '#/components/schemas/HuggingfaceDataSource'
- $ref: '#/components/schemas/RowsDataReference' - $ref: '#/components/schemas/RowsDataSource'
discriminator: discriminator:
propertyName: type propertyName: type
mapping: mapping:
uri: '#/components/schemas/URIDataReference' uri: '#/components/schemas/URIDataSource'
huggingface: '#/components/schemas/HuggingfaceDataReference' huggingface: '#/components/schemas/HuggingfaceDataSource'
rows: '#/components/schemas/RowsDataReference' rows: '#/components/schemas/RowsDataSource'
Dataset: Dataset:
type: object type: object
properties: properties:
@ -5001,12 +5001,12 @@ components:
schema: schema:
type: string type: string
enum: enum:
- jsonl_messages - messages
title: Schema title: Schema
description: >- description: >-
Schema of the dataset. Each type has a different column format. Schema of the dataset. Each type has a different column format.
data_reference: data_source:
$ref: '#/components/schemas/DataReference' $ref: '#/components/schemas/DataSource'
metadata: metadata:
type: object type: object
additionalProperties: additionalProperties:
@ -5024,10 +5024,10 @@ components:
- provider_id - provider_id
- type - type
- schema - schema
- data_reference - data_source
- metadata - metadata
title: Dataset title: Dataset
HuggingfaceDataReference: HuggingfaceDataSource:
type: object type: object
properties: properties:
type: type:
@ -5051,8 +5051,8 @@ components:
- type - type
- dataset_path - dataset_path
- params - params
title: HuggingfaceDataReference title: HuggingfaceDataSource
RowsDataReference: RowsDataSource:
type: object type: object
properties: properties:
type: type:
@ -5075,8 +5075,8 @@ components:
required: required:
- type - type
- rows - rows
title: RowsDataReference title: RowsDataSource
URIDataReference: URIDataSource:
type: object type: object
properties: properties:
type: type:
@ -5089,7 +5089,7 @@ components:
required: required:
- type - type
- uri - uri
title: URIDataReference title: URIDataSource
Model: Model:
type: object type: object
properties: properties:
@ -6472,19 +6472,19 @@ components:
schema: schema:
type: string type: string
enum: enum:
- jsonl_messages - messages
description: >- description: >-
The schema format of the dataset. One of - jsonl_messages: The dataset The schema format of the dataset. One of - messages: The dataset contains
is a JSONL file with messages in column format a messages column with list of messages for post-training.
data_reference: data_source:
$ref: '#/components/schemas/DataReference' $ref: '#/components/schemas/DataSource'
description: >- description: >-
The data reference of the dataset. Examples: - { "type": "uri", "uri": The data source of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl"
"https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "lsfs://mydata.jsonl" } - { "type": "huggingface",
} - { "type": "huggingface", "dataset_path": "tatsu-lab/alpaca", "params": "dataset_path": "tatsu-lab/alpaca", "params": { "split": "train" } } -
{ "split": "train" } } - { "type": "rows", "rows": [ { "messages": [ {"role": { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
"user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
"Hello, world!"}, ] } ] } } ] }
metadata: metadata:
type: object type: object
additionalProperties: additionalProperties:
@ -6504,7 +6504,7 @@ components:
additionalProperties: false additionalProperties: false
required: required:
- schema - schema
- data_reference - data_source
title: RegisterDatasetRequest title: RegisterDatasetRequest
RegisterModelRequest: RegisterModelRequest:
type: object type: object
@ -6534,28 +6534,10 @@ components:
RegisterScoringFunctionRequest: RegisterScoringFunctionRequest:
type: object type: object
properties: properties:
scoring_fn_type: fn:
type: string
enum:
- custom_llm_as_judge
- regex_parser
- regex_parser_math_response
- equality
- subset_of
- factuality
- faithfulness
- answer_correctness
- answer_relevancy
- answer_similarity
- context_entity_recall
- context_precision
- context_recall
- context_relevancy
description: >-
The type of scoring function to register.
params:
$ref: '#/components/schemas/ScoringFnParams' $ref: '#/components/schemas/ScoringFnParams'
description: The parameters for the scoring function. description: >-
The type and parameters for the scoring function.
scoring_fn_id: scoring_fn_id:
type: string type: string
description: >- description: >-
@ -6576,8 +6558,7 @@ components:
- E.g. {"description": "This scoring function is used for ..."} - E.g. {"description": "This scoring function is used for ..."}
additionalProperties: false additionalProperties: false
required: required:
- scoring_fn_type - fn
- params
title: RegisterScoringFunctionRequest title: RegisterScoringFunctionRequest
RegisterShieldRequest: RegisterShieldRequest:
type: object type: object

View file

@ -16,7 +16,7 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
class Schema(Enum): class Schema(Enum):
""" """
Schema of the dataset. Each type has a different column format. Schema of the dataset. Each type has a different column format.
:cvar jsonl_messages: The dataset is a JSONL file with messages. Examples: :cvar messages: The dataset contains messages used for post-training. Examples:
{ {
"messages": [ "messages": [
{"role": "user", "content": "Hello, world!"}, {"role": "user", "content": "Hello, world!"},
@ -25,7 +25,7 @@ class Schema(Enum):
} }
""" """
jsonl_messages = "jsonl_messages" messages = "messages"
# TODO: add more schemas here # TODO: add more schemas here
@ -36,36 +36,36 @@ class DatasetType(Enum):
@json_schema_type @json_schema_type
class URIDataReference(BaseModel): class URIDataSource(BaseModel):
type: Literal["uri"] = "uri" type: Literal["uri"] = "uri"
uri: str uri: str
@json_schema_type @json_schema_type
class HuggingfaceDataReference(BaseModel): class HuggingfaceDataSource(BaseModel):
type: Literal["huggingface"] = "huggingface" type: Literal["huggingface"] = "huggingface"
dataset_path: str dataset_path: str
params: Dict[str, Any] params: Dict[str, Any]
@json_schema_type @json_schema_type
class RowsDataReference(BaseModel): class RowsDataSource(BaseModel):
type: Literal["rows"] = "rows" type: Literal["rows"] = "rows"
rows: List[Dict[str, Any]] rows: List[Dict[str, Any]]
DataReference = register_schema( DataSource = register_schema(
Annotated[ Annotated[
Union[URIDataReference, HuggingfaceDataReference, RowsDataReference], Union[URIDataSource, HuggingfaceDataSource, RowsDataSource],
Field(discriminator="type"), Field(discriminator="type"),
], ],
name="DataReference", name="DataSource",
) )
class CommonDatasetFields(BaseModel): class CommonDatasetFields(BaseModel):
schema: Schema schema: Schema
data_reference: DataReference data_source: DataSource
metadata: Dict[str, Any] = Field( metadata: Dict[str, Any] = Field(
default_factory=dict, default_factory=dict,
description="Any additional metadata for this dataset", description="Any additional metadata for this dataset",
@ -100,16 +100,16 @@ class Datasets(Protocol):
async def register_dataset( async def register_dataset(
self, self,
schema: Schema, schema: Schema,
data_reference: DataReference, data_source: DataSource,
metadata: Optional[Dict[str, Any]] = None, metadata: Optional[Dict[str, Any]] = None,
dataset_id: Optional[str] = None, dataset_id: Optional[str] = None,
) -> Dataset: ) -> Dataset:
""" """
Register a new dataset through a file or Register a new dataset.
:param schema: The schema format of the dataset. One of :param schema: The schema format of the dataset. One of
- jsonl_messages: The dataset is a JSONL file with messages in column format - messages: The dataset contains a messages column with list of messages for post-training.
:param data_reference: The data reference of the dataset. Examples: :param data_source: The data source of the dataset. Examples:
- { - {
"type": "uri", "type": "uri",
"uri": "https://mywebsite.com/mydata.jsonl" "uri": "https://mywebsite.com/mydata.jsonl"

View file

@ -12,8 +12,8 @@ from typing import (
Literal, Literal,
Optional, Optional,
Protocol, Protocol,
Union,
runtime_checkable, runtime_checkable,
Union,
) )
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
@ -218,7 +218,9 @@ class CommonScoringFnFields(BaseModel):
@json_schema_type @json_schema_type
class ScoringFn(CommonScoringFnFields, Resource): class ScoringFn(CommonScoringFnFields, Resource):
type: Literal[ResourceType.scoring_function.value] = ResourceType.scoring_function.value type: Literal[ResourceType.scoring_function.value] = (
ResourceType.scoring_function.value
)
@property @property
def scoring_fn_id(self) -> str: def scoring_fn_id(self) -> str:
@ -245,13 +247,14 @@ class ScoringFunctions(Protocol):
async def list_scoring_functions(self) -> ListScoringFunctionsResponse: ... async def list_scoring_functions(self) -> ListScoringFunctionsResponse: ...
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET") @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET")
async def get_scoring_function(self, scoring_fn_id: str, /) -> Optional[ScoringFn]: ... async def get_scoring_function(
self, scoring_fn_id: str, /
) -> Optional[ScoringFn]: ...
@webmethod(route="/scoring-functions", method="POST") @webmethod(route="/scoring-functions", method="POST")
async def register_scoring_function( async def register_scoring_function(
self, self,
scoring_fn_type: ScoringFunctionType, fn: ScoringFnParams,
params: ScoringFnParams = None,
scoring_fn_id: Optional[str] = None, scoring_fn_id: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None, metadata: Optional[Dict[str, Any]] = None,
) -> ScoringFn: ) -> ScoringFn:
@ -259,8 +262,7 @@ class ScoringFunctions(Protocol):
Register a new scoring function with given parameters. Register a new scoring function with given parameters.
Only valid scoring function types that can be parameterized can be registered. Only valid scoring function types that can be parameterized can be registered.
:param scoring_fn_type: The type of scoring function to register. :param fn: The type and parameters for the scoring function.
:param params: The parameters for the scoring function.
:param scoring_fn_id: (Optional) The ID of the scoring function to register. If not provided, a random ID will be generated. :param scoring_fn_id: (Optional) The ID of the scoring function to register. If not provided, a random ID will be generated.
:param metadata: (Optional) Any additional metadata to be associated with the scoring function. :param metadata: (Optional) Any additional metadata to be associated with the scoring function.
- E.g. {"description": "This scoring function is used for ..."} - E.g. {"description": "This scoring function is used for ..."}