Merge branch 'api_2' into api_3

2026-01-06 21:29:56 +00:00 · 2025-03-12 00:21:03 -07:00 · 2025-03-12 00:21:03 -07:00 · d0e372058d
commit d0e372058d
parent 78b4cdad67 124040af77
4 changed files with 83 additions and 121 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@ -2518,7 +2518,7 @@
                "tags": [
                    "Datasets"
                ],
-                "description": "Register a new dataset through a file or",
+                "description": "Register a new dataset.",
                "parameters": [],
                "requestBody": {
                    "content": {
@ -7144,24 +7144,24 @@
                ],
                "title": "Benchmark"
            },
-            "DataReference": {
+            "DataSource": {
                "oneOf": [
                    {
-                        "$ref": "#/components/schemas/URIDataReference"
+                        "$ref": "#/components/schemas/URIDataSource"
                    },
                    {
-                        "$ref": "#/components/schemas/HuggingfaceDataReference"
+                        "$ref": "#/components/schemas/HuggingfaceDataSource"
                    },
                    {
-                        "$ref": "#/components/schemas/RowsDataReference"
+                        "$ref": "#/components/schemas/RowsDataSource"
                    }
                ],
                "discriminator": {
                    "propertyName": "type",
                    "mapping": {
-                        "uri": "#/components/schemas/URIDataReference",
+                        "uri": "#/components/schemas/URIDataSource",
-                        "huggingface": "#/components/schemas/HuggingfaceDataReference",
+                        "huggingface": "#/components/schemas/HuggingfaceDataSource",
-                        "rows": "#/components/schemas/RowsDataReference"
+                        "rows": "#/components/schemas/RowsDataSource"
                    }
                }
            },
@ -7185,13 +7185,13 @@
                    "schema": {
                        "type": "string",
                        "enum": [
-                            "jsonl_messages"
+                            "messages"
                        ],
                        "title": "Schema",
                        "description": "Schema of the dataset. Each type has a different column format."
                    },
-                    "data_reference": {
+                    "data_source": {
-                        "$ref": "#/components/schemas/DataReference"
+                        "$ref": "#/components/schemas/DataSource"
                    },
                    "metadata": {
                        "type": "object",
@ -7226,12 +7226,12 @@
                    "provider_id",
                    "type",
                    "schema",
-                    "data_reference",
+                    "data_source",
                    "metadata"
                ],
                "title": "Dataset"
            },
-            "HuggingfaceDataReference": {
+            "HuggingfaceDataSource": {
                "type": "object",
                "properties": {
                    "type": {
@ -7274,9 +7274,9 @@
                    "dataset_path",
                    "params"
                ],
-                "title": "HuggingfaceDataReference"
+                "title": "HuggingfaceDataSource"
            },
-            "RowsDataReference": {
+            "RowsDataSource": {
                "type": "object",
                "properties": {
                    "type": {
@ -7318,9 +7318,9 @@
                    "type",
                    "rows"
                ],
-                "title": "RowsDataReference"
+                "title": "RowsDataSource"
            },
-            "URIDataReference": {
+            "URIDataSource": {
                "type": "object",
                "properties": {
                    "type": {
@ -7337,7 +7337,7 @@
                    "type",
                    "uri"
                ],
-                "title": "URIDataReference"
+                "title": "URIDataSource"
            },
            "Model": {
                "type": "object",
@ -9506,13 +9506,13 @@
                    "schema": {
                        "type": "string",
                        "enum": [
-                            "jsonl_messages"
+                            "messages"
                        ],
-                        "description": "The schema format of the dataset. One of - jsonl_messages: The dataset is a JSONL file with messages in column format"
+                        "description": "The schema format of the dataset. One of - messages: The dataset contains a messages column with list of messages for post-training."
                    },
-                    "data_reference": {
+                    "data_source": {
-                        "$ref": "#/components/schemas/DataReference",
+                        "$ref": "#/components/schemas/DataSource",
-                        "description": "The data reference of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"huggingface\", \"dataset_path\": \"tatsu-lab/alpaca\", \"params\": { \"split\": \"train\" } } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }"
+                        "description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"huggingface\", \"dataset_path\": \"tatsu-lab/alpaca\", \"params\": { \"split\": \"train\" } } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }"
                    },
                    "metadata": {
                        "type": "object",
@ -9548,7 +9548,7 @@
                "additionalProperties": false,
                "required": [
                    "schema",
-                    "data_reference"
+                    "data_source"
                ],
                "title": "RegisterDatasetRequest"
            },
@ -9602,29 +9602,9 @@
            "RegisterScoringFunctionRequest": {
                "type": "object",
                "properties": {
-                    "scoring_fn_type": {
+                    "fn": {
-                        "type": "string",
-                        "enum": [
-                            "custom_llm_as_judge",
-                            "regex_parser",
-                            "regex_parser_math_response",
-                            "equality",
-                            "subset_of",
-                            "factuality",
-                            "faithfulness",
-                            "answer_correctness",
-                            "answer_relevancy",
-                            "answer_similarity",
-                            "context_entity_recall",
-                            "context_precision",
-                            "context_recall",
-                            "context_relevancy"
-                        ],
-                        "description": "The type of scoring function to register."
-                    },
-                    "params": {
                        "$ref": "#/components/schemas/ScoringFnParams",
-                        "description": "The parameters for the scoring function."
+                        "description": "The type and parameters for the scoring function."
                    },
                    "scoring_fn_id": {
                        "type": "string",
@ -9659,8 +9639,7 @@
                },
                "additionalProperties": false,
                "required": [
-                    "scoring_fn_type",
+                    "fn"
-                    "params"
                ],
                "title": "RegisterScoringFunctionRequest"
            },
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -1698,7 +1698,7 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Datasets
-      description: Register a new dataset through a file or
+      description: Register a new dataset.
      parameters: []
      requestBody:
        content:
@ -4974,17 +4974,17 @@ components:
        - scoring_functions
        - metadata
      title: Benchmark
-    DataReference:
+    DataSource:
      oneOf:
-        - $ref: '#/components/schemas/URIDataReference'
+        - $ref: '#/components/schemas/URIDataSource'
-        - $ref: '#/components/schemas/HuggingfaceDataReference'
+        - $ref: '#/components/schemas/HuggingfaceDataSource'
-        - $ref: '#/components/schemas/RowsDataReference'
+        - $ref: '#/components/schemas/RowsDataSource'
      discriminator:
        propertyName: type
        mapping:
-          uri: '#/components/schemas/URIDataReference'
+          uri: '#/components/schemas/URIDataSource'
-          huggingface: '#/components/schemas/HuggingfaceDataReference'
+          huggingface: '#/components/schemas/HuggingfaceDataSource'
-          rows: '#/components/schemas/RowsDataReference'
+          rows: '#/components/schemas/RowsDataSource'
    Dataset:
      type: object
      properties:
@ -5001,12 +5001,12 @@ components:
        schema:
          type: string
          enum:
-            - jsonl_messages
+            - messages
          title: Schema
          description: >-
            Schema of the dataset. Each type has a different column format.
-        data_reference:
+        data_source:
-          $ref: '#/components/schemas/DataReference'
+          $ref: '#/components/schemas/DataSource'
        metadata:
          type: object
          additionalProperties:
@ -5024,10 +5024,10 @@ components:
        - provider_id
        - type
        - schema
-        - data_reference
+        - data_source
        - metadata
      title: Dataset
-    HuggingfaceDataReference:
+    HuggingfaceDataSource:
      type: object
      properties:
        type:
@ -5051,8 +5051,8 @@ components:
        - type
        - dataset_path
        - params
-      title: HuggingfaceDataReference
+      title: HuggingfaceDataSource
-    RowsDataReference:
+    RowsDataSource:
      type: object
      properties:
        type:
@ -5075,8 +5075,8 @@ components:
      required:
        - type
        - rows
-      title: RowsDataReference
+      title: RowsDataSource
-    URIDataReference:
+    URIDataSource:
      type: object
      properties:
        type:
@ -5089,7 +5089,7 @@ components:
      required:
        - type
        - uri
-      title: URIDataReference
+      title: URIDataSource
    Model:
      type: object
      properties:
@ -6472,19 +6472,19 @@ components:
        schema:
          type: string
          enum:
-            - jsonl_messages
+            - messages
          description: >-
-            The schema format of the dataset. One of - jsonl_messages: The dataset
+            The schema format of the dataset. One of - messages: The dataset contains
-            is a JSONL file with messages in column format
+            a messages column with list of messages for post-training.
-        data_reference:
+        data_source:
-          $ref: '#/components/schemas/DataReference'
+          $ref: '#/components/schemas/DataSource'
          description: >-
-            The data reference of the dataset. Examples: - { "type": "uri", "uri":
+            The data source of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl"
-            "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": "lsfs://mydata.jsonl"
+            } - { "type": "uri", "uri": "lsfs://mydata.jsonl" } - { "type": "huggingface",
-            } - { "type": "huggingface", "dataset_path": "tatsu-lab/alpaca", "params":
+            "dataset_path": "tatsu-lab/alpaca", "params": { "split": "train" } } -
-            { "split": "train" } } - { "type": "rows", "rows": [ { "messages": [ {"role":
+            { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
-            "user", "content": "Hello, world!"}, {"role": "assistant", "content":
+            "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
-            "Hello, world!"}, ] } ] }
+            } ] }
        metadata:
          type: object
          additionalProperties:
@ -6504,7 +6504,7 @@ components:
      additionalProperties: false
      required:
        - schema
-        - data_reference
+        - data_source
      title: RegisterDatasetRequest
    RegisterModelRequest:
      type: object
@ -6534,28 +6534,10 @@ components:
    RegisterScoringFunctionRequest:
      type: object
      properties:
-        scoring_fn_type:
+        fn:
-          type: string
-          enum:
-            - custom_llm_as_judge
-            - regex_parser
-            - regex_parser_math_response
-            - equality
-            - subset_of
-            - factuality
-            - faithfulness
-            - answer_correctness
-            - answer_relevancy
-            - answer_similarity
-            - context_entity_recall
-            - context_precision
-            - context_recall
-            - context_relevancy
-          description: >-
-            The type of scoring function to register.
-        params:
          $ref: '#/components/schemas/ScoringFnParams'
-          description: The parameters for the scoring function.
+          description: >-
+            The type and parameters for the scoring function.
        scoring_fn_id:
          type: string
          description: >-
@ -6576,8 +6558,7 @@ components:
            - E.g. {"description": "This scoring function is used for ..."}
      additionalProperties: false
      required:
-        - scoring_fn_type
+        - fn
-        - params
      title: RegisterScoringFunctionRequest
    RegisterShieldRequest:
      type: object
--- a/llama_stack/apis/datasets/datasets.py
+++ b/llama_stack/apis/datasets/datasets.py
@ -16,7 +16,7 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
 class Schema(Enum):
    """
    Schema of the dataset. Each type has a different column format.
-    :cvar jsonl_messages: The dataset is a JSONL file with messages. Examples:
+    :cvar messages: The dataset contains messages used for post-training. Examples:
        {
            "messages": [
                {"role": "user", "content": "Hello, world!"},
@ -25,7 +25,7 @@ class Schema(Enum):
        }
    """
-    jsonl_messages = "jsonl_messages"
+    messages = "messages"
    # TODO: add more schemas here
@ -36,36 +36,36 @@ class DatasetType(Enum):
@json_schema_type
-class URIDataReference(BaseModel):
+class URIDataSource(BaseModel):
    type: Literal["uri"] = "uri"
    uri: str
@json_schema_type
-class HuggingfaceDataReference(BaseModel):
+class HuggingfaceDataSource(BaseModel):
    type: Literal["huggingface"] = "huggingface"
    dataset_path: str
    params: Dict[str, Any]
@json_schema_type
-class RowsDataReference(BaseModel):
+class RowsDataSource(BaseModel):
    type: Literal["rows"] = "rows"
    rows: List[Dict[str, Any]]
-DataReference = register_schema(
+DataSource = register_schema(
    Annotated[
-        Union[URIDataReference, HuggingfaceDataReference, RowsDataReference],
+        Union[URIDataSource, HuggingfaceDataSource, RowsDataSource],
        Field(discriminator="type"),
    ],
-    name="DataReference",
+    name="DataSource",
 )
 class CommonDatasetFields(BaseModel):
    schema: Schema
-    data_reference: DataReference
+    data_source: DataSource
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Any additional metadata for this dataset",
@ -100,16 +100,16 @@ class Datasets(Protocol):
    async def register_dataset(
        self,
        schema: Schema,
-        data_reference: DataReference,
+        data_source: DataSource,
        metadata: Optional[Dict[str, Any]] = None,
        dataset_id: Optional[str] = None,
    ) -> Dataset:
        """
-        Register a new dataset through a file or
+        Register a new dataset.
        :param schema: The schema format of the dataset. One of
-            - jsonl_messages: The dataset is a JSONL file with messages in column format
+            - messages: The dataset contains a messages column with list of messages for post-training.
-        :param data_reference: The data reference of the dataset. Examples:
+        :param data_source: The data source of the dataset. Examples:
           - {
               "type": "uri",
               "uri": "https://mywebsite.com/mydata.jsonl"
--- a/llama_stack/apis/scoring_functions/scoring_functions.py
+++ b/llama_stack/apis/scoring_functions/scoring_functions.py
@ -12,8 +12,8 @@ from typing import (
    Literal,
    Optional,
    Protocol,
    Union,
    runtime_checkable,
    Union,
 )
 from pydantic import BaseModel, Field
@ -218,7 +218,9 @@ class CommonScoringFnFields(BaseModel):
@json_schema_type
 class ScoringFn(CommonScoringFnFields, Resource):
-    type: Literal[ResourceType.scoring_function.value] = ResourceType.scoring_function.value
+    type: Literal[ResourceType.scoring_function.value] = (
+        ResourceType.scoring_function.value
+    )
    @property
    def scoring_fn_id(self) -> str:
@ -245,13 +247,14 @@ class ScoringFunctions(Protocol):
    async def list_scoring_functions(self) -> ListScoringFunctionsResponse: ...
    @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET")
-    async def get_scoring_function(self, scoring_fn_id: str, /) -> Optional[ScoringFn]: ...
+    async def get_scoring_function(
+        self, scoring_fn_id: str, /
+    ) -> Optional[ScoringFn]: ...
    @webmethod(route="/scoring-functions", method="POST")
    async def register_scoring_function(
        self,
-        scoring_fn_type: ScoringFunctionType,
+        fn: ScoringFnParams,
-        params: ScoringFnParams = None,
        scoring_fn_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> ScoringFn:
@ -259,8 +262,7 @@ class ScoringFunctions(Protocol):
        Register a new scoring function with given parameters.
        Only valid scoring function type that can be parameterized can be registered.
-        :param scoring_fn_type: The type of scoring function to register.
+        :param fn: The type and parameters for the scoring function.
-        :param params: The parameters for the scoring function.
        :param scoring_fn_id: (Optional) The ID of the scoring function to register. If not provided, a random ID will be generated.
        :param metadata: (Optional) Any additional metadata to be associated with the scoring function.
            - E.g. {"description": "This scoring function is used for ..."}