Merge branch 'pr1573' into api_2

2025-12-31 04:03:53 +00:00 · 2025-03-13 14:49:04 -07:00 · 2025-03-13 14:49:04 -07:00 · 0c37951395
commit 0c37951395
parent 025d173606 a6095820af
4 changed files with 246 additions and 354 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@ -40,75 +40,7 @@
        }
    ],
    "paths": {
-        "/v1/datasetio/rows": {
+        "/v1/datasets/{dataset_id}/rows": {
            "get": {
                "responses": {
                    "200": {
                        "description": "OK",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "$ref": "#/components/schemas/PaginatedRowsResult"
                                }
                            }
                        }
                    },
                    "400": {
                        "$ref": "#/components/responses/BadRequest400"
                    },
                    "429": {
                        "$ref": "#/components/responses/TooManyRequests429"
                    },
                    "500": {
                        "$ref": "#/components/responses/InternalServerError500"
                    },
                    "default": {
                        "$ref": "#/components/responses/DefaultError"
                    }
                },
                "tags": [
                    "DatasetIO"
                ],
                "description": "Get a paginated list of rows from a dataset.",
                "parameters": [
                    {
                        "name": "dataset_id",
                        "in": "query",
                        "description": "The ID of the dataset to get the rows from.",
                        "required": true,
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "rows_in_page",
                        "in": "query",
                        "description": "The number of rows to get per page.",
                        "required": true,
                        "schema": {
                            "type": "integer"
                        }
                    },
                    {
                        "name": "page_token",
                        "in": "query",
                        "description": "The token to get the next page of rows.",
                        "required": false,
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "filter_condition",
                        "in": "query",
                        "description": "(Optional) A condition to filter the rows by.",
                        "required": false,
                        "schema": {
                            "type": "string"
                        }
                    }
                ]
            },
            "post": {
                "responses": {
                    "200": {
@ -131,7 +63,16 @@
                    "DatasetIO"
                ],
                "description": "",
-                "parameters": [],
+                "parameters": [
                    {
                        "name": "dataset_id",
                        "in": "path",
                        "required": true,
                        "schema": {
                            "type": "string"
                        }
                    }
                ],
                "requestBody": {
                    "content": {
                        "application/json": {
@ -2272,6 +2213,76 @@
                }
            }
        },
        "/v1/datasets/{dataset_id}/iterrows": {
            "get": {
                "responses": {
                    "200": {
                        "description": "OK",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "$ref": "#/components/schemas/PaginatedRowsResult"
                                }
                            }
                        }
                    },
                    "400": {
                        "$ref": "#/components/responses/BadRequest400"
                    },
                    "429": {
                        "$ref": "#/components/responses/TooManyRequests429"
                    },
                    "500": {
                        "$ref": "#/components/responses/InternalServerError500"
                    },
                    "default": {
                        "$ref": "#/components/responses/DefaultError"
                    }
                },
                "tags": [
                    "DatasetIO"
                ],
                "description": "Get a paginated list of rows from a dataset.",
                "parameters": [
                    {
                        "name": "dataset_id",
                        "in": "path",
                        "description": "The ID of the dataset to get the rows from.",
                        "required": true,
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "rows_in_page",
                        "in": "query",
                        "description": "The number of rows to get per page.",
                        "required": true,
                        "schema": {
                            "type": "integer"
                        }
                    },
                    {
                        "name": "page_token",
                        "in": "query",
                        "description": "The token to get the next page of rows.",
                        "required": false,
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
                        "name": "filter_condition",
                        "in": "query",
                        "description": "(Optional) A condition to filter the rows by.",
                        "required": false,
                        "schema": {
                            "type": "string"
                        }
                    }
                ]
            }
        },
        "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": {
            "get": {
                "responses": {
@ -3861,9 +3872,6 @@
            "AppendRowsRequest": {
                "type": "object",
                "properties": {
                    "dataset_id": {
                        "type": "string"
                    },
                    "rows": {
                        "type": "array",
                        "items": {
@ -3895,7 +3903,6 @@
                },
                "additionalProperties": false,
                "required": [
                    "dataset_id",
                    "rows"
                ],
                "title": "AppendRowsRequest"
@ -6755,9 +6762,6 @@
                    {
                        "$ref": "#/components/schemas/URIDataSource"
                    },
                    {
                        "$ref": "#/components/schemas/HuggingfaceDataSource"
                    },
                    {
                        "$ref": "#/components/schemas/RowsDataSource"
                    }
@ -6766,7 +6770,6 @@
                    "propertyName": "type",
                    "mapping": {
                        "uri": "#/components/schemas/URIDataSource",
                        "huggingface": "#/components/schemas/HuggingfaceDataSource",
                        "rows": "#/components/schemas/RowsDataSource"
                    }
                }
@ -6842,65 +6845,6 @@
                ],
                "title": "Dataset"
            },
            "HuggingfaceDataSource": {
                "type": "object",
                "properties": {
                    "type": {
                        "type": "string",
                        "const": "huggingface",
                        "default": "huggingface",
                        "description": "The type of the data source."
                    },
                    "huggingface": {
                        "type": "object",
                        "properties": {
                            "path": {
                                "type": "string",
                                "description": "The path to the dataset in Huggingface. E.g. - \"llamastack/simpleqa\""
                            },
                            "params": {
                                "type": "object",
                                "additionalProperties": {
                                    "oneOf": [
                                        {
                                            "type": "null"
                                        },
                                        {
                                            "type": "boolean"
                                        },
                                        {
                                            "type": "number"
                                        },
                                        {
                                            "type": "string"
                                        },
                                        {
                                            "type": "array"
                                        },
                                        {
                                            "type": "object"
                                        }
                                    ]
                                },
                                "description": "The parameters for the dataset."
                            }
                        },
                        "additionalProperties": false,
                        "required": [
                            "path",
                            "params"
                        ],
                        "description": "The fields for a Huggingface dataset."
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type",
                    "huggingface"
                ],
                "title": "HuggingfaceDataSource",
                "description": "A dataset stored in Huggingface."
            },
            "RowsDataSource": {
                "type": "object",
                "properties": {
@ -7034,56 +6978,7 @@
                ],
                "title": "ModelType"
            },
-            "PaginatedRowsResult": {
+            "AgentTurnInputType": {
                "type": "object",
                "properties": {
                    "rows": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "additionalProperties": {
                                "oneOf": [
                                    {
                                        "type": "null"
                                    },
                                    {
                                        "type": "boolean"
                                    },
                                    {
                                        "type": "number"
                                    },
                                    {
                                        "type": "string"
                                    },
                                    {
                                        "type": "array"
                                    },
                                    {
                                        "type": "object"
                                    }
                                ]
                            }
                        },
                        "description": "The rows in the current page."
                    },
                    "total_count": {
                        "type": "integer",
                        "description": "The total number of rows in the dataset."
                    },
                    "next_page_token": {
                        "type": "string",
                        "description": "The token to get the next page of rows."
                    }
                },
                "additionalProperties": false,
                "required": [
                    "rows",
                    "total_count"
                ],
                "title": "PaginatedRowsResult",
                "description": "A paginated list of rows from a dataset."
            },
            "AnswerCorrectnessScoringFn": {
                "type": "object",
                "properties": {
                    "type": {
@ -8537,6 +8432,55 @@
                ],
                "title": "ToolInvocationResult"
            },
            "PaginatedRowsResult": {
                "type": "object",
                "properties": {
                    "rows": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "additionalProperties": {
                                "oneOf": [
                                    {
                                        "type": "null"
                                    },
                                    {
                                        "type": "boolean"
                                    },
                                    {
                                        "type": "number"
                                    },
                                    {
                                        "type": "string"
                                    },
                                    {
                                        "type": "array"
                                    },
                                    {
                                        "type": "object"
                                    }
                                ]
                            }
                        },
                        "description": "The rows in the current page."
                    },
                    "total_count": {
                        "type": "integer",
                        "description": "The total number of rows in the dataset."
                    },
                    "next_page_token": {
                        "type": "string",
                        "description": "The token to get the next page of rows."
                    }
                },
                "additionalProperties": false,
                "required": [
                    "rows",
                    "total_count"
                ],
                "title": "PaginatedRowsResult",
                "description": "A paginated list of rows from a dataset."
            },
            "ListAgentSessionsResponse": {
                "type": "object",
                "properties": {
@ -9884,7 +9828,7 @@
                    },
                    "source": {
                        "$ref": "#/components/schemas/DataSource",
-                        "description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"huggingface\", \"huggingface\": { \"dataset_path\": \"tatsu-lab/alpaca\", \"params\": { \"split\": \"train\" } } } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }"
+                        "description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"data:csv;base64,{base64_content}\" } - { \"type\": \"uri\", \"uri\": \"huggingface://llamastack/simpleqa?split=train\" } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"} ] } ] }"
                    },
                    "metadata": {
                        "type": "object",
@ -9914,7 +9858,7 @@
                    },
                    "dataset_id": {
                        "type": "string",
-                        "description": "The ID of the dataset. If not provided, a random ID will be generated."
+                        "description": "The ID of the dataset. If not provided, an ID will be generated."
                    }
                },
                "additionalProperties": false,
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -10,56 +10,7 @@ info:
 servers:
  - url: http://any-hosted-llama-stack.com
 paths:
-  /v1/datasetio/rows:
+  /v1/datasets/{dataset_id}/rows:
    get:
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PaginatedRowsResult'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - DatasetIO
      description: >-
        Get a paginated list of rows from a dataset.
      parameters:
        - name: dataset_id
          in: query
          description: >-
            The ID of the dataset to get the rows from.
          required: true
          schema:
            type: string
        - name: rows_in_page
          in: query
          description: The number of rows to get per page.
          required: true
          schema:
            type: integer
        - name: page_token
          in: query
          description: The token to get the next page of rows.
          required: false
          schema:
            type: string
        - name: filter_condition
          in: query
          description: >-
            (Optional) A condition to filter the rows by.
          required: false
          schema:
            type: string
    post:
      responses:
        '200':
@ -77,7 +28,12 @@ paths:
      tags:
        - DatasetIO
      description: ''
-      parameters: []
+      parameters:
        - name: dataset_id
          in: path
          required: true
          schema:
            type: string
      requestBody:
        content:
          application/json:
@ -1529,6 +1485,56 @@ paths:
            schema:
              $ref: '#/components/schemas/InvokeToolRequest'
        required: true
  /v1/datasets/{dataset_id}/iterrows:
    get:
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PaginatedRowsResult'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - DatasetIO
      description: >-
        Get a paginated list of rows from a dataset.
      parameters:
        - name: dataset_id
          in: path
          description: >-
            The ID of the dataset to get the rows from.
          required: true
          schema:
            type: string
        - name: rows_in_page
          in: query
          description: The number of rows to get per page.
          required: true
          schema:
            type: integer
        - name: page_token
          in: query
          description: The token to get the next page of rows.
          required: false
          schema:
            type: string
        - name: filter_condition
          in: query
          description: >-
            (Optional) A condition to filter the rows by.
          required: false
          schema:
            type: string
  /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
    get:
      responses:
@ -2636,8 +2642,6 @@ components:
    AppendRowsRequest:
      type: object
      properties:
        dataset_id:
          type: string
        rows:
          type: array
          items:
@ -2652,7 +2656,6 @@ components:
                - type: object
      additionalProperties: false
      required:
        - dataset_id
        - rows
      title: AppendRowsRequest
    CompletionMessage:
@ -4679,13 +4682,11 @@ components:
    DataSource:
      oneOf:
        - $ref: '#/components/schemas/URIDataSource'
        - $ref: '#/components/schemas/HuggingfaceDataSource'
        - $ref: '#/components/schemas/RowsDataSource'
      discriminator:
        propertyName: type
        mapping:
          uri: '#/components/schemas/URIDataSource'
          huggingface: '#/components/schemas/HuggingfaceDataSource'
          rows: '#/components/schemas/RowsDataSource'
    Dataset:
      type: object
@ -4734,43 +4735,6 @@ components:
        - source
        - metadata
      title: Dataset
    HuggingfaceDataSource:
      type: object
      properties:
        type:
          type: string
          const: huggingface
          default: huggingface
          description: The type of the data source.
        huggingface:
          type: object
          properties:
            path:
              type: string
              description: >-
                The path to the dataset in Huggingface. E.g. - "llamastack/simpleqa"
            params:
              type: object
              additionalProperties:
                oneOf:
                  - type: 'null'
                  - type: boolean
                  - type: number
                  - type: string
                  - type: array
                  - type: object
              description: The parameters for the dataset.
          additionalProperties: false
          required:
            - path
            - params
          description: The fields for a Huggingface dataset.
      additionalProperties: false
      required:
        - type
        - huggingface
      title: HuggingfaceDataSource
      description: A dataset stored in Huggingface.
    RowsDataSource:
      type: object
      properties:
@ -4860,35 +4824,7 @@ components:
        - llm
        - embedding
      title: ModelType
-    PaginatedRowsResult:
+    AgentTurnInputType:
      type: object
      properties:
        rows:
          type: array
          items:
            type: object
            additionalProperties:
              oneOf:
                - type: 'null'
                - type: boolean
                - type: number
                - type: string
                - type: array
                - type: object
          description: The rows in the current page.
        total_count:
          type: integer
          description: The total number of rows in the dataset.
        next_page_token:
          type: string
          description: The token to get the next page of rows.
      additionalProperties: false
      required:
        - rows
        - total_count
      title: PaginatedRowsResult
      description: A paginated list of rows from a dataset.
    AnswerCorrectnessScoringFn:
      type: object
      properties:
        type:
@ -5883,6 +5819,34 @@ components:
      required:
        - content
      title: ToolInvocationResult
    PaginatedRowsResult:
      type: object
      properties:
        rows:
          type: array
          items:
            type: object
            additionalProperties:
              oneOf:
                - type: 'null'
                - type: boolean
                - type: number
                - type: string
                - type: array
                - type: object
          description: The rows in the current page.
        total_count:
          type: integer
          description: The total number of rows in the dataset.
        next_page_token:
          type: string
          description: The token to get the next page of rows.
      additionalProperties: false
      required:
        - rows
        - total_count
      title: PaginatedRowsResult
      description: A paginated list of rows from a dataset.
    ListAgentSessionsResponse:
      type: object
      properties:
@ -6804,11 +6768,11 @@ components:
          $ref: '#/components/schemas/DataSource'
          description: >-
            The data source of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl"
-            } - { "type": "uri", "uri": "lsfs://mydata.jsonl" } - { "type": "huggingface",
+            } - { "type": "uri", "uri": "lsfs://mydata.jsonl" } - { "type": "uri",
-            "huggingface": { "dataset_path": "tatsu-lab/alpaca", "params": { "split":
+            "uri": "data:csv;base64,{base64_content}" } - { "type": "uri", "uri":
-            "train" } } } - { "type": "rows", "rows": [ { "messages": [ {"role": "user",
+            "huggingface://llamastack/simpleqa?split=train" } - { "type": "rows",
-            "content": "Hello, world!"}, {"role": "assistant", "content": "Hello,
+            "rows": [ { "messages": [ {"role": "user", "content": "Hello, world!"},
-            world!"}, ] } ] }
+            {"role": "assistant", "content": "Hello, world!"} ] } ] }
        metadata:
          type: object
          additionalProperties:
@ -6824,7 +6788,7 @@ components:
        dataset_id:
          type: string
          description: >-
-            The ID of the dataset. If not provided, a random ID will be generated.
+            The ID of the dataset. If not provided, an ID will be generated.
      additionalProperties: false
      required:
        - purpose
--- a/llama_stack/apis/datasetio/datasetio.py
+++ b/llama_stack/apis/datasetio/datasetio.py
@ -37,8 +37,8 @@ class DatasetIO(Protocol):
    # keeping for aligning with inference/safety, but this is not used
    dataset_store: DatasetStore
-    @webmethod(route="/datasetio/rows", method="GET")
+    @webmethod(route="/datasets/{dataset_id}/iterrows", method="GET")
-    async def get_rows_paginated(
+    async def iterrows(
        self,
        dataset_id: str,
        rows_in_page: int,
@ -54,5 +54,7 @@ class DatasetIO(Protocol):
        """
        ...
-    @webmethod(route="/datasetio/rows", method="POST")
+    @webmethod(route="/datasets/{dataset_id}/rows", method="POST")
-    async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ...
+    async def append_rows(
        self, dataset_id: str, rows: List[Dict[str, Any]]
    ) -> None: ...
--- a/llama_stack/apis/datasets/datasets.py
+++ b/llama_stack/apis/datasets/datasets.py
@ -60,6 +60,7 @@ class DatasetPurpose(Enum):
            "answer": "Paris"
        }
    """
    post_training_messages = "post-training/messages"
    eval_question_answer = "eval/question-answer"
@ -75,11 +76,10 @@ class DatasetPurpose(Enum):
 class DatasetType(Enum):
    """
    Type of the dataset source.
-    :cvar huggingface: The dataset is stored in Huggingface.
+    :cvar uri: The dataset can be obtained from a URI.
-    :cvar uri: The dataset can be obtained from a URI. 
+    :cvar rows: The dataset is stored in rows.
    :cvar rows: The dataset is stored in rows. 
    """
-    huggingface = "huggingface"
+
    uri = "uri"
    rows = "rows"
@ -92,30 +92,11 @@ class URIDataSource(BaseModel):
        - "lsfs://mydata.jsonl"
        - "data:csv;base64,{base64_content}"
    """
    type: Literal["uri"] = "uri"
    uri: str
 class HuggingfaceDataSourceFields(BaseModel):
    """The fields for a Huggingface dataset.
    :param path: The path to the dataset in Huggingface. E.g.
        - "llamastack/simpleqa"
    :param params: The parameters for the dataset.
    """
    path: str
    params: Dict[str, Any]
@json_schema_type
 class HuggingfaceDataSource(BaseModel):
    """A dataset stored in Huggingface.
    :param type: The type of the data source.
    :param huggingface: The fields for a Huggingface dataset.
    """
    type: Literal["huggingface"] = "huggingface"
    huggingface: HuggingfaceDataSourceFields
@json_schema_type
 class RowsDataSource(BaseModel):
    """A dataset stored in rows.
@ -124,13 +105,14 @@ class RowsDataSource(BaseModel):
            {"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}
        ]
    """
    type: Literal["rows"] = "rows"
    rows: List[Dict[str, Any]]
 DataSource = register_schema(
    Annotated[
-        Union[URIDataSource, HuggingfaceDataSource, RowsDataSource],
+        Union[URIDataSource, RowsDataSource],
        Field(discriminator="type"),
    ],
    name="DataSource",
@ -141,6 +123,7 @@ class CommonDatasetFields(BaseModel):
    """
    Common fields for a dataset.
    """
    purpose: DatasetPurpose
    source: DataSource
    metadata: Dict[str, Any] = Field(
@ -237,13 +220,12 @@ class Datasets(Protocol):
               "uri": "lsfs://mydata.jsonl"
           }
           - {
-               "type": "huggingface",
+               "type": "uri",
-               "huggingface": {
+               "uri": "data:csv;base64,{base64_content}"
-                   "dataset_path": "tatsu-lab/alpaca",
+           }
-                   "params": {
+           - {
-                       "split": "train"
+               "type": "uri",
-                   }
+               "uri": "huggingface://llamastack/simpleqa?split=train"
               }
           }
           - {
               "type": "rows",
@ -258,7 +240,7 @@ class Datasets(Protocol):
           }
        :param metadata: The metadata for the dataset.
           - E.g. {"description": "My dataset"}
-        :param dataset_id: The ID of the dataset. If not provided, a random ID will be generated.
+        :param dataset_id: The ID of the dataset. If not provided, an ID will be generated.
        """
        ...