This commit is contained in:
Xi Yan 2024-10-03 11:18:23 -07:00
parent b9b1e8b08b
commit 5e9301de90
2 changed files with 28 additions and 27 deletions

View file

@@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from enum import Enum
# from enum import Enum
from typing import Any, Dict, Optional, Protocol
from llama_models.llama3.api.datatypes import URL
@@ -14,22 +14,12 @@ from llama_models.schema_utils import json_schema_type, webmethod
from pydantic import BaseModel
# Enum of column kinds, decorated with json_schema_type. Each member's value
# is the lowercase string identical to the member's own name.
# NOTE(review): this text appears to come from a diff view whose leading
# column (diff markers and indentation) was stripped; the member assignments
# below belong to the class body -- confirm against the real file.
@json_schema_type
class TrainEvalDatasetColumnType(Enum):
# the five members, in declaration order
dialog = "dialog"
text = "text"
media = "media"
number = "number"
json = "json"
# Model class (subclasses BaseModel) decorated with json_schema_type.
# NOTE(review): this text appears to come from a diff view with the +/-
# markers stripped, so removed and added field lines may both be shown here
# -- confirm the final field set against the real file.
@json_schema_type
class TrainEvalDataset(BaseModel):
"""Dataset to be used for training or evaluating language models."""
# TODO(ashwin): figure out if we need to add an enum for a "dataset type"
# str-keyed mapping to TrainEvalDatasetColumnType values (presumably keyed by
# column name -- confirm); required, no default
columns: Dict[str, TrainEvalDatasetColumnType]
# unique identifier associated with the dataset
dataset_id: str
# required URL-typed field (presumably where the dataset content lives -- confirm)
content_url: URL
# optional str-keyed dict of arbitrary values; defaults to None
metadata: Optional[Dict[str, Any]] = None