SDG (synthetic data generation) improvements

This commit is contained in:
Raghotham Murthy 2024-07-10 22:58:29 -07:00
parent c1f6816d76
commit d9367054df

View file

@ -283,23 +283,18 @@ class FilteringFunction(Enum):
class SyntheticDataGenerationRequest: class SyntheticDataGenerationRequest:
"""Request to generate synthetic data. A small batch of prompts and a filtering function""" """Request to generate synthetic data. A small batch of prompts and a filtering function"""
prompts: List[str] prompts: List[Message]
filtering_function: FilteringFunction = FilteringFunction.none filtering_function: FilteringFunction = FilteringFunction.none
reward_scoring: Optional[RewardScoring] = None
# TODO(ragho): fix this
# reward_scoring: RewardScoring
@json_schema_type @json_schema_type
@dataclass @dataclass
class SyntheticDataGenerationResponse: class SyntheticDataGenerationResponse:
"""Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.""" """Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."""
synthetic_data: List[Tuple[str, str, float]] synthetic_data: List[KScoredPromptGenerations]
statistics: Optional[Dict[str, Any]] = None
"""The actual synthetic data"""
statistics: Dict[str, float]
"""Statistics on how many prompts were generated and how many were filtered out"""
class SyntheticDataGeneration(Protocol): class SyntheticDataGeneration(Protocol):