sdg improvements

This commit is contained in:
Raghotham Murthy 2024-07-10 22:58:29 -07:00
parent c1f6816d76
commit d9367054df

View file

@@ -283,11 +283,9 @@ class FilteringFunction(Enum):
class SyntheticDataGenerationRequest:
"""Request to generate synthetic data. A small batch of prompts and a filtering function"""
prompts: List[str]
prompts: List[Message]
filtering_function: FilteringFunction = FilteringFunction.none
# TODO(ragho): fix this
# reward_scoring: RewardScoring
reward_scoring: Optional[RewardScoring] = None
@json_schema_type
@@ -295,11 +293,8 @@ class SyntheticDataGenerationRequest:
class SyntheticDataGenerationResponse:
"""Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."""
synthetic_data: List[Tuple[str, str, float]]
"""The actual synthetic data"""
statistics: Dict[str, float]
"""Statistics on how many prompts were generated and how many were filtered out"""
synthetic_data: List[KScoredPromptGenerations]
statistics: Optional[Dict[str, Any]] = None
class SyntheticDataGeneration(Protocol):