sdg improvements

2025-12-03 09:53:45 +00:00 · 2024-07-10 22:58:29 -07:00 · 2024-07-10 22:58:29 -07:00 · d9367054df
commit d9367054df
parent c1f6816d76
1 changed file with 5 additions and 10 deletions
--- a/source/api_definitions.py
+++ b/source/api_definitions.py
@@ -283,23 +283,18 @@ class FilteringFunction(Enum):
 class SyntheticDataGenerationRequest:
    """Request to generate synthetic data. A small batch of prompts and a filtering function"""
-    prompts: List[str]
+    prompts: List[Message]
    filtering_function: FilteringFunction = FilteringFunction.none
-
+    reward_scoring: Optional[RewardScoring] = None
    # TODO(ragho): fix this
    # reward_scoring: RewardScoring
 @json_schema_type
 @dataclass
 class SyntheticDataGenerationResponse:
-    """Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."""
+     """Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."""
-    synthetic_data: List[Tuple[str, str, float]]
+     synthetic_data: List[KScoredPromptGenerations]
-
+     statistics: Optional[Dict[str, Any]] = None
    """The actual synthetic data"""
    statistics: Dict[str, float]
    """Statistics on how many prompts were generated and how many were filtered out"""
 class SyntheticDataGeneration(Protocol):