reward scoring

2025-10-04 04:04:14 +00:00 · 2024-07-10 21:56:16 -07:00 · 2024-07-10 21:56:16 -07:00 · ebb59aa35f
commit ebb59aa35f
parent 69ecf55de2
3 changed files with 117 additions and 79 deletions
--- a/source/api_definitions.py
+++ b/source/api_definitions.py
@@ -218,25 +218,33 @@ class AgenticSystem(Protocol):


@dataclass
-class PromptGeneration:
-    # TODO(ashwin): probably create a Dialog type which is used everywhere including chat completion
+class KPromptGenerations:
    prompt: Message
    message_history: List[Message]
-    generation: Message
+    k_generations: List[Message]


+@json_schema_type
@dataclass
-class ScoredPromptGeneration:
-    prompt_generation: PromptGeneration
+class MessageScore:
+    """A single message and its score."""
+
+    message: Message
    score: float

+@json_schema_type
+@dataclass
+class KScoredPromptGenerations:
+    prompt: Message
+    k_scored_generations: List[MessageScore]
+

@json_schema_type
@dataclass
 class RewardScoringRequest:
    """Request to score a reward function. A list of prompts and a list of responses per prompt."""

-    prompt_generations: List[PromptGeneration]
+    prompt_generations: List[KPromptGenerations]

    # TODO(ragho): create a RewardModel enum tye
    model: str
@@ -247,7 +255,7 @@ class RewardScoringRequest:
 class RewardScoringResponse:
    """Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold."""

-    scored_generations: List[ScoredPromptGeneration]
+    scored_generations: List[KScoredPromptGenerations]


 class RewardScoring(Protocol):