reward scoring

This commit is contained in:
Raghotham Murthy 2024-07-10 21:56:16 -07:00
parent 69ecf55de2
commit ebb59aa35f
3 changed files with 117 additions and 79 deletions

View file

@ -218,25 +218,33 @@ class AgenticSystem(Protocol):
@dataclass
class PromptGeneration:
# TODO(ashwin): probably create a Dialog type which is used everywhere including chat completion
class KPromptGenerations:
prompt: Message
message_history: List[Message]
generation: Message
k_generations: List[Message]
@json_schema_type
@dataclass
class ScoredPromptGeneration:
prompt_generation: PromptGeneration
class MessageScore:
"""A single message and its score."""
message: Message
score: float
@json_schema_type
@dataclass
class KScoredPromptGenerations:
prompt: Message
k_scored_generations: List[MessageScore]
@json_schema_type
@dataclass
class RewardScoringRequest:
"""Request to score a reward function. A list of prompts and a list of responses per prompt."""
prompt_generations: List[PromptGeneration]
prompt_generations: List[KPromptGenerations]
# TODO(ragho): create a RewardModel enum type
model: str
@ -247,7 +255,7 @@ class RewardScoringRequest:
class RewardScoringResponse:
"""Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold."""
scored_generations: List[ScoredPromptGeneration]
scored_generations: List[KScoredPromptGenerations]
class RewardScoring(Protocol):