reward scoring

2025-12-03 09:53:45 +00:00 · 2024-07-10 21:56:16 -07:00 · 2024-07-10 21:56:16 -07:00 · ebb59aa35f
commit ebb59aa35f
parent 69ecf55de2
3 changed files with 117 additions and 79 deletions
--- a/source/api_definitions.py
+++ b/source/api_definitions.py
@ -218,25 +218,33 @@ class AgenticSystem(Protocol):
@dataclass
-class PromptGeneration:
+class KPromptGenerations:
    # TODO(ashwin): probably create a Dialog type which is used everywhere including chat completion
    prompt: Message
    message_history: List[Message]
-    generation: Message
+    k_generations: List[Message]
@json_schema_type
@dataclass
-class ScoredPromptGeneration:
+class MessageScore:
-    prompt_generation: PromptGeneration
+    """A single message and its score."""
    message: Message
    score: float
@json_schema_type
@dataclass
 class KScoredPromptGenerations:
    prompt: Message
    k_scored_generations: List[MessageScore]
@json_schema_type
@dataclass
 class RewardScoringRequest:
    """Request to score a reward function. A list of prompts and a list of responses per prompt."""
-    prompt_generations: List[PromptGeneration]
+    prompt_generations: List[KPromptGenerations]
    # TODO(ragho): create a RewardModel enum tye
    model: str
@ -247,7 +255,7 @@ class RewardScoringRequest:
 class RewardScoringResponse:
    """Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold."""
-    scored_generations: List[ScoredPromptGeneration]
+    scored_generations: List[KScoredPromptGenerations]
 class RewardScoring(Protocol):
--- a/source/openapi.html
+++ b/source/openapi.html
@ -1960,15 +1960,18 @@
                                        "$ref": "#/components/schemas/Message"
                                    }
                                },
-                                "generation": {
+                                "k_generations": {
-                                    "$ref": "#/components/schemas/Message"
+                                    "type": "array",
                                    "items": {
                                        "$ref": "#/components/schemas/Message"
                                    }
                                }
                            },
                            "additionalProperties": false,
                            "required": [
                                "prompt",
                                "message_history",
-                                "generation"
+                                "k_generations"
                            ]
                        }
                    },
@ -1983,46 +1986,49 @@
                ],
                "title": "Request to score a reward function. A list of prompts and a list of responses per prompt."
            },
            "KScoredPromptGenerations": {
                "type": "object",
                "properties": {
                    "prompt": {
                        "$ref": "#/components/schemas/Message"
                    },
                    "k_scored_generations": {
                        "type": "array",
                        "items": {
                            "$ref": "#/components/schemas/MessageScore"
                        }
                    }
                },
                "additionalProperties": false,
                "required": [
                    "prompt",
                    "k_scored_generations"
                ]
            },
            "MessageScore": {
                "type": "object",
                "properties": {
                    "message": {
                        "$ref": "#/components/schemas/Message"
                    },
                    "score": {
                        "type": "number"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "message",
                    "score"
                ],
                "title": "A single message and its score."
            },
            "RewardScoringResponse": {
                "type": "object",
                "properties": {
                    "scored_generations": {
                        "type": "array",
                        "items": {
-                            "type": "object",
+                            "$ref": "#/components/schemas/KScoredPromptGenerations"
                            "properties": {
                                "prompt_generation": {
                                    "type": "object",
                                    "properties": {
                                        "prompt": {
                                            "$ref": "#/components/schemas/Message"
                                        },
                                        "message_history": {
                                            "type": "array",
                                            "items": {
                                                "$ref": "#/components/schemas/Message"
                                            }
                                        },
                                        "generation": {
                                            "$ref": "#/components/schemas/Message"
                                        }
                                    },
                                    "additionalProperties": false,
                                    "required": [
                                        "prompt",
                                        "message_history",
                                        "generation"
                                    ]
                                },
                                "score": {
                                    "type": "number"
                                }
                            },
                            "additionalProperties": false,
                            "required": [
                                "prompt_generation",
                                "score"
                            ]
                        }
                    }
                },
@ -2306,11 +2312,14 @@
        }
    ],
    "tags": [
        {
            "name": "Inference"
        },
        {
            "name": "SyntheticDataGeneration"
        },
        {
-            "name": "RewardScoring"
+            "name": "Datasets"
        },
        {
            "name": "AgenticSystem"
@ -2319,10 +2328,7 @@
            "name": "Finetuning"
        },
        {
-            "name": "Inference"
+            "name": "RewardScoring"
        },
        {
            "name": "Datasets"
        },
        {
            "name": "ShieldConfig",
@ -2416,6 +2422,14 @@
            "name": "RewardScoringRequest",
            "description": "Request to score a reward function. A list of prompts and a list of responses per prompt.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/RewardScoringRequest\" />"
        },
        {
            "name": "KScoredPromptGenerations",
            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/KScoredPromptGenerations\" />"
        },
        {
            "name": "MessageScore",
            "description": "A single message and its score.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/MessageScore\" />"
        },
        {
            "name": "RewardScoringResponse",
            "description": "Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/RewardScoringResponse\" />"
@ -2470,8 +2484,10 @@
                "FinetuningJobLogStream",
                "FinetuningJobStatusResponse",
                "FinetuningTrainRequest",
                "KScoredPromptGenerations",
                "LoraFinetuningConfig",
                "Message",
                "MessageScore",
                "OptimizerConfig",
                "RewardScoringRequest",
                "RewardScoringResponse",
--- a/source/openapi.yaml
+++ b/source/openapi.yaml
@ -900,6 +900,19 @@ components:
      - logger_config
      title: Request to finetune a model.
      type: object
    KScoredPromptGenerations:
      additionalProperties: false
      properties:
        k_scored_generations:
          items:
            $ref: '#/components/schemas/MessageScore'
          type: array
        prompt:
          $ref: '#/components/schemas/Message'
      required:
      - prompt
      - k_scored_generations
      type: object
    LoraFinetuningConfig:
      additionalProperties: false
      properties:
@ -989,6 +1002,18 @@ components:
      - tool_calls
      - tool_responses
      type: object
    MessageScore:
      additionalProperties: false
      properties:
        message:
          $ref: '#/components/schemas/Message'
        score:
          type: number
      required:
      - message
      - score
      title: A single message and its score.
      type: object
    OptimizerConfig:
      additionalProperties: false
      properties:
@ -1019,8 +1044,10 @@ components:
          items:
            additionalProperties: false
            properties:
-              generation:
+              k_generations:
-                $ref: '#/components/schemas/Message'
+                items:
                  $ref: '#/components/schemas/Message'
                type: array
              message_history:
                items:
                  $ref: '#/components/schemas/Message'
@ -1030,7 +1057,7 @@ components:
            required:
            - prompt
            - message_history
-            - generation
+            - k_generations
            type: object
          type: array
      required:
@ -1044,30 +1071,7 @@ components:
      properties:
        scored_generations:
          items:
-            additionalProperties: false
+            $ref: '#/components/schemas/KScoredPromptGenerations'
            properties:
              prompt_generation:
                additionalProperties: false
                properties:
                  generation:
                    $ref: '#/components/schemas/Message'
                  message_history:
                    items:
                      $ref: '#/components/schemas/Message'
                    type: array
                  prompt:
                    $ref: '#/components/schemas/Message'
                required:
                - prompt
                - message_history
                - generation
                type: object
              score:
                type: number
            required:
            - prompt_generation
            - score
            type: object
          type: array
      required:
      - scored_generations
@ -1408,12 +1412,12 @@ security:
 servers:
 - url: http://llama.meta.com
 tags:
 - name: Inference
 - name: SyntheticDataGeneration
- name: RewardScoring
+- name: Datasets
 - name: AgenticSystem
 - name: Finetuning
- name: Inference
+- name: RewardScoring
 - name: Datasets
 - description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" />
  name: ShieldConfig
 - description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
@ -1522,6 +1526,14 @@ tags:
    <SchemaDefinition schemaRef="#/components/schemas/RewardScoringRequest" />'
  name: RewardScoringRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/KScoredPromptGenerations"
    />
  name: KScoredPromptGenerations
 - description: 'A single message and its score.
    <SchemaDefinition schemaRef="#/components/schemas/MessageScore" />'
  name: MessageScore
 - description: 'Response from the reward scoring. Batch of (prompt, response, score)
    tuples that pass the threshold.
@ -1570,8 +1582,10 @@ x-tagGroups:
  - FinetuningJobLogStream
  - FinetuningJobStatusResponse
  - FinetuningTrainRequest
  - KScoredPromptGenerations
  - LoraFinetuningConfig
  - Message
  - MessageScore
  - OptimizerConfig
  - RewardScoringRequest
  - RewardScoringResponse