From c9a75c46289a01a48ffec43d0e7d5b840da46b48 Mon Sep 17 00:00:00 2001
From: Hardik Shah <hjshah@fb.com>
Date: Wed, 26 Jun 2024 16:25:42 -0700
Subject: [PATCH] update reward model scoring apis

---
 reward_model_scoring.yaml            | 237 +++++++++++++--------------
 simple_view/reward_model_scoring.yml |  70 ++++++++
 synthetic_data_generation.yaml       |   2 +-
 3 files changed, 182 insertions(+), 127 deletions(-)
 create mode 100644 simple_view/reward_model_scoring.yml

diff --git a/reward_model_scoring.yaml b/reward_model_scoring.yaml
index a174c63f4..29229afdc 100644
--- a/reward_model_scoring.yaml
+++ b/reward_model_scoring.yaml
@@ -1,108 +1,13 @@
 openapi: 3.0.0
 info:
-  title: Reward Model Service
-  version: 1.0.0
-components:
-  schemas:
-    Message:
-      # TODO: Keep in sync with /chat_completion
-      type: object
-      properties:
-        role:
-          type: string
-          description: Role of the entity in the message.
-        text:
-          type: string
-          description: Text content of the message.
-        attachments:
-          type: array
-          items:
-            $ref: '#/components/schemas/Attachment'
-    Completion:
-      # TODO: Keep in sync with /chat_completion
-      type: object
-      properties:
-        id:
-          type: string
-          description: Unique identifier for the completion.
-        role:
-          type: string
-        text:
-          type: string
-        attachments:
-          type: array
-          items:
-            $ref: '#/components/schemas/Attachment'
-        tokens:
-          type: array
-          items:
-            type: integer
-        logprobs:
-          type: array
-          items:
-            type: number
-        finish_reason:
-          type: string
-    # Not Used in API but this json encoded version will be used in batch callsite
-    PromptResponsePair:
-      type: object
-      properties:
-        id:
-          type: string
-          description: Unique identifier for the prompt-response pair.
-        prompt:
-          type: array
-          items:
-            $ref: '#/components/schemas/Message'
-        response:
-          $ref: '#/components/schemas/Completion'
-    # Not Used in API but this json encoded version will be used in batch callsite
-    PromptResponseScore:
-      type: object
-      properties:
-        id:
-          type: string
-          description: Identifier carried over from the user provided id.
-        raw_score:
-          type: number
-          format: float
-          description: Raw score before any scoring function is applied.
-        score:
-          type: number
-          format: float
-          description: Final score after applying the scoring function.
-    BatchRewardModelScoring:
-      type: object
-      properties:
-        job_id:
-          type: string
-          description: Unique identifier for the job.
-        created:
-          type: string
-          format: date-time
-          description: Timestamp when the job was created.
-        status:
-          type: string
-          enum: [started, completed, running]
-          description: Current status of the job.
-        input_path:
-          type: string
-          description: User provided input path.
-        success_file_path:
-          type: string
-          description: Path to the success file containing scores.
-        error_file_path:
-          type: string
-          description: Path to the error file.
-        metadata:
-          type: object
-          additionalProperties: true
-          description: User provided metadata carried forward in the response.
+  title: Reward Model Service API
+  version: 0.0.1
 paths:
   /reward_model_scoring/:
     post:
-      summary: Score a pair of prompt and response using a reward model
-      description: Take a pair of prompt and responses and score them using a reward model
+      summary: Score a prompt-response pair using a reward model
+      description: |
+        This endpoint scores a given prompt-response pair using a specified reward model and scoring function.
       requestBody:
         required: true
         content:
@@ -110,19 +15,18 @@ paths:
             schema:
               type: object
               properties:
-                reward_model:
-                  type: string
-                  description: Unique identifier for the reward model 
-                messages:
+                prompt_with_response:
                   type: array
                   items:
                     $ref: '#/components/schemas/Message'
-                  description: List[Message] same as what is used in /chat_completion api 
-                response:
-                  $ref: '#/components/schemas/Completion'
-                  description: Completion same as what is used in /chat_completion api 
-                scoring_function:
+                  description: "Prompt and response joined as a list of messages."
+                reward_model:
                   type: string
+                  description: "Identifier for the reward model to be used."
+                scoring_function:
+                  $ref: '#/components/schemas/ScoringFunction'
+                options:
+                  $ref: '#/components/schemas/Options'
       responses:
         '200':
           description: Scoring completed successfully
@@ -133,18 +37,18 @@ paths:
                 properties:
                   id:
                     type: string
-                    description: Unique identifier for the scoring request
-                  raw_score:
+                  logprob:
                     type: number
                     format: float
                   score:
                     type: number
                     format: float
-                    
-  /batch_reward_model_scoring/:
+
+  /reward_model_scoring/submit_job/:
     post:
-      summary: Batch score multiple pairs of prompts and responses
-      description: Take a pair of prompt and responses and score them using a reward model
+      summary: Batch scoring using reward models
+      description: |
+        Submit a batch job for scoring multiple prompt-response pairs using a reward model.
       requestBody:
         required: true
         content:
@@ -154,24 +58,105 @@ paths:
               properties:
                 reward_model:
                   type: string
-                  description: Unique identifier for the reward model 
-                # TODO: Maybe take local path and api first uploads and generates handle 
-                # which is returned in the repsonse.
-                prompt_response_path:
-                  type: string
-                  description: File path for a JSONL where each line is a JSON encoded `PromptResponsePair` object
+                  description: "Identifier for the reward model to be used."
+                prompt_with_response_path:
+                  $ref: '#/components/schemas/Path'
+                  description: "Path to a JSONL file where each line is a List[Message] and custom_id."
                 scoring_function:
-                  type: string
-                  description: String used to resolve to the appropriate scoring function from a registry
+                  $ref: '#/components/schemas/ScoringFunction'
                 metadata:
                   type: object
                   additionalProperties: true
-                  description: Metadata to carry forward in the response
+                  description: "Metadata to carry forward in the response."
       responses:
         '200':
-          description: Batch scoring job submitted successfully
+          description: Batch scoring job successfully submitted
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchRewardModelScoring'
+                $ref: '#/components/schemas/BatchRewardModelScoringJob'
 
+  /reward_model_scoring/submit_job/job_status:
+    get:
+      summary: Get status for an already submitted job
+      description: |
+        Retrieve the status and details of a previously submitted batch scoring job using its unique job ID.
+      parameters:
+        - in: query
+          name: job_id
+          schema:
+            type: string
+          required: true
+          description: "Unique identifier for the batch scoring job."
+      responses:
+        '200':
+          description: Batch scoring job status retrieved successfully
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchRewardModelScoringJob'
+
+components:
+  schemas:
+    Message:
+      # Placeholder (a bare key parses as null, an invalid schema). TODO: reuse the /chat_completion Message.
+      type: object
+    Options:  # Referenced by the `options` request field of POST /reward_model_scoring/.
+      type: object
+      properties:
+        logprobs:
+          type: boolean
+        max_tokens:
+          type: integer
+        temperature:
+          type: number
+        top_p:
+          type: number
+        # TODO: Add or remove reward-model-specific params as the API settles.
+
+    ScoringFunction:  # Referenced by `scoring_function` in both the single-pair and batch requests.
+      type: object
+      properties:
+        name:
+          type: string
+        params:  # Free-form object: additionalProperties is true, so any keys are accepted.
+          type: object
+          additionalProperties: true
+
+    Path:  # Referenced by prompt_with_response_path and the job's input/success/error file paths.
+      type: object
+      properties:
+        value:  # Presumably interpreted according to `type` below — TODO confirm.
+          type: string
+        type:
+          type: string
+          enum:
+            - raw_bytes
+            - filepath
+            - uri
+
+    BatchRewardModelScoringJob:  # 200 response body of both submit_job/ (POST) and job_status (GET).
+      type: object
+      properties:
+        job_id:  # Presumably the value job_status expects as its `job_id` query parameter — confirm.
+          type: string
+        created:
+          type: string
+          format: date-time
+        status:
+          type: string
+          enum:
+            - validating
+            - running
+            - completed
+            - failed
+        input_file_path:
+          $ref: '#/components/schemas/Path'
+        success_file_path:  # JSONL; each row has {custom_id, logprob, score}.
+          $ref: '#/components/schemas/Path'
+        error_file_path:  # JSONL; each row has {custom_id, error}.
+          $ref: '#/components/schemas/Path'
+        metadata:
+          type: object
+          additionalProperties: true
+          description: "Metadata carried forward from the job submission."
diff --git a/simple_view/reward_model_scoring.yml b/simple_view/reward_model_scoring.yml
new file mode 100644
index 000000000..676b1d835
--- /dev/null
+++ b/simple_view/reward_model_scoring.yml
@@ -0,0 +1,70 @@
+# Reward Model Service
+== Schema ==
+Message:
+  # Same as /chat_completion
+
+Options:
+  logprobs: bool
+  max_tokens: int
+  temperature: float
+  top_p: float
+  # TODO: Figure out which other reward-model-specific params are needed
+
+ScoringFunction:
+  name: str
+  params: json
+
+BatchRewardModelScoringJob:
+  job_id: str
+  created: str  # format date-time
+  status: str  # enum (validating, running, completed, failed)
+  input_file_path: Path
+  success_file_path: Path  # jsonl where each row has {custom_id: <from input>, logprob: <float>, score: <float>}
+  error_file_path: Path  # jsonl where each row has {custom_id: <from input>, error: <error_info>}
+  metadata: json  # carry forward from job submission api
+
+
+== Callsites ==
+
+callsite:
+  /reward_model_scoring/
+request_type:
+  post
+description:
+  Score a prompt-response pair using a reward model
+request:
+  prompt_with_response: List[Message]  # prompt and response joined as a List[Message]
+  reward_model: str
+  scoring_function: ScoringFunction
+  options: Options
+response:
+  id: str
+  logprob: float
+  score: float
+
+
+callsite:
+  /reward_model_scoring/submit_job/
+request_type:
+  post
+description:
+  Batch scoring using reward models
+request:
+  reward_model: str
+  prompt_with_response_path: Path  # jsonl file where each line is a List[Message] and custom_id
+  scoring_function: ScoringFunction
+  metadata: json  # anything to carry forward over in the response
+response:
+  batch_reward_model_scoring_job: BatchRewardModelScoringJob
+
+
+callsite:
+  /reward_model_scoring/submit_job/job_status
+request_type:
+  get
+description:
+  Get status for an already submitted job
+request:
+  job_id: str  # unique identifier for the job
+response:
+  batch_reward_model_scoring_job: BatchRewardModelScoringJob
diff --git a/synthetic_data_generation.yaml b/synthetic_data_generation.yaml
index b3bdecd58..81e8f4965 100644
--- a/synthetic_data_generation.yaml
+++ b/synthetic_data_generation.yaml
@@ -20,7 +20,7 @@ paths:
                   description: Model identifier for batch inference.
                 prompts_path:
                   type: string
-                  description: Path to prompts, JSONL where each row is formatted for batch inference.
+                  description: Path to prompts, JSONL for batch inference.
                 batch_size:
                   type: integer
                   description: Number of prompts to process in each batch.