update reward model scoring apis

2025-06-28 02:53:30 +00:00 · 2024-06-26 16:25:42 -07:00 · 2024-06-26 16:25:42 -07:00 · c9a75c4628
commit c9a75c4628
parent 04f89ad315
3 changed files with 182 additions and 127 deletions
--- a/reward_model_scoring.yaml
+++ b/reward_model_scoring.yaml
@ -1,108 +1,13 @@
 openapi: 3.0.0
 info:
-  title: Reward Model Service
-  version: 1.0.0
-components:
-  schemas:
-    Message:
-      # TODO: Keep in sync with /chat_completion
-      type: object
-      properties:
-        role:
-          type: string
-          description: Role of the entity in the message.
-        text:
-          type: string
-          description: Text content of the message.
-        attachments:
-          type: array
-          items:
-            $ref: '#/components/schemas/Attachment'
-    Completion:
-      # TODO: Keep in sync with /chat_completion
-      type: object
-      properties:
-        id:
-          type: string
-          description: Unique identifier for the completion.
-        role:
-          type: string
-        text:
-          type: string
-        attachments:
-          type: array
-          items:
-            $ref: '#/components/schemas/Attachment'
-        tokens:
-          type: array
-          items:
-            type: integer
-        logprobs:
-          type: array
-          items:
-            type: number
-        finish_reason:
-          type: string
-    # Not Used in API but this json encoded version will be used in batch callsite
-    PromptResponsePair:
-      type: object
-      properties:
-        id:
-          type: string
-          description: Unique identifier for the prompt-response pair.
-        prompt:
-          type: array
-          items:
-            $ref: '#/components/schemas/Message'
-        response:
-          $ref: '#/components/schemas/Completion'
-    # Not Used in API but this json encoded version will be used in batch callsite
-    PromptResponseScore:
-      type: object
-      properties:
-        id:
-          type: string
-          description: Identifier carried over from the user provided id.
-        raw_score:
-          type: number
-          format: float
-          description: Raw score before any scoring function is applied.
-        score:
-          type: number
-          format: float
-          description: Final score after applying the scoring function.
-    BatchRewardModelScoring:
-      type: object
-      properties:
-        job_id:
-          type: string
-          description: Unique identifier for the job.
-        created:
-          type: string
-          format: date-time
-          description: Timestamp when the job was created.
-        status:
-          type: string
-          enum: [started, completed, running]
-          description: Current status of the job.
-        input_path:
-          type: string
-          description: User provided input path.
-        success_file_path:
-          type: string
-          description: Path to the success file containing scores.
-        error_file_path:
-          type: string
-          description: Path to the error file.
-        metadata:
-          type: object
-          additionalProperties: true
-          description: User provided metadata carried forward in the response.
+  title: Reward Model Service API
+  version: 0.0.1
 paths:
  /reward_model_scoring/:
    post:
-      summary: Score a pair of prompt and response using a reward model
-      description: Take a pair of prompt and responses and score them using a reward model
+      summary: Score a prompt-response pair using a reward model
+      description: |
+        This endpoint scores a given prompt-response pair using a specified reward model and scoring function.
      requestBody:
        required: true
        content:
@ -110,19 +15,18 @@ paths:
            schema:
              type: object
              properties:
-                reward_model:
-                  type: string
-                  description: Unique identifier for the reward model 
-                messages:
+                prompt_with_response:
                  type: array
                  items:
                    $ref: '#/components/schemas/Message'
-                  description: List[Message] same as what is used in /chat_completion api 
-                response:
-                  $ref: '#/components/schemas/Completion'
-                  description: Completion same as what is used in /chat_completion api 
-                scoring_function:
+                  description: "Prompt and response joined as a list of messages."
+                reward_model:
                  type: string
+                  description: "Identifier for the reward model to be used."
+                scoring_function:
+                  $ref: '#/components/schemas/ScoringFunction'
+                options:
+                  $ref: '#/components/schemas/Options'
      responses:
        '200':
          description: Scoring completed successfully
@ -133,18 +37,18 @@ paths:
                properties:
                  id:
                    type: string
-                    description: Unique identifier for the scoring request
-                  raw_score:
+                  logprob:
                    type: number
                    format: float
                  score:
                    type: number
                    format: float

-  /batch_reward_model_scoring/:
+  /reward_model_scoring/submit_job/:
    post:
-      summary: Batch score multiple pairs of prompts and responses
-      description: Take a pair of prompt and responses and score them using a reward model
+      summary: Batch scoring using reward models
+      description: |
+        Submit a batch job for scoring multiple prompt-response pairs using a reward model.
      requestBody:
        required: true
        content:
@ -154,24 +58,105 @@ paths:
              properties:
                reward_model:
                  type: string
-                  description: Unique identifier for the reward model 
-                # TODO: Maybe take local path and api first uploads and generates handle 
-                # which is returned in the repsonse.
-                prompt_response_path:
-                  type: string
-                  description: File path for a JSONL where each line is a JSON encoded `PromptResponsePair` object
+                  description: "Identifier for the reward model to be used."
+                prompt_with_response_path:
+                  $ref: '#/components/schemas/Path'
+                  description: "Path to a JSONL file where each line is a List[Message] and custom_id."
                scoring_function:
-                  type: string
-                  description: String used to resolve to the appropriate scoring function from a registry
+                  $ref: '#/components/schemas/ScoringFunction'
                metadata:
                  type: object
                  additionalProperties: true
-                  description: Metadata to carry forward in the response
+                  description: "Metadata to carry forward in the response."
      responses:
        '200':
-          description: Batch scoring job submitted successfully
+          description: Batch scoring job successfully submitted
          content:
            application/json:
              schema:
-                $ref: '#/components/schemas/BatchRewardModelScoring'
+                $ref: '#/components/schemas/BatchRewardModelScoringJob'

+  /reward_model_scoring/submit_job/job_status:
+    get:
+      summary: Get status for an already submitted job
+      description: |
+        Retrieve the status and details of a previously submitted batch scoring job using its unique job ID.
+      parameters:
+        - in: query
+          name: job_id
+          schema:
+            type: string
+          required: true
+          description: "Unique identifier for the batch scoring job."
+      responses:
+        '200':
+          description: Batch scoring job status retrieved successfully
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchRewardModelScoringJob'
+
+components:
+  schemas:
+    Message:
+      # TODO: reuse the Message schema from /chat_completion; it is left undefined here, so this key is currently null
+
+    Options:
+      type: object
+      properties:
+        logprobs:
+          type: boolean
+        max_tokens:
+          type: integer
+        temperature:
+          type: number
+        top_p:
+          type: number
+        # TODO: Add/Remove more reward model specific params
+
+    ScoringFunction:
+      type: object
+      properties:
+        name:
+          type: string
+        params:
+          type: object
+          additionalProperties: true
+
+    Path:
+      type: object
+      properties:
+        value:
+          type: string
+        type:
+          type: string
+          enum:
+            - raw_bytes
+            - filepath
+            - uri
+
+    BatchRewardModelScoringJob:
+      type: object
+      properties:
+        job_id:
+          type: string
+        created:
+          type: string
+          format: date-time
+        status:
+          type: string
+          enum:
+            - validating
+            - running
+            - completed
+            - failed
+        input_file_path:
+          $ref: '#/components/schemas/Path'
+        success_file_path:
+          $ref: '#/components/schemas/Path'
+        error_file_path:
+          $ref: '#/components/schemas/Path'
+        metadata:
+          type: object
+          additionalProperties: true
+          description: "Metadata carried forward from the job submission."
--- a/simple_view/reward_model_scoring.yml
+++ b/simple_view/reward_model_scoring.yml
@ -0,0 +1,70 @@
+# Reward Model Service
+== Schema ==
+Message:
+  # Same as /chat_completion
+
+Options:
+  logprobs: bool
+  max_tokens: int
+  temperature: float
+  top_p: float
+  # TODO: Figure out which other reward-model-specific params are needed
+
+ScoringFunction:
+  name: str
+  params: json
+
+BatchRewardModelScoringJob:
+  job_id: str
+  created: str  # format date-time
+  status: str  # enum (validating, running, completed, failed)
+  input_file_path: Path
+  success_file_path: Path  # jsonl where each row has {custom_id: <from input>, logprob: <float>, score: <float>}
+  error_file_path: Path  # jsonl where each row has {custom_id: <from input>, error: <error_info>}
+  metadata: json  # carry forward from job submission api
+
+
+== Callsites ==
+
+callsite:
+  reward_model_scoring/
+request_type:
+  post
+description:
+  Score a prompt-response pair using a reward model
+request:
+  prompt_with_response: List[Message]  # prompt and response joined as a List[Message]
+  reward_model: str
+  scoring_function: ScoringFunction
+  options: Options
+response:
+  id: str
+  logprob: float
+  score: float
+
+
+callsite:
+  reward_model_scoring/submit_job/
+request_type:
+  post
+description:
+  Batch scoring using reward models
+request:
+  reward_model: str
+  prompt_with_response_path: Path  # jsonl file where each line is a List[Message] and custom_id
+  scoring_function: ScoringFunction
+  metadata: json  # anything to carry forward in the response
+response:
+  batch_reward_model_scoring_job: BatchRewardModelScoringJob
+
+
+callsite:
+  reward_model_scoring/submit_job/job_status
+request_type:
+  get
+description:
+  Get status for an already submitted job
+request:
+  job_id: str  # unique identifier for the job
+response:
+  batch_reward_model_scoring_job: BatchRewardModelScoringJob
--- a/synthetic_data_generation.yaml
+++ b/synthetic_data_generation.yaml
@ -20,7 +20,7 @@ paths:
                  description: Model identifier for batch inference.
                prompts_path:
                  type: string
-                  description: Path to prompts, JSONL where each row is formatted for batch inference.
+                  description: Path to a JSONL file of prompts for batch inference.
                batch_size:
                  type: integer
                  description: Number of prompts to process in each batch.