update reward model scoring apis

2025-06-28 02:53:30 +00:00 · 2024-06-26 16:25:42 -07:00 · 2024-06-26 16:25:42 -07:00 · c9a75c4628
commit c9a75c4628
parent 04f89ad315
3 changed files with 182 additions and 127 deletions
--- a/reward_model_scoring.yaml
+++ b/reward_model_scoring.yaml
@ -1,108 +1,13 @@
 openapi: 3.0.0
 info:
-  title: Reward Model Service
+  title: Reward Model Service API
-  version: 1.0.0
+  version: 0.0.1
 components:
  schemas:
    Message:
      # TODO: Keep in sync with /chat_completion
      type: object
      properties:
        role:
          type: string
          description: Role of the entity in the message.
        text:
          type: string
          description: Text content of the message.
        attachments:
          type: array
          items:
            $ref: '#/components/schemas/Attachment'
    Completion:
      # TODO: Keep in sync with /chat_completion
      type: object
      properties:
        id:
          type: string
          description: Unique identifier for the completion.
        role:
          type: string
        text:
          type: string
        attachments:
          type: array
          items:
            $ref: '#/components/schemas/Attachment'
        tokens:
          type: array
          items:
            type: integer
        logprobs:
          type: array
          items:
            type: number
        finish_reason:
          type: string
    # Not used in the API, but the JSON-encoded form of this object is used at the batch callsite
    PromptResponsePair:
      type: object
      properties:
        id:
          type: string
          description: Unique identifier for the prompt-response pair.
        prompt:
          type: array
          items:
            $ref: '#/components/schemas/Message'
        response:
          $ref: '#/components/schemas/Completion'
    # Not used in the API, but the JSON-encoded form of this object is used at the batch callsite
    PromptResponseScore:
      type: object
      properties:
        id:
          type: string
          description: Identifier carried over from the user provided id.
        raw_score:
          type: number
          format: float
          description: Raw score before any scoring function is applied.
        score:
          type: number
          format: float
          description: Final score after applying the scoring function.
    BatchRewardModelScoring:
      type: object
      properties:
        job_id:
          type: string
          description: Unique identifier for the job.
        created:
          type: string
          format: date-time
          description: Timestamp when the job was created.
        status:
          type: string
          enum: [started, completed, running]
          description: Current status of the job.
        input_path:
          type: string
          description: User provided input path.
        success_file_path:
          type: string
          description: Path to the success file containing scores.
        error_file_path:
          type: string
          description: Path to the error file.
        metadata:
          type: object
          additionalProperties: true
          description: User provided metadata carried forward in the response.
 paths:
  /reward_model_scoring/:
    post:
-      summary: Score a pair of prompt and response using a reward model
+      summary: Score a prompt-response pair using a reward model
-      description: Take a pair of prompt and responses and score them using a reward model
+      description: |
        This endpoint scores a given prompt-response pair using a specified reward model and scoring function.
      requestBody:
        required: true
        content:
@ -110,19 +15,18 @@ paths:
            schema:
              type: object
              properties:
-                reward_model:
+                prompt_with_response:
                  type: string
                  description: Unique identifier for the reward model 
                messages:
                  type: array
                  items:
                    $ref: '#/components/schemas/Message'
-                  description: List[Message] same as what is used in /chat_completion api 
+                  description: "Prompt and response joined as a list of messages."
-                response:
+                reward_model:
                  $ref: '#/components/schemas/Completion'
                  description: Completion same as what is used in /chat_completion api 
                scoring_function:
                  type: string
                  description: "Identifier for the reward model to be used."
                scoring_function:
                  $ref: '#/components/schemas/ScoringFunction'
                options:
                  $ref: '#/components/schemas/Options'
      responses:
        '200':
          description: Scoring completed successfully
@ -133,18 +37,18 @@ paths:
                properties:
                  id:
                    type: string
-                    description: Unique identifier for the scoring request
+                  logprob:
                  raw_score:
                    type: number
                    format: float
                  score:
                    type: number
                    format: float
-  /batch_reward_model_scoring/:
+  /reward_model_scoring/submit_job/:
    post:
-      summary: Batch score multiple pairs of prompts and responses
+      summary: Batch scoring using reward models
-      description: Take a pair of prompt and responses and score them using a reward model
+      description: |
        Submit a batch job for scoring multiple prompt-response pairs using a reward model.
      requestBody:
        required: true
        content:
@ -154,24 +58,105 @@ paths:
              properties:
                reward_model:
                  type: string
-                  description: Unique identifier for the reward model 
+                  description: "Identifier for the reward model to be used."
-                # TODO: Maybe take local path and api first uploads and generates handle 
+                prompt_with_response_path:
-                # which is returned in the repsonse.
+                  $ref: '#/components/schemas/Path'
-                prompt_response_path:
+                  description: "Path to a JSONL file where each line is a List[Message] and custom_id."
                  type: string
                  description: File path for a JSONL where each line is a JSON encoded `PromptResponsePair` object
                scoring_function:
-                  type: string
+                  $ref: '#/components/schemas/ScoringFunction'
                  description: String used to resolve to the appropriate scoring function from a registry
                metadata:
                  type: object
                  additionalProperties: true
-                  description: Metadata to carry forward in the response
+                  description: "Metadata to carry forward in the response."
      responses:
        '200':
-          description: Batch scoring job submitted successfully
+          description: Batch scoring job successfully submitted
          content:
            application/json:
              schema:
-                $ref: '#/components/schemas/BatchRewardModelScoring'
+                $ref: '#/components/schemas/BatchRewardModelScoringJob'
  /reward_model_scoring/submit_job/job_status:
    get:
      summary: Get status for an already submitted job
      description: |
        Retrieve the status and details of a previously submitted batch scoring job using its unique job ID.
      parameters:
        - in: query
          name: job_id
          schema:
            type: string
          required: true
          description: "Unique identifier for the batch scoring job."
      responses:
        '200':
          description: Batch scoring job status retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchRewardModelScoringJob'
 components:
  schemas:
    Message:
      # reuse from /chat_completion
    Options:
      type: object
      properties:
        logprobs:
          type: boolean
        max_tokens:
          type: integer
        temperature:
          type: number
        top_p:
          type: number
        # TODO: Add or remove reward-model-specific params as needed
    ScoringFunction:
      type: object
      properties:
        name:
          type: string
        params:
          type: object
          additionalProperties: true
    Path:
      type: object
      properties:
        value:
          type: string
        type:
          type: string
          enum:
            - raw_bytes
            - filepath
            - uri
    BatchRewardModelScoringJob:
      type: object
      properties:
        job_id:
          type: string
        created:
          type: string
          format: date-time
        status:
          type: string
          enum:
            - validating
            - running
            - completed
            - failed
        input_file_path:
          $ref: '#/components/schemas/Path'
        success_file_path:
          $ref: '#/components/schemas/Path'
        error_file_path:
          $ref: '#/components/schemas/Path'
        metadata:
          type: object
          additionalProperties: true
          description: "Metadata carried forward from the job submission."
--- a/simple_view/reward_model_scoring.yml
+++ b/simple_view/reward_model_scoring.yml
@ -0,0 +1,70 @@
 # Reward Model Service
 == Schema ==
 Message:
  # Same as /chat_completion
 Options:
  logprobs: bool
  max_tokens: int
  temperature: float
  top_p: float
  # TODO: Figure out what other reward-model-specific params are needed
 ScoringFunction:
  name: str
  params: json
 BatchRewardModelScoringJob:
  job_id: str
  created: str  # format date-time
  status: str  # enum (validating, running, completed, failed)
  input_file_path: Path
  success_file_path: Path  # jsonl where each row has {custom_id: <from input>, logprob: <float>, score: <float>}
  error_file_path: Path  # jsonl where each row has {custom_id: <from input>, error: <error_info>}
  metadata: json  # carry forward from job submission api
 == Callsites ==
 callsite:
  reward_model_scoring/
 request_type:
  post
 description:
  Score a prompt-response pair using a reward model
 request:
  prompt_with_response: List[Message]  # prompt and response joined as a List[Message]
  reward_model: str
  scoring_function: ScoringFunction
  options: Options
 response:
  id: str
  logprob: float
  score: float
 callsite:
  reward_model_scoring/submit_job/
 request_type:
  post
 description:
  Batch scoring using reward models
 request:
  reward_model: str
  prompt_with_response_path: Path  # jsonl file where each line is a List[Message] and custom_id
  scoring_function: ScoringFunction
  metadata: json  # anything to carry forward in the response
 response:
  batch_reward_model_scoring_job: BatchRewardModelScoringJob
 callsite:
  /reward_model_scoring/submit_job/job_status
 request_type:
  get
 description:
  Get status for an already submitted job
 request:
  job_id: str  # unique identifier for the job
 response:
  batch_reward_model_scoring_job: BatchRewardModelScoringJob
--- a/synthetic_data_generation.yaml
+++ b/synthetic_data_generation.yaml
@ -20,7 +20,7 @@ paths:
                  description: Model identifier for batch inference.
                prompts_path:
                  type: string
-                  description: Path to prompts, JSONL where each row is formatted for batch inference.
+                  description: Path to prompts, JSONL for batch inference
                batch_size:
                  type: integer
                  description: Number of prompts to process in each batch.