# llama-stack-mirror/reward_model_scoring.yaml

# OpenAPI document header: specification version, then the service's title
# and version. Both version strings are quoted so they always load as strings.
openapi: '3.0.0'
info:
  version: '0.0.1'
  title: Reward Model Service API
paths:
  /reward_model_scoring/:
    # Scoring of a single prompt-response pair; the result is returned in the
    # 200 response body.
    post:
      summary: Score a prompt-response pair using a reward model
      description: |
        This endpoint scores a given prompt-response pair using a specified reward model and scoring function.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              # No `required` list is declared, so each property is optional
              # as far as this schema is concerned.
              properties:
                prompt_with_response:
                  description: 'Prompt and response joined as a list of messages.'
                  type: array
                  items:
                    $ref: '#/components/schemas/Message'
                reward_model:
                  description: 'Identifier for the reward model to be used.'
                  type: string
                scoring_function:
                  $ref: '#/components/schemas/ScoringFunction'
                options:
                  $ref: '#/components/schemas/Options'
      responses:
        '200':
          description: Scoring completed successfully
          content:
            application/json:
              schema:
                type: object
                properties:
                  id: {type: string}
                  logprob: {type: number, format: float}
                  score: {type: number, format: float}

  /reward_model_scoring/submit_job/:
    # Submits a batch scoring job. The 200 response is a
    # BatchRewardModelScoringJob, the same schema the job_status endpoint
    # returns.
    post:
      summary: Batch scoring using reward models
      description: |
        Submit a batch job for scoring multiple prompt-response pairs using a reward model.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                reward_model:
                  type: string
                  description: "Identifier for the reward model to be used."
                prompt_with_response_path:
                  # OpenAPI 3.0 ignores any key placed next to `$ref`, so the
                  # reference is wrapped in `allOf` to keep the description
                  # attached to this property.
                  allOf:
                    - $ref: '#/components/schemas/Path'
                  description: "Path to a JSONL file where each line is a List[Message] and custom_id."
                scoring_function:
                  $ref: '#/components/schemas/ScoringFunction'
                metadata:
                  type: object
                  # Free-form mapping: any keys and values are accepted.
                  additionalProperties: true
                  description: "Metadata to carry forward in the response."
      responses:
        '200':
          description: Batch scoring job successfully submitted
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchRewardModelScoringJob'

  /reward_model_scoring/submit_job/job_status:
    # Status lookup for a batch scoring job. The 200 response is a
    # BatchRewardModelScoringJob, the same schema submit_job returns.
    get:
      summary: Get status for an already submitted job
      description: |
        Retrieve the status and details of a previously submitted batch scoring job using its unique job ID.
      parameters:
        # The job to look up is passed as a required query-string parameter.
        - name: job_id
          in: query
          required: true
          description: 'Unique identifier for the batch scoring job.'
          schema: {type: string}
      responses:
        '200':
          description: Batch scoring job status retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchRewardModelScoringJob'

components:
  schemas:
    Message:
      # TODO: reuse the Message schema from /chat_completion once it can be
      # referenced from this file. Until then this is an explicit free-form
      # object, so that '#/components/schemas/Message' resolves to a valid
      # schema instead of a bare null value.
      # NOTE(review): assumes a message is a JSON object - confirm against
      # the /chat_completion definition.
      type: object
      additionalProperties: true

    Options:
      # Settings object referenced by the `options` property of the
      # POST /reward_model_scoring/ request body. No property is required.
      type: object
      properties:
        logprobs: {type: boolean}
        max_tokens: {type: integer}
        temperature: {type: number}
        top_p: {type: number}
        # TODO: Add/Remove more reward model specific params

    ScoringFunction:
      # Object with a string `name` and a free-form `params` mapping.
      type: object
      properties:
        name: {type: string}
        params:
          # Any keys and values are accepted here.
          additionalProperties: true
          type: object

    Path:
      # A string `value` paired with a `type` tag limited to three choices.
      # NOTE(review): presumably `type` tells consumers how to read `value` -
      # confirm with the service implementation.
      type: object
      properties:
        value: {type: string}
        type:
          type: string
          enum: [raw_bytes, filepath, uri]

    BatchRewardModelScoringJob:
      # Response body of both the submit_job and job_status endpoints.
      type: object
      properties:
        job_id: {type: string}
        created: {type: string, format: date-time}
        status:
          type: string
          enum: [validating, running, completed, failed]
        # The three file locations below all use the shared Path schema.
        input_file_path:
          $ref: '#/components/schemas/Path'
        success_file_path:
          $ref: '#/components/schemas/Path'
        error_file_path:
          $ref: '#/components/schemas/Path'
        metadata:
          description: 'Metadata carried forward from the job submission.'
          type: object
          # Free-form mapping: any keys and values are accepted.
          additionalProperties: true