---
# OpenAPI 3.0 contract for the synthetic data generation service.
# Version-like scalars are quoted so every parser reads them as strings.
openapi: '3.0.0'
info:
  # API revision followed by its human-readable name.
  version: '0.0.1'
  title: Synthetic Data Generation API
paths:
  # POST operation that accepts a job request as a JSON body and answers with
  # a SyntheticDataGenerationJob object.
  /synthetic_data_gen/submit_job:
    post:
      summary: Submit a job to generate synthetic data
      description: >-
        Submit a job to generate synthetic data using llm + reward model
        scoring + filtering
      requestBody:
        # A body must always be sent with this operation.
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                # --- Generation inputs ---
                model:
                  description: Model used for batch inference
                  type: string
                prompt_file_path:
                  description: Path to the JSONL file containing message_lists and custom IDs
                  type: string
                  # NOTE(review): `path` is not one of the formats defined by
                  # OpenAPI 3.0; confirm that consuming tools understand it.
                  format: path
                options:
                  $ref: '#/components/schemas/Options'
                num_generations:
                  description: Number of generations to produce
                  type: integer
                # --- Scoring and filtering ---
                reward_model:
                  description: Model used for scoring
                  type: string
                scoring_function:
                  $ref: '#/components/schemas/ScoringFunction'
                filtering_function:
                  $ref: '#/components/schemas/FilteringFunction'
                # --- Free-form extras ---
                metadata:
                  description: Additional metadata for the job
                  type: object
                  # Any key/value pairs are accepted inside this object.
                  additionalProperties: true
      responses:
        # Only the success outcome is described for this operation.
        '200':
          description: Job successfully submitted
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SyntheticDataGenerationJob'

  # GET operation that looks a job up by id and answers with a
  # SyntheticDataGenerationJob object.
  /synthetic_data_gen/job_status:
    get:
      summary: Get job status
      description: Get status for an already submitted job
      parameters:
        # The job id travels in the query string and may not be omitted.
        - name: job_id
          in: query
          required: true
          description: Unique identifier for the job
          schema:
            type: string
      responses:
        # Only the success outcome is described for this operation.
        '200':
          description: Job status retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SyntheticDataGenerationJob'
components:
  schemas:
    # A filtering function identified by name, plus an open-ended bag of
    # parameters for it.
    FilteringFunction:
      type: object
      properties:
        name:
          description: Name of the filtering function
          type: string
        params:
          description: JSON object containing parameters for the filtering function
          type: object
          # No fixed parameter schema: arbitrary keys are allowed.
          additionalProperties: true
    # One data point: identifiers, the prompt messages, a single response
    # message, and two numeric values (logprob and score).
    SyntheticDataPoint:
      type: object
      properties:
        # --- Identification ---
        custom_id:
          description: Custom identifier for the data point
          type: string
        index:
          description: Index of the data point
          type: integer
        # --- Conversation content ---
        prompt:
          description: List of messages used as prompt
          type: array
          items:
            $ref: '#/components/schemas/Message'
        response:
          $ref: '#/components/schemas/Message'
        # --- Numeric annotations ---
        logprob:
          description: Log probability of the response
          type: number
          format: float
        score:
          description: Score of the response based on the reward model
          type: number
          format: float
    # Job record returned by both the submit_job and job_status operations.
    SyntheticDataGenerationJob:
      type: object
      properties:
        # --- Identity and state ---
        job_id:
          description: ID provided by the API
          type: string
        created:
          description: Timestamp when the job was created
          type: string
          format: date-time
        status:
          description: Current status of the job
          type: string
          # The only values the status field may take.
          enum:
            - validating
            - running
            - completed
            - failed
        # --- File locations ---
        # NOTE(review): `path` is not one of the formats defined by OpenAPI
        # 3.0; confirm that consuming tools understand it.
        input_file_path:
          description: Path to the input JSONL file
          type: string
          format: path
        success_file_path:
          description: Path to the JSONL file containing successful results
          type: string
          format: path
        error_file_path:
          description: Path to the JSONL file containing errors
          type: string
          format: path
        # --- Free-form extras ---
        metadata:
          description: Additional metadata about the job
          type: object
          # Any key/value pairs are accepted inside this object.
          additionalProperties: true