cleanup

2025-12-03 09:53:45 +00:00 · 2024-07-11 10:04:56 -07:00 · 2024-07-11 10:04:56 -07:00 · f6b2b2fb39
commit f6b2b2fb39
parent 6d6c07b882
13 changed files with 20 additions and 2286 deletions
--- a/README.md
+++ b/README.md
@ -2,9 +2,26 @@ This repo contains the API specifications for various parts of the Llama Stack.
 The Stack consists of toolchain-apis and agentic-apis. 
 The tool chain apis that are covered -- 
- chat_completion
+- inference / batch inference
- batch inference
+- post training
 - fine tuning 
 - reward model scoring
 - synthetic data generation
 ### Generate OpenAPI specs 
 Set up virtual environment 
 ```
 python3.9 -m venv ~/.venv/toolchain/ 
 source ~/.venv/toolchain/bin/activate
 with-proxy pip3 install -r requirements.txt 
 ```
 Run the generate.sh script 
 ```
 cd source && sh generate.sh
 ```
--- a/batch_inference.yaml
+++ b/batch_inference.yaml
@ -1,167 +0,0 @@
 openapi: 3.0.0
 info:
  title: Batch Inference API
  version: 0.0.1
 paths:
  # Submits a batch inference job. The request names a model and a prompt
  # file; the 200 response body is a BatchInference record.
  /batch_inference/submit_job:
    post:
      summary: Submit a batch inference job
      description: |
        This endpoint allows clients to submit a batch inference job using a model and a prompt file.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                model:
                  type: string
                  description: "The model identifier to be used for inference."
                prompt_file_path:
                  # OpenAPI 3.0 ignores any keyword that sits next to $ref, so
                  # the reference is wrapped in allOf to keep the description.
                  allOf:
                    - $ref: '#/components/schemas/Path'
                  description: "Path to a JSONL file where each line is a JSON-encoded list of messages."
                options:
                  $ref: '#/components/schemas/Options'
                num_generations:
                  type: integer
                  description: "Number of generations to produce."
      responses:
        '200':
          description: Batch inference job successfully submitted
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchInference'
  # Status lookup for a previously submitted batch inference job, selected by
  # the required job_id query parameter; responds with a BatchInference record.
  /batch_inference/job_status:
    get:
      summary: 'Get status for an already submitted job'
      description: >
        Retrieve the status and details of a previously submitted
        batch inference job using its unique job ID.
      parameters:
        - name: job_id
          in: query
          required: true
          description: 'Unique identifier for the batch inference job.'
          schema:
            type: string
      responses:
        "200":
          description: 'Batch inference job status retrieved successfully'
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/BatchInference"
 components:
  schemas:
    Message:
      type: object
      properties:
        role:
          type: string
        text:
          type: string
        attachments:
          type: array
          items:
            $ref: '#/components/schemas/MediaAttachment'
        eot:
          type: boolean
          description: "End of transmission flag."
        tool_call:
          type: boolean
          description: "Indicates if it's a tool call - builtin, custom, or ipython."
        is_complete:
          type: boolean
          description: "For streaming, indicates if the message is complete."
        is_header_complete:
          type: boolean
          description: "For streaming, indicates if the header of the message is complete."
        metadata:
          type: object
          additionalProperties: true
          description: "Additional metadata as JSON."
    MediaAttachment:
      type: object
      properties:
        attachment_type:
          $ref: '#/components/schemas/MediaAttachmentType'
        data_type:
          $ref: '#/components/schemas/MediaAttachmentDataType'
        data:
          type: string
    MediaAttachmentType:
      type: string
      enum:
        - image
        - video
        - audio
        - text
      description: "Type of media attachment."
    MediaAttachmentDataType:
      type: string
      enum:
        - raw_bytes
        - filepath
        - uri
      description: "Data type of the media attachment."
    # Record describing one batch inference job; it is the response schema of
    # both the submit_job and job_status endpoints in this spec.
    BatchInference:
      type: object
      properties:
        job_id:
          description: 'ID provided by the API for the job.'
          type: string
        created:
          description: 'Timestamp when the job was created.'
          type: string
          format: date-time
        status:
          description: 'Current status of the job.'
          type: string
          enum: [validating, running, completed, failed]
        # The three file locations all use the shared Path schema.
        input_file_path:
          $ref: "#/components/schemas/Path"
        success_file_path:
          $ref: "#/components/schemas/Path"
        error_file_path:
          $ref: "#/components/schemas/Path"
        metadata:
          description: 'Additional metadata related to the job.'
          type: object
          additionalProperties: true
    Options:
      type: object
      properties:
        logprobs:
          type: boolean
        max_tokens:
          type: integer
        temperature:
          type: number
        top_p:
          type: number
    Path:
      type: object
      properties:
        value:
          type: string
          description: "The path value."
        type:
          type: string
          enum:
            - raw_bytes
            - filepath
            - uri
          description: "Data Type of the path."
--- a/chat_completion.yaml
+++ b/chat_completion.yaml
@ -1,140 +0,0 @@
 openapi: 3.0.0
 info:
  title: Chat Completion API
  version: 0.0.1
 paths:
  /chat_completion/:
    post:
      summary: Submit a chat completion request
      description: |
        This endpoint allows clients to submit a chat completion request.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                messages:
                  type: array
                  items:
                    $ref: '#/components/schemas/Message'
                model:
                  type: string
                options:
                  $ref: '#/components/schemas/Options'
                n_completions:
                  type: integer
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                type: object
                properties:
                  id:
                    type: string
                  candidates:
                    type: array
                    items:
                      $ref: '#/components/schemas/Completion'
                  model_called:
                    type: string
                  usage:
                    $ref: '#/components/schemas/TokenUsage'
 components:
  schemas:
    Message:
      type: object
      properties:
        role:
          type: string
        text:
          type: string
        attachments:
          type: array
          items:
            $ref: '#/components/schemas/MediaAttachment'
        eot:
          type: boolean
          description: "End of transmission flag."
        tool_call:
          type: boolean
          description: "Indicates if it's a tool call - builtin, custom, or ipython."
        is_complete:
          type: boolean
          description: "For streaming, indicates if the message is complete."
        is_header_complete:
          type: boolean
          description: "For streaming, indicates if the header of the message is complete."
        metadata:
          type: object
          additionalProperties: true
          description: "Additional metadata as JSON."
    MediaAttachment:
      type: object
      properties:
        attachment_type:
          $ref: '#/components/schemas/MediaAttachmentType'
        data_type:
          $ref: '#/components/schemas/MediaAttachmentDataType'
        data:
          type: string
    MediaAttachmentType:
      type: string
      enum:
        - image
        - video
        - audio
        - text
      description: "Type of media attachment."
    MediaAttachmentDataType:
      type: string
      enum:
        - raw_bytes
        - filepath
        - uri
      description: "Data type of the media attachment."
    # One candidate returned by the chat completion endpoint: the generated
    # message plus its token list, logprob list and termination reason.
    Completion:
      type: object
      properties:
        id:
          type: string
        message:
          $ref: "#/components/schemas/Message"
        tokens:
          type: array
          items: {type: integer}
        logprobs:
          type: array
          items: {type: number}
        finish_reason:
          description: 'Reason for completion termination.'
          type: string
          enum: [stop, safety, max-length]
    Options:
      type: object
      properties:
        logprobs:
          type: boolean
        max_tokens:
          type: integer
        temperature:
          type: number
        top_p:
          type: number
    TokenUsage:
      type: object
      properties:
        input_tokens:
          type: integer
        output_tokens:
          type: integer
        total_tokens:
          type: integer
--- a/fine_tuning.yaml
+++ b/fine_tuning.yaml
@ -1,266 +0,0 @@
 openapi: 3.0.0
 info:
  title: Fine Tuning API
  version: 0.0.1
  description: API for managing fine tuning jobs for machine learning models.
 paths:
  # Creates a fine tuning job from a Config payload; the 200 response body is
  # the resulting FineTuningJob.
  /fine_tuning/jobs/submit:
    post:
      summary: Submit a fine tuning job
      description: Submit a fine tuning job with the specified configuration.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Config'
      responses:
        # Status codes are quoted: OpenAPI requires string keys here, and an
        # unquoted 200 is loaded as an integer by YAML parsers.
        '200':
          description: Successfully submitted the fine tuning job.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FineTuningJob'
  # Returns the FineTuningJob record for the job selected by the required
  # job_id query parameter.
  # NOTE(review): the summary and description mention the "last N" jobs, but
  # the only parameter is a single job_id and the response is one
  # FineTuningJob -- confirm the intended contract and align the wording.
  /fine_tuning/jobs/status:
    get:
      summary: Gets last N fine tuning jobs
      description: Retrieve the status of the last N fine tuning jobs based on the provided job ID.
      parameters:
        - in: query
          name: job_id
          schema:
            type: string
          required: true
          description: The ID of the job to retrieve status for.
      responses:
        # Status codes are quoted: OpenAPI requires string keys here, and an
        # unquoted 200 is loaded as an integer by YAML parsers.
        '200':
          description: Successfully retrieved the job status.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FineTuningJob'
  # Cancels the job named by job_id in the JSON request body; the 200 response
  # body is the job's FineTuningJob record.
  /fine_tuning/jobs/cancel:
    post:
      summary: Cancel provided job
      description: Cancel the fine tuning job with the specified job ID.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                job_id:
                  type: string
      responses:
        # Status codes are quoted: OpenAPI requires string keys here, and an
        # unquoted 200 is loaded as an integer by YAML parsers.
        '200':
          description: Successfully cancelled the fine tuning job.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FineTuningJob'
  # Streams the logs of the job selected by job_id as newline-delimited JSON
  # (application/x-ndjson); each object carries a list of Log entries.
  /fine_tuning/jobs/tail:
    get:
      summary: Tail logs of a particular job
      description: Stream the logs of a particular job in real-time. This endpoint supports streaming responses.
      parameters:
        - in: query
          name: job_id
          schema:
            type: string
          required: true
          description: The ID of the job to tail logs for.
      responses:
        # Status codes are quoted: OpenAPI requires string keys here, and an
        # unquoted 200 is loaded as an integer by YAML parsers.
        '200':
          description: Streaming logs in real-time.
          content:
            application/x-ndjson:
              schema:
                type: object
                properties:
                  logs:
                    type: array
                    items:
                      $ref: '#/components/schemas/Log'
          headers:
            # No Content-Type entry: OpenAPI ignores a response header with
            # that name; the media type is given by the content key above.
            Transfer-Encoding:
              schema:
                type: string
                default: 'chunked'
 components:
  schemas:
    # Chat message schema -- keep in sync with /chat_completion.
    # A bare `Message:` key parses as null, which is not a valid schema for the
    # $refs in this spec, so the definition (and the attachment schemas it
    # references) is spelled out here.
    Message:
      type: object
      properties:
        role:
          type: string
        text:
          type: string
        attachments:
          type: array
          items:
            $ref: '#/components/schemas/MediaAttachment'
        eot:
          type: boolean
          description: "End of transmission flag."
        tool_call:
          type: boolean
          description: "Indicates if it's a tool call - builtin, custom, or ipython."
        is_complete:
          type: boolean
          description: "For streaming, indicates if the message is complete."
        is_header_complete:
          type: boolean
          description: "For streaming, indicates if the header of the message is complete."
        metadata:
          type: object
          additionalProperties: true
          description: "Additional metadata as JSON."
    MediaAttachment:
      type: object
      properties:
        attachment_type:
          $ref: '#/components/schemas/MediaAttachmentType'
        data_type:
          $ref: '#/components/schemas/MediaAttachmentDataType'
        data:
          type: string
    MediaAttachmentType:
      type: string
      enum:
        - image
        - video
        - audio
        - text
      description: "Type of media attachment."
    MediaAttachmentDataType:
      type: string
      enum:
        - raw_bytes
        - filepath
        - uri
      description: "Data type of the media attachment."
    TrainingDataItem:
      type: object
      properties:
        dialog:
          type: array
          items:
            $ref: '#/components/schemas/Message'
        keep_loss:
          type: array
          items:
            type: boolean
    WandBLogger:
      type: object
      properties:
        project:
          type: string
          description: The project name in WandB where logs will be stored.
    DiskLogger:
      type: object
      properties:
        filename:
          type: string
          description: The filename where logs will be stored on disk.
    FullFineTuneOptions:
      type: object
      properties:
        enable_activation_checkpointing:
          type: boolean
          default: true
        memory_efficient_fsdp_wrap:
          type: boolean
          default: true
        fsdp_cpu_offload:
          type: boolean
          default: true
    LoraFineTuneOptions:
      type: object
      properties:
        lora_attn_modules:
          type: array
          items:
            type: string
        apply_lora_to_mlp:
          type: boolean
          default: false
        apply_lora_to_output:
          type: boolean
          default: false
        lora_rank:
          type: integer
        lora_alpha:
          type: integer
    # Training hyper-parameters for a fine tuning job, plus a choice between
    # the full fine tune and LoRA option sets.
    FineTuningOptions:
      type: object
      properties:
        n_epochs:
          type: integer
        batch_size:
          type: integer
        lr:
          type: number
          format: float
        gradient_accumulation_steps:
          type: integer
        seed:
          type: integer
        shuffle:
          type: boolean
        custom_training_options:
          oneOf:
            - $ref: '#/components/schemas/FullFineTuneOptions'
            - $ref: '#/components/schemas/LoraFineTuneOptions'
          discriminator:
            propertyName: finetuning_type
            # Without a mapping the discriminator value has to equal the schema
            # name; the short values below follow the simple-view spec.
            mapping:
              fft: '#/components/schemas/FullFineTuneOptions'
              lora: '#/components/schemas/LoraFineTuneOptions'
          # NOTE(review): neither option schema declares a finetuning_type
          # property yet -- confirm and add it so payloads can carry the tag.
        extras:
          # json to put other config overrides that are required by torchtune
          type: object
          additionalProperties: true
    # Request payload for submitting a fine tuning job: model, data locations,
    # training options, logger choice and pass-through overrides/metadata.
    Config:
      type: object
      properties:
        model:
          type: string
          description: The model identifier that you want to fine tune.
        data:
          type: string
          format: uri
          description: Path to the JSONL file with each row representing a TrainingDataItem.
        validation_data:
          type: string
          format: uri
          description: Path to the JSONL file used for validation metrics.
        fine_tuning_options:
          $ref: '#/components/schemas/FineTuningOptions'
        logger:
          oneOf:
            - $ref: '#/components/schemas/DiskLogger'
            - $ref: '#/components/schemas/WandBLogger'
          discriminator:
            propertyName: log_type
            # Without a mapping the discriminator value has to equal the schema
            # name; the short values below follow the simple-view spec.
            mapping:
              disk: '#/components/schemas/DiskLogger'
              wandb: '#/components/schemas/WandBLogger'
          # NOTE(review): neither logger schema declares a log_type property
          # yet -- confirm and add it so payloads can carry the tag.
        overrides:
          # eg. --nproc_per_node 4 instead of default that we need to pass through to torchrun
          # when running locally
          type: string
          description: Custom override options for the fine tuning process.
        metadata:
          type: object
          additionalProperties: true
    FineTuningJob:
      type: object
      properties:
        job_id:
          type: string
          description: Unique identifier for the fine tuning job.
        created:
          type: string
          format: date-time
          description: The creation date and time of the job.
        finished_at:
          type: string
          format: date-time
          description: The completion date and time of the job.
        status:
          type: string
          enum: [validation, queued, running, failed, success, cancelled]
          description: The current status of the job.
        error_path:
          type: string
          format: uri
          description: Path to the error log file.
        checkpoints:
          type: array
          items:
            type: string
            format: uri
          description: List of paths to checkpoint files for various epochs.
        logs:
          type: string
          format: uri
          description: Path to the logs, either local or a WandB URI.
        input_config:
          $ref: '#/components/schemas/Config'
        metadata:
          type: object
          additionalProperties: true
    Log:
      type: object
      properties:
        message:
          type: string
          description: The log message.
        timestamp:
          type: string
          format: date-time
          description: The timestamp of the log message.
--- a/openapi/spec.yaml
+++ b/openapi/spec.yaml
--- a/reward_model_scoring.yaml
+++ b/reward_model_scoring.yaml
@ -1,162 +0,0 @@
 openapi: 3.0.0
 info:
  title: Reward Model Service API
  version: 0.0.1
 paths:
  /reward_model_scoring/:
    post:
      summary: Score a prompt-response pair using a reward model
      description: |
        This endpoint scores a given prompt-response pair using a specified reward model and scoring function.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                prompt_with_response:
                  type: array
                  items:
                    $ref: '#/components/schemas/Message'
                  description: "Prompt and response joined as a list of messages."
                reward_model:
                  type: string
                  description: "Identifier for the reward model to be used."
                scoring_function:
                  $ref: '#/components/schemas/ScoringFunction'
                options:
                  $ref: '#/components/schemas/Options'
      responses:
        '200':
          description: Scoring completed successfully
          content:
            application/json:
              schema:
                type: object
                properties:
                  id:
                    type: string
                  logprob:
                    type: number
                    format: float
                  score:
                    type: number
                    format: float
  # Submits a batch scoring job over a file of prompt-response pairs; the 200
  # response body is a BatchRewardModelScoringJob record.
  /reward_model_scoring/submit_job/:
    post:
      summary: Batch scoring using reward models
      description: |
        Submit a batch job for scoring multiple prompt-response pairs using a reward model.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                reward_model:
                  type: string
                  description: "Identifier for the reward model to be used."
                prompt_with_response_path:
                  # OpenAPI 3.0 ignores any keyword that sits next to $ref, so
                  # the reference is wrapped in allOf to keep the description.
                  allOf:
                    - $ref: '#/components/schemas/Path'
                  description: "Path to a JSONL file where each line is a List[Message] and custom_id."
                scoring_function:
                  $ref: '#/components/schemas/ScoringFunction'
                metadata:
                  type: object
                  additionalProperties: true
                  description: "Metadata to carry forward in the response."
      responses:
        '200':
          description: Batch scoring job successfully submitted
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchRewardModelScoringJob'
  /reward_model_scoring/submit_job/job_status:
    get:
      summary: Get status for an already submitted job
      description: |
        Retrieve the status and details of a previously submitted batch scoring job using its unique job ID.
      parameters:
        - in: query
          name: job_id
          schema:
            type: string
          required: true
          description: "Unique identifier for the batch scoring job."
      responses:
        '200':
          description: Batch scoring job status retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchRewardModelScoringJob'
 components:
  schemas:
    # Chat message schema -- reuse from /chat_completion.
    # A bare `Message:` key parses as null, which is not a valid schema for the
    # $refs in this spec, so the definition (and the attachment schemas it
    # references) is spelled out here.
    Message:
      type: object
      properties:
        role:
          type: string
        text:
          type: string
        attachments:
          type: array
          items:
            $ref: '#/components/schemas/MediaAttachment'
        eot:
          type: boolean
          description: "End of transmission flag."
        tool_call:
          type: boolean
          description: "Indicates if it's a tool call - builtin, custom, or ipython."
        is_complete:
          type: boolean
          description: "For streaming, indicates if the message is complete."
        is_header_complete:
          type: boolean
          description: "For streaming, indicates if the header of the message is complete."
        metadata:
          type: object
          additionalProperties: true
          description: "Additional metadata as JSON."
    MediaAttachment:
      type: object
      properties:
        attachment_type:
          $ref: '#/components/schemas/MediaAttachmentType'
        data_type:
          $ref: '#/components/schemas/MediaAttachmentDataType'
        data:
          type: string
    MediaAttachmentType:
      type: string
      enum:
        - image
        - video
        - audio
        - text
      description: "Type of media attachment."
    MediaAttachmentDataType:
      type: string
      enum:
        - raw_bytes
        - filepath
        - uri
      description: "Data type of the media attachment."
    Options:
      type: object
      properties:
        logprobs:
          type: boolean
        max_tokens:
          type: integer
        temperature:
          type: number
        top_p:
          type: number
        # TODO: Add/Remove more reward model specific params
    ScoringFunction:
      type: object
      properties:
        name:
          type: string
        params:
          type: object
          additionalProperties: true
    Path:
      type: object
      properties:
        value:
          type: string
        type:
          type: string
          enum:
            - raw_bytes
            - filepath
            - uri
    BatchRewardModelScoringJob:
      type: object
      properties:
        job_id:
          type: string
        created:
          type: string
          format: date-time
        status:
          type: string
          enum:
            - validating
            - running
            - completed
            - failed
        input_file_path:
          $ref: '#/components/schemas/Path'
        success_file_path:
          $ref: '#/components/schemas/Path'
        error_file_path:
          $ref: '#/components/schemas/Path'
        metadata:
          type: object
          additionalProperties: true
          description: "Metadata carried forward from the job submission."
--- a/simple_view/batch_inference.yml
+++ b/simple_view/batch_inference.yml
@ -1,65 +0,0 @@
 == Schema ==
 Message:
  role: str
  text: str
  attachments: List[MediaAttachment]
  eot: bool
  tool_call: bool  # if it's a tool call - builtin or custom or ipython
  # for streaming
  is_complete: bool
  is_header_complete: bool
  metadata: json
 MediaAttachment:
  attachment_type: MediaAttachmentType
  data_type: MediaAttachmentDataType
  data: str
 MediaAttachmentType: # enum [image, video, audio, text(or file)]
 MediaAttachmentDataType:  # enum [raw_bytes, filepath, uri]
 BatchInference:
  job_id: str  # id provided by the api
  created: string # format - date-time
  status: string  # enum (validating, running, completed, failed)
  input_file_path: Path  # jsonl style file where each line is a json encoded List[Message]
  success_file_path: Path
  error_file_path: Path
  metadata: json
 Options:
  logprobs: bool
  max_tokens: int
  temperature: float
  top_p: float
 Path:
  value: string
  type: string # enum [raw_bytes, filepath, uri]
 == Callsites ==
 callsite:
  /batch_inference/submit_job
 request_type:
  post
 description:
  Submit a batch inference job
 request:
  model: str
  prompt_file_path: Path  # jsonl style file where each line is a json encoded List[Message]
  options: Options
  num_generations: int
 response:
  batch_inference_job: BatchInference
 callsite:
  /batch_inference/job_status
 request_type:
  get
 description:
  Get status for an already submitted job
 request:
  job_id: str  # unique identifier for the job
 response:
  batch_inference_job: BatchInference
--- a/simple_view/chat_completion.yml
+++ b/simple_view/chat_completion.yml
@ -1,72 +0,0 @@
 # Simple bullet form for ease of read and iteration
 # Use LLMs to translate this to an OpenAPI spec.
 == Schema ==
 Message:
  role: str
  text: str
  attachments: List[MediaAttachment]
  eot: bool
  tool_call: bool  # if it's a tool call - builtin or custom or ipython
  # for streaming
  is_complete: bool
  is_header_complete: bool
  metadata: json
 MediaAttachment:
  attachment_type: MediaAttachmentType
  data_type: MediaAttachmentDataType
  data: str
 MediaAttachmentType: # enum [image, video, audio, text(or file)]
 MediaAttachmentDataType:  # enum [raw_bytes, filepath, uri]
 Completion:
  id: str
  message: Message
  tokens: List[int]
  logprobs: List[float]
  finish_reason: str  # Enum (stop, safety, max-length, etc)
 Options:
  logprobs: bool
  max_tokens: int
  temperature: float
  top_p: float
  #TODO: Get more options from metagen
 TokenUsage:
  input_tokens: int
  output_tokens: int
  total_tokens: int
 == Callsite ==
 callsite:
  chat_completion/
 request_type:
  post
 description:
  submit a chat completion request
 request:
  messages: List[Message]
  model: str
  options: Options
  n_completions: int
  # TODO: how to handle tooling control if any ?
  # Add `tools` and `tool_choice` --
  # for eg. "auto": use model's guess
  # how to force to use particular tool
  # how to disable inbuilt tools
  # tools: List[Tool]
  # tool_choice: Any
 response:
  id: str
  candidates: List[Completion]  # a list to account for when n_completions > 1
  model_called: str  # info on that model that produced this result
  usage: TokenUsage
 # TODO
 # callsite:
 #   chat_completion_stream/
--- a/simple_view/fine_tuning.yml
+++ b/simple_view/fine_tuning.yml
@ -1,134 +0,0 @@
 # Fine Tuning APIs
 == Schema ==
 TrainingDataItem:
  dialog: List[Message]
  keep_loss: List[bool]
 WandBLogger:
  project: str
 DiskLogger:
  # log_dir will be pre-configured in environment
  filename: str
 FullFineTuneOptions:
  enable_activation_checkpointing: True
  memory_efficient_fsdp_wrap: True
  fsdp_cpu_offload: True
 LoraFineTuneOptions:
  lora_attn_modules: ['q_proj', 'v_proj']
  apply_lora_to_mlp: False
  apply_lora_to_output: False
  lora_rank: 8
  lora_alpha: 16
 FineTuningOptions:
  n_epochs: int
  batch_size: int
  lr: float
  gradient_accumulation_steps: int
  seed: int
  shuffle: bool
  # Unions in OpenAPI with a reference field that can help disambiguate
  custom_training_options:
    discriminator:
      propertyName: fine_tuning_type
    mapping:
      fft: FullFineTuneOptions
      lora: LoraFineTuneOptions
  # other options that can be passed in
  extras: json
 Config:
  model: str # model that you want to fine tune
  data: Path  # jsonl with each row representing a TrainingDataItem
  validation_data: Path  # same as data but to get validation metrics on
  # fine tuning args
  fine_tuning_options: FineTuningOptions
  # metric logging
  logger:
    discriminator:
      propertyName: log_type
    mapping:
      disk: DiskLogger
      wandb: WandBLogger
  # Override options
  # eg. --nproc_per_node 4 instead of defaults,
  # this might be impl specific and can allow for various customizations
  overrides: str
  metadata: json  # to carry over to job details
 FineTuningJob:
  job_id: str
  created: str  # format date-time
  finished_at: str  # format date-time
  status: str  # enum - validation, queued, running, failed, success, cancelled
  error_path: Path  # error logging
  checkpoints: List[Path]  # checkpoints for various epochs
  logs: Path  # local path / wandb uri
  input_config: Config  # config used to submit this job
  metadata: json  # carried over from user provided input
 Log:
  message: string  # The log message.
  timestamp: string  # format: date-time
 == Callsites ==
 callsite:
  /fine_tuning/jobs/submit
 request_type:
  post
 description:
  Submit a fine tuning job
 request:
  config: Config
 response:
  fine_tuning_job: FineTuningJob
 callsite:
  /fine_tuning/jobs/status
 request_type:
  get
 description:
  Gets last N fine tuning jobs
 request:
  job_id: str
 response:
  fine_tuning_job: FineTuningJob
 callsite:
  /fine_tuning/jobs/cancel
 request_type:
  post
 description:
  Cancel provided job
 request:
  job_id: str
 response:
  fine_tuning_job: FineTuningJob
 callsite:
  /fine_tuning/jobs/tail
 request_type:
  get
 description:
  Tail logs of a particular job
 request:
  job_id: str
 response:
  logs: List[Log]
  streaming:
    enabled: True
    chunkSize: 1024
--- a/simple_view/reward_model_scoring.yml
+++ b/simple_view/reward_model_scoring.yml
@ -1,70 +0,0 @@
 # Reward Model Service
 == Schema ==
 Message:
  # Same as /chat_completion
 Options:
  logprobs: bool
  max_tokens: int
  temperature: float
  top_p: float
  #TODO: Figure out what other reward model specific params
 ScoringFunction:
  name: str
  params: json
 BatchRewardModelScoringJob:
  job_id: str
  created: str  # format date-time
  status: string  # enum (validating, running, completed, failed)
  input_file_path: Path
  success_file_path: Path  # jsonl where each row has {custom_id: <from input>, logprob: <float>, score: float}
  error_file_path: Path  # jsonl where each row has {custom_id: <from input>, error: <error_info>}
  metadata: json  # carry forward from job submission api
 == Callsites ==
 callsite:
  reward_model_scoring/
 request_type:
  post
 description:
  Score a prompt-response pair using a reward model
 request:
  prompt_with_response: List[Message]  # prompt and response joined as a List[Message]
  reward_model: str
  scoring_function: ScoringFunction
  options: Options
 response:
  id: str
  logprob: float
  score: float
 callsite:
  reward_model_scoring/submit_job/
 request_type:
  post
 description:
  Batch scoring using reward models
 request:
  reward_model: str
  prompt_with_response_path: Path  # jsonl file where each line is a List[Message] and custom_id
  scoring_function: ScoringFunction
  metadata: json  # anything to carry forward over in the response
 response:
  batch_reward_model_scoring_job: BatchRewardModelScoringJob
 callsite:
  /reward_model_scoring/submit_job/job_status
 request_type:
  get
 description:
  Get status for an already submitted job
 request:
  job_id: str  # unique identifier for the job
 response:
  batch_reward_model_scoring_job: BatchRewardModelScoringJob
--- a/simple_view/synthetic_data_generation.yml
+++ b/simple_view/synthetic_data_generation.yml
@ -1,58 +0,0 @@
 # Synthetic Data Generation API
 == Schema ==
 FilteringFunction:
  name: str
  params: json
 SyntheticDataPoint:
  custom_id: str
  index: int
  prompt: List[Message]
  response: Message
  logprob: float
  score: float
 SyntheticDataGenerationJob:
  job_id: str  # id provided by the api
  created: string # format - date-time
  status: string  # enum (validating, running, completed, failed)
  input_file_path: Path  # jsonl style file where each row contains custom_id and message_list
  success_file_path: Path  # jsonl each line is SyntheticDataPoint
  error_file_path: Path  # custom_ids where we failed with some info
  metadata: json
 == Callsites ==
 callsite:
  /synthetic_data_gen/submit_job
 request_type:
  post
 description:
  Submit a job to generate synthetic data using llm + reward model scoring + filtering
 request:
  # batch inference params
  model: str
  prompt_file_path: Path  # jsonl style file where each line is a json encoded List[Message] + custom_id
  options: Options
  num_generations: int
  # reward model scoring params
  reward_model: str
  scoring_function: ScoringFunction
  # filtering params
  filtering_function: FilteringFunction
  metadata: json
 response:
  synth_data_gen_job: SyntheticDataGenerationJob
 callsite:
  /synthetic_data_gen/job_status
 request_type:
  get
 description:
  Get status for an already submitted job
 request:
  job_id: str  # unique identifier for the job
 response:
  synth_data_gen_job: SyntheticDataGenerationJob
--- a/source/generate.sh
+++ b/source/generate.sh
--- a/synthetic_data_generation.yaml
+++ b/synthetic_data_generation.yaml
@ -1,131 +0,0 @@
 openapi: 3.0.0
 info:
  title: Synthetic Data Generation API
  version: 0.0.1
 paths:
  /synthetic_data_gen/submit_job:
    post:
      summary: Submit a job to generate synthetic data
      description: Submit a job to generate synthetic data using llm + reward model scoring + filtering
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                model:
                  type: string
                  description: Model used for batch inference
                prompt_file_path:
                  type: string
                  format: path
                  description: Path to the JSONL file containing message_lists and custom IDs
                options:
                  $ref: '#/components/schemas/Options'
                num_generations:
                  type: integer
                  description: Number of generations to produce
                reward_model:
                  type: string
                  description: Model used for scoring
                scoring_function:
                  $ref: '#/components/schemas/ScoringFunction'
                filtering_function:
                  $ref: '#/components/schemas/FilteringFunction'
                metadata:
                  type: object
                  additionalProperties: true
                  description: Additional metadata for the job
      responses:
        '200':
          description: Job successfully submitted
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SyntheticDataGenerationJob'
  /synthetic_data_gen/job_status:
    get:
      summary: Get job status
      description: Get status for an already submitted job
      parameters:
        - in: query
          name: job_id
          schema:
            type: string
          required: true
          description: Unique identifier for the job
      responses:
        '200':
          description: Job status retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SyntheticDataGenerationJob'
 components:
  schemas:
    # Named filtering function with free-form JSON parameters.
    FilteringFunction:
      type: object
      properties:
        name:
          type: string
          description: Name of the filtering function
        params:
          type: object
          additionalProperties: true
          description: JSON object containing parameters for the filtering function
    # The submit_job request body references Options and ScoringFunction, but
    # this spec never defined them, leaving both $refs unresolvable. The
    # definitions below mirror the ones in the reward model scoring spec.
    Options:
      type: object
      properties:
        logprobs:
          type: boolean
        max_tokens:
          type: integer
        temperature:
          type: number
        top_p:
          type: number
    ScoringFunction:
      type: object
      properties:
        name:
          type: string
        params:
          type: object
          additionalProperties: true
    # One generated sample: the prompt messages, the response message, and the
    # response's log probability and reward model score.
    SyntheticDataPoint:
      type: object
      properties:
        custom_id:
          type: string
          description: Custom identifier for the data point
        index:
          type: integer
          description: Index of the data point
        prompt:
          type: array
          items:
            $ref: '#/components/schemas/Message'
          description: List of messages used as prompt
        response:
          $ref: '#/components/schemas/Message'
        logprob:
          type: number
          format: float
          description: Log probability of the response
        score:
          type: number
          format: float
          description: Score of the response based on the reward model
    # SyntheticDataPoint references Message, which this spec never defined.
    # The definition below (with the attachment schemas it references) mirrors
    # the chat completion spec; keep the two in sync.
    Message:
      type: object
      properties:
        role:
          type: string
        text:
          type: string
        attachments:
          type: array
          items:
            $ref: '#/components/schemas/MediaAttachment'
        eot:
          type: boolean
          description: "End of transmission flag."
        tool_call:
          type: boolean
          description: "Indicates if it's a tool call - builtin, custom, or ipython."
        is_complete:
          type: boolean
          description: "For streaming, indicates if the message is complete."
        is_header_complete:
          type: boolean
          description: "For streaming, indicates if the header of the message is complete."
        metadata:
          type: object
          additionalProperties: true
          description: "Additional metadata as JSON."
    MediaAttachment:
      type: object
      properties:
        attachment_type:
          $ref: '#/components/schemas/MediaAttachmentType'
        data_type:
          $ref: '#/components/schemas/MediaAttachmentDataType'
        data:
          type: string
    MediaAttachmentType:
      type: string
      enum:
        - image
        - video
        - audio
        - text
      description: "Type of media attachment."
    MediaAttachmentDataType:
      type: string
      enum:
        - raw_bytes
        - filepath
        - uri
      description: "Data type of the media attachment."
    SyntheticDataGenerationJob:
      type: object
      properties:
        job_id:
          type: string
          description: ID provided by the API
        created:
          type: string
          format: date-time
          description: Timestamp when the job was created
        status:
          type: string
          enum: [validating, running, completed, failed]
          description: Current status of the job
        input_file_path:
          type: string
          format: path
          description: Path to the input JSONL file
        success_file_path:
          type: string
          format: path
          description: Path to the JSONL file containing successful results
        error_file_path:
          type: string
          format: path
          description: Path to the JSONL file containing errors
        metadata:
          type: object
          additionalProperties: true
          description: Additional metadata about the job