diff --git a/simple_view/synthetic_data_generation.yml b/simple_view/synthetic_data_generation.yml
new file mode 100644
index 000000000..27c942c24
--- /dev/null
+++ b/simple_view/synthetic_data_generation.yml
@@ -0,0 +1,58 @@
+# Synthetic Data Generation API
+== Schema ==
+
+FilteringFunction:
+  name: str
+  params: json
+
+SyntheticDataPoint:
+  custom_id: str
+  index: int
+  prompt: List[Message]
+  response: Message
+  logprob: float
+  score: float
+
+SyntheticDataGenerationJob:
+  job_id: str # id provided by the api
+  created: string # format - date-time
+  status: string # enum (validating, running, completed, failed)
+  input_file_path: Path # jsonl style file where each row contains custom_id and message_list
+  success_file_path: Path # jsonl each line is SyntheticDataPoint
+  error_file_path: Path # custom_ids where we failed with some info
+  metadata: json
+
+== Callsites ==
+
+callsite:
+  /synthetic_data_gen/submit_job
+request_type:
+  post
+description:
+  Submit a job to generate synthetic data using llm + reward model scoring + filtering
+request:
+  # batch inference params
+  model: str
+  prompt_file_path: Path # jsonl style file where each line is a json encoded List[Message] + custom_id
+  options: Options
+  num_generations: int
+  # reward model scoring params
+  reward_model: str
+  scoring_function: ScoringFunction
+  # filtering params
+  filtering_function: FilteringFunction
+  metadata: json
+
+response:
+  synth_data_gen_job: SyntheticDataGenerationJob
+
+callsite:
+  /synthetic_data_gen/job_status
+request_type:
+  get
+description:
+  Get status for an already submitted job
+request:
+  job_id: str # unique identifier for the job
+response:
+  synth_data_gen_job: SyntheticDataGenerationJob
diff --git a/synthetic_data_generation.yaml b/synthetic_data_generation.yaml
index 81e8f4965..f11786f47 100644
--- a/synthetic_data_generation.yaml
+++ b/synthetic_data_generation.yaml
@@ -1,12 +1,12 @@
 openapi: 3.0.0
 info:
-  title: API for Synthetic Data Generation. This combines other serivces like batch inference and reward model scoring.
+  title: Synthetic Data Generation API
   version: 0.0.1
 paths:
-  /synthetic_data_generation/submit_job:
+  /synthetic_data_gen/submit_job:
     post:
-      summary: Submit a job for synthetic data generation.
-      description: Batch Inference > Reward Scoring > Filtering > Response
+      summary: Submit a job to generate synthetic data
+      description: Submit a job to generate synthetic data using llm + reward model scoring + filtering
       requestBody:
         required: true
         content:
@@ -14,69 +14,47 @@ paths:
            schema:
              type: object
              properties:
-                # batch inference params
                model:
                  type: string
-                  description: Model identifier for batch inference.
-                prompts_path:
+                  description: Model used for batch inference
+                prompt_file_path:
                  type: string
-                  description: Path to prompts, JSONL for batch inference
-                batch_size:
-                  type: integer
-                  description: Number of prompts to process in each batch.
-                # TODO: May-be put all these generation related params in a struct
-                temperature:
-                  type: number
-                  format: float
-                  description: Temperature parameter for generation.
-                top_p:
-                  type: number
-                  format: float
-                  description: Top-p parameter for generation.
-                max_gen_len:
-                  type: integer
-                  description: Maximum length of generated responses.
+                  format: path
+                  description: Path to the JSONL file containing message_lists and custom IDs
+                options:
+                  $ref: '#/components/schemas/Options'
                num_generations:
                  type: integer
-                  description: Number of generations per prompt.
-                # reward model scoring params
+                  description: Number of generations to produce
                reward_model:
                  type: string
-                  description: Identifier for the reward model used for scoring.
+                  description: Model used for scoring
                scoring_function:
-                  type: string
-                  description: Scoring function to apply.
-                # params for filtering responses
-                # filtering function will have a signature as
-                # def filter_responses(List[PromptResponseScore]) --> List[PromptResponseScore]: ...
+                  $ref: '#/components/schemas/ScoringFunction'
                filtering_function:
+                  $ref: '#/components/schemas/FilteringFunction'
+                metadata:
                  type: object
-                  properties:
-                    name:
-                      type: string
-                      description: Name of the filtering function, can be a simple threshold or a pre-registered function.
-                    params:
-                      type: object
-                      additionalProperties: true
-                      description: JSON object containing parameters for the filtering function.
+                  additionalProperties: true
+                  description: Additional metadata for the job
      responses:
        '200':
-          description: Job successfully created and processing.
+          description: Job successfully submitted
          content:
            application/json:
              schema:
-                $ref: '#/components/schemas/SyntheticDataGeneration'
+                $ref: '#/components/schemas/SyntheticDataGenerationJob'

-/synthetic_data_generation/job_status:
+  /synthetic_data_gen/job_status:
    get:
-      summary: Get the status of a submitted job
-      description: Get the status of a submitted job
+      summary: Get job status
+      description: Get status for an already submitted job
      parameters:
        - in: query
          name: job_id
-          required: true
          schema:
            type: string
+          required: true
          description: Unique identifier for the job
      responses:
        '200':
@@ -84,58 +62,70 @@
          content:
            application/json:
              schema:
-                $ref: '#/components/schemas/SyntheticDataGeneration'
-        '400':
-          description: Invalid job ID provided
-        '404':
-          description: Job not found
-
+                $ref: '#/components/schemas/SyntheticDataGenerationJob'
 components:
  schemas:
-    PromptResponseScore:
+    FilteringFunction:
      type: object
      properties:
-        id:
+        name:
          type: string
-          description: Carry forwarded from the user provided id from prompt.
+          description: Name of the filtering function
+        params:
+          type: object
+          additionalProperties: true
+          description: JSON object containing parameters for the filtering function
+    SyntheticDataPoint:
+      type: object
+      properties:
+        custom_id:
+          type: string
+          description: Custom identifier for the data point
        index:
          type: integer
-          description: Index of the generation.
+          description: Index of the data point
        prompt:
          type: array
          items:
            $ref: '#/components/schemas/Message'
+          description: List of messages used as prompt
        response:
-          $ref: '#/components/schemas/Completion'
+          $ref: '#/components/schemas/Message'
+        logprob:
+          type: number
+          format: float
+          description: Log probability of the response
        score:
          type: number
          format: float
-          description: Final score after filtering.
-        raw_score:
-          type: number
-          format: float
-          description: Raw score from the reward model.
-    SyntheticDataGeneration:
+          description: Score of the response based on the reward model
+    SyntheticDataGenerationJob:
      type: object
      properties:
        job_id:
          type: string
-          description: Unique identifier for the job.
+          description: ID provided by the API
        created:
          type: string
          format: date-time
-          description: Timestamp when the job was created.
+          description: Timestamp when the job was created
        status:
          type: string
-          description: Current status of the job, can indicate the stage or success/failure.
-        output_file_path:
+          enum: [validating, running, completed, failed]
+          description: Current status of the job
+        input_file_path:
          type: string
-          description: Path to the output jsonl file where each row is a json encoded PromptResponseScore object.
-    Message:
-      type: object
-      properties:
-        # As Defined in /batch_inference
-    Completion:
-      type: object
-      properties:
-        # As Defined in /batch_inference
+          format: path
+          description: Path to the input JSONL file
+        success_file_path:
+          type: string
+          format: path
+          description: Path to the JSONL file containing successful results
+        error_file_path:
+          type: string
+          format: path
+          description: Path to the JSONL file containing errors
+        metadata:
+          type: object
+          additionalProperties: true
+          description: Additional metadata about the job