mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-28 02:53:30 +00:00
initial spec for bulk inference
This commit is contained in:
commit
7f54598a65
1 changed file with 115 additions and 0 deletions
115
bulk_inference.yaml
Normal file
115
bulk_inference.yaml
Normal file
|
@ -0,0 +1,115 @@
|
|||
---
# OpenAPI 3.0 specification for the bulk (batch) inference service.
openapi: 3.0.0
info:
  title: Bulk Generations as a Service
  # Quoted so YAML tooling never re-types the version as a number.
  version: "0.0.1"
components:
  schemas:
    # One submitted batch-inference job: identity, lifecycle state,
    # and the file paths where inputs/results/errors live.
    BatchInference:
      type: object
      properties:
        job_id:
          type: string
          description: Unique identifier for the job
        created:
          type: string
          format: date-time
          description: Timestamp when the job was created
        status:
          type: string
          description: Current status of the job (running, completed)
        input_file_path:
          type: string
          # Fixed copy-paste defect: this field is the input file,
          # not the success-results file.
          description: Path to the file containing the input prompts
        success_file_path:
          type: string
          description: Path to the file containing successful results
        error_file_path:
          type: string
          description: Path to the file containing error logs
        metadata:
          type: object
          additionalProperties: true
          description: User provided metadata
paths:
  /bulk_inference/submit_job:
    post:
      # operationId gives generated clients a stable method name.
      operationId: submitBulkInferenceJob
      summary: Submit a batch inference job
      description: Submit a batch inference job
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                model:
                  type: string
                  description: Model identifier
                prompts:
                  type: string
                  format: path
                  description: >-
                    Path to a JSONL file where each line is a JSON for a
                    single inference API call
                batch_size:
                  type: integer
                  description: Number of prompts to process in one batch
                temperature:
                  type: number
                  format: float
                  description: Temperature setting for the generation
                top_p:
                  type: number
                  format: float
                  description: Top p setting for the generation
                max_gen_len:
                  type: integer
                  description: Maximum generation length
                num_generations:
                  type: integer
                  description: Number of generations to produce
                logprobs:
                  type: boolean
                  description: Whether to include log probabilities in the output
                output:
                  type: string
                  description: Output path where results should be stored
                metadata:
                  type: object
                  additionalProperties: true
                  description: Additional metadata for the job
      responses:
        '200':
          description: Job successfully submitted
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchInference'
        '400':
          description: Invalid request parameters
        '500':
          description: Internal server error

  /bulk_inference/job_status:
    get:
      operationId: getBulkInferenceJobStatus
      summary: Get the status of a submitted job
      description: Get the status of a submitted job
      parameters:
        - in: query
          name: job_id
          required: true
          # description belongs on the Parameter Object, not inside schema.
          description: Unique identifier for the job
          schema:
            type: string
      responses:
        '200':
          description: Job status retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchInference'
        '400':
          description: Invalid job ID provided
        '404':
          description: Job not found
        '500':
          description: Internal server error
Loading…
Add table
Add a link
Reference in a new issue