Updates to the batch inference APIs

Hardik Shah 2024-06-26 15:45:18 -07:00
parent eb81ad1ffd
commit 04f89ad315
2 changed files with 193 additions and 76 deletions


@@ -1,40 +1,13 @@
 openapi: 3.0.0
 info:
-  title: Batch Generations as a Service
+  title: Batch Inference API
   version: 0.0.1
-components:
-  schemas:
-    BatchInference:
-      type: object
-      properties:
-        job_id:
-          type: string
-          description: Unique identifier for the job
-        created:
-          type: string
-          format: date-time
-          description: Timestamp when the job was created
-        status:
-          type: string
-          description: Current status of the job (running, completed)
-        input_file_path:
-          type: string
-          description: Path to the file containing successful results
-        success_file_path:
-          type: string
-          description: Path to the file containing successful results
-        error_file_path:
-          type: string
-          description: Path to the file containing error logs
-        metadata:
-          type: object
-          additionalProperties: true
-          description: User provided metadata
 paths:
   /batch_inference/submit_job:
     post:
       summary: Submit a batch inference job
-      description: Submit a batch inference job
+      description: |
+        This endpoint allows clients to submit a batch inference job using a model and a prompt file.
       requestBody:
         required: true
         content:
@@ -44,72 +17,151 @@ paths:
               properties:
                 model:
                   type: string
-                  description: Model identifier
-                prompts:
-                  type: string
-                  description: Path to a JSONL file where each line is a JSON for a single inference API call
-                  format: path
-                batch_size:
-                  type: integer
-                  description: Number of prompts to process in one batch
-                temperature:
-                  type: number
-                  format: float
-                  description: Temperature setting for the generation
-                top_p:
-                  type: number
-                  format: float
-                  description: Top p setting for the generation
-                max_gen_len:
-                  type: integer
-                  description: Maximum generation length
+                  description: "The model identifier to be used for inference."
+                prompt_file_path:
+                  $ref: '#/components/schemas/Path'
+                  description: "Path to a JSONL file where each line is a JSON-encoded list of messages."
+                options:
+                  $ref: '#/components/schemas/Options'
                 num_generations:
                   type: integer
-                  description: Number of generations to produce
-                logprobs:
-                  type: boolean
-                  description: Whether to include log probabilities in the output
-                output:
-                  type: string
-                  description: Output path where results should be stored
-                metadata:
-                  type: object
-                  additionalProperties: true
-                  description: Additional metadata for the job
+                  description: "Number of generations to produce."
       responses:
         '200':
-          description: Job successfully submitted
+          description: Batch inference job successfully submitted
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/BatchInference'
+        '400':
+          description: Invalid request parameters
+        '500':
+          description: Internal server error
   /batch_inference/job_status:
     get:
-      summary: Get the status of a submitted job
-      description: Get the status of a submitted job
+      summary: Get status for an already submitted job
+      description: |
+        Retrieve the status and details of a previously submitted batch inference job using its unique job ID.
       parameters:
         - in: query
           name: job_id
-          required: true
           schema:
             type: string
-          description: Unique identifier for the job
+          required: true
+          description: "Unique identifier for the batch inference job."
       responses:
         '200':
-          description: Job status retrieved successfully
+          description: Batch inference job status retrieved successfully
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/BatchInference'
+        '400':
+          description: Invalid job ID provided
+        '404':
+          description: Job not found
+        '500':
+          description: Internal server error
+components:
+  schemas:
+    Message:
+      type: object
+      properties:
+        role:
+          type: string
+        text:
+          type: string
+        attachments:
+          type: array
+          items:
+            $ref: '#/components/schemas/MediaAttachment'
+        eot:
+          type: boolean
+          description: "End of transmission flag."
+        tool_call:
+          type: boolean
+          description: "Indicates if it's a tool call - builtin, custom, or ipython."
+        is_complete:
+          type: boolean
+          description: "For streaming, indicates if the message is complete."
+        is_header_complete:
+          type: boolean
+          description: "For streaming, indicates if the header of the message is complete."
+        metadata:
+          type: object
+          additionalProperties: true
+          description: "Additional metadata as JSON."
+    MediaAttachment:
+      type: object
+      properties:
+        attachment_type:
+          $ref: '#/components/schemas/MediaAttachmentType'
+        data_type:
+          $ref: '#/components/schemas/MediaAttachmentDataType'
+        data:
+          type: string
+    MediaAttachmentType:
+      type: string
+      enum:
+        - image
+        - video
+        - audio
+        - text
+      description: "Type of media attachment."
+    MediaAttachmentDataType:
+      type: string
+      enum:
+        - raw_bytes
+        - filepath
+        - uri
+      description: "Data type of the media attachment."
+    BatchInference:
+      type: object
+      properties:
+        job_id:
+          type: string
+          description: "ID provided by the API for the job."
+        created:
+          type: string
+          format: date-time
+          description: "Timestamp when the job was created."
+        status:
+          type: string
+          enum:
+            - validating
+            - running
+            - completed
+            - failed
+          description: "Current status of the job."
+        input_file_path:
+          $ref: '#/components/schemas/Path'
+        success_file_path:
+          $ref: '#/components/schemas/Path'
+        error_file_path:
+          $ref: '#/components/schemas/Path'
+        metadata:
+          type: object
+          additionalProperties: true
+          description: "Additional metadata related to the job."
+    Options:
+      type: object
+      properties:
+        logprobs:
+          type: boolean
+        max_tokens:
+          type: integer
+        temperature:
+          type: number
+        top_p:
+          type: number
+    Path:
+      type: object
+      properties:
+        value:
+          type: string
+          description: "The path value."
+        type:
+          type: string
+          enum:
+            - raw_bytes
+            - filepath
+            - uri
+          description: "Data Type of the path."
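
To make the request and response shapes concrete, here is a minimal client sketch against the spec above. The base URL, the use of the requests library, the polling interval, and the model name are assumptions for illustration; only the payload fields come from the spec.

import time
import requests

BASE_URL = "http://localhost:8000"  # assumed host; the spec does not fix one

def submit_job(model: str, prompt_file: str) -> dict:
    # Body fields follow the submit_job request schema above.
    body = {
        "model": model,
        "prompt_file_path": {"value": prompt_file, "type": "filepath"},
        "options": {"logprobs": False, "max_tokens": 512,
                    "temperature": 0.7, "top_p": 0.9},
        "num_generations": 1,
    }
    resp = requests.post(f"{BASE_URL}/batch_inference/submit_job", json=body)
    resp.raise_for_status()
    return resp.json()  # a BatchInference object

def wait_for_job(job_id: str, poll_seconds: float = 30.0) -> dict:
    # Poll job_status until the job reaches a terminal state.
    while True:
        resp = requests.get(f"{BASE_URL}/batch_inference/job_status",
                            params={"job_id": job_id})
        resp.raise_for_status()
        job = resp.json()
        if job["status"] in ("completed", "failed"):
            return job
        time.sleep(poll_seconds)

job = submit_job("llama-3-8b-instruct", "/data/prompts.jsonl")  # placeholder model name
done = wait_for_job(job["job_id"])
print(done["status"], done["success_file_path"])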


@@ -0,0 +1,65 @@
== Schema ==

Message:
    role: str
    text: str
    attachments: List[MediaAttachment]
    eot: bool
    tool_call: bool  # if it's a tool call - builtin or custom or ipython
    # for streaming
    is_complete: bool
    is_header_complete: bool
    metadata: json

MediaAttachment:
    attachment_type: MediaAttachmentType
    data_type: MediaAttachmentDataType
    data: str

MediaAttachmentType:  # enum [image, video, audio, text(or file)]

MediaAttachmentDataType:  # enum [raw_bytes, filepath, uri]

BatchInference:
    job_id: str  # id provided by the api
    created: string  # format - date-time
    status: string  # enum (validating, running, completed, failed)
    input_file_path: Path  # jsonl style file where each line is a json encoded List[Message]
    success_file_path: Path
    error_file_path: Path
    metadata: json

Options:
    logprobs: bool
    max_tokens: int
    temperature: float
    top_p: float

Path:
    value: string
    type: string  # enum [raw_bytes, filepath, uri]
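
As an illustration of the prompt file format, the sketch below writes one JSONL line containing a JSON-encoded List[Message]. The file name and all field values are made up; only the field names come from the Message schema above.

import json

# One dialog = List[Message]; each JSONL line in the prompt file
# holds one such dialog. Values here are illustrative only.
dialog = [
    {"role": "system", "text": "You are a helpful assistant.", "eot": True},
    {"role": "user",
     "text": "Summarize the attached report.",
     "eot": True,
     "attachments": [{"attachment_type": "text",
                      "data_type": "filepath",
                      "data": "/data/report.txt"}]},
]

with open("prompts.jsonl", "w") as f:
    f.write(json.dumps(dialog) + "\n")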
== Callsites ==

callsite:
    /batch_inference/submit_job
request_type:
    post
description:
    Submit a batch inference job
request:
    model: str
    prompt_file_path: Path  # jsonl style file where each line is a json encoded List[Message]
    options: Options
    num_generations: int
response:
    batch_inference_job: BatchInference

callsite:
    /batch_inference/job_status
request_type:
    get
description:
    Get status for an already submitted job
request:
    job_id: str  # unique identifier for the job
response:
    batch_inference_job: BatchInference
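
For reference, a job_status response for a finished job might look like the literal below. Every value is invented; only the field names, the status enum, and the nested Path shape come from the BatchInference schema.

# Illustrative BatchInference payload; all values are made up.
example_job_status = {
    "job_id": "job-1234",
    "created": "2024-06-26T22:45:18Z",
    "status": "completed",
    "input_file_path": {"value": "/data/prompts.jsonl", "type": "filepath"},
    "success_file_path": {"value": "/data/results.jsonl", "type": "filepath"},
    "error_file_path": {"value": "/data/errors.jsonl", "type": "filepath"},
    "metadata": {"experiment": "batch-eval-1"},
}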