# llama-stack-mirror/batch_inference.yaml
# 2024-06-26 15:45:18 -07:00
#
# 167 lines
# 4.6 KiB
# YAML

# Document header: the OpenAPI specification version this file targets,
# followed by the identity (title and version) of the API it describes.
openapi: '3.0.0'
info:
  title: Batch Inference API
  # Quoted so the version is always loaded as a string, never as a number.
  version: '0.0.1'
# Operations exposed by the API: one to submit a batch inference job and one
# to look up a previously submitted job. Both return a BatchInference object.
paths:
  /batch_inference/submit_job:
    post:
      summary: Submit a batch inference job
      description: |
        This endpoint allows clients to submit a batch inference job using a model and a prompt file.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                model:
                  type: string
                  description: "The model identifier to be used for inference."
                prompt_file_path:
                  # OpenAPI 3.0 ignores any keyword placed next to `$ref`, so
                  # the reference is wrapped in `allOf` to keep the
                  # property-level description visible to tooling.
                  allOf:
                    - $ref: '#/components/schemas/Path'
                  description: "Path to a JSONL file where each line is a JSON-encoded list of messages."
                options:
                  $ref: '#/components/schemas/Options'
                num_generations:
                  type: integer
                  description: "Number of generations to produce."
      responses:
        # Status codes are quoted: OpenAPI requires response keys to be strings.
        '200':
          description: Batch inference job successfully submitted
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchInference'
  /batch_inference/job_status:
    get:
      summary: Get status for an already submitted job
      description: |
        Retrieve the status and details of a previously submitted batch inference job using its unique job ID.
      parameters:
        # The job is identified through a mandatory `job_id` query parameter.
        - in: query
          name: job_id
          schema:
            type: string
          required: true
          description: "Unique identifier for the batch inference job."
      responses:
        '200':
          description: Batch inference job status retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchInference'
# Reusable schema definitions, addressed elsewhere in the document through
# `$ref: '#/components/schemas/<Name>'`.
components:
  schemas:
    # A single message. No operation in this document references it via
    # `$ref`; presumably it describes the entries stored in the JSONL prompt
    # file -- confirm against the consumer.
    Message:
      type: object
      properties:
        role:
          type: string
        text:
          type: string
        attachments:
          type: array
          items:
            $ref: '#/components/schemas/MediaAttachment'
        eot:
          type: boolean
          description: 'End of transmission flag.'
        tool_call:
          type: boolean
          description: "Indicates if it's a tool call - builtin, custom, or ipython."
        is_complete:
          type: boolean
          description: 'For streaming, indicates if the message is complete.'
        is_header_complete:
          type: boolean
          description: 'For streaming, indicates if the header of the message is complete.'
        metadata:
          type: object
          # Free-form object: any additional keys are accepted.
          additionalProperties: true
          description: 'Additional metadata as JSON.'

    # A media item carried by a Message: what kind of media it is, how the
    # payload is represented, and the payload itself as a string.
    MediaAttachment:
      type: object
      properties:
        attachment_type:
          $ref: '#/components/schemas/MediaAttachmentType'
        data_type:
          $ref: '#/components/schemas/MediaAttachmentDataType'
        data:
          type: string

    # Closed set of media kinds.
    MediaAttachmentType:
      type: string
      enum:
        - image
        - video
        - audio
        - text
      description: 'Type of media attachment.'

    # Closed set of payload representations.
    MediaAttachmentDataType:
      type: string
      enum:
        - raw_bytes
        - filepath
        - uri
      description: 'Data type of the media attachment.'

    # A batch inference job as returned by both operations in `paths`.
    BatchInference:
      type: object
      properties:
        job_id:
          type: string
          description: 'ID provided by the API for the job.'
        created:
          type: string
          format: date-time
          description: 'Timestamp when the job was created.'
        status:
          type: string
          enum:
            - validating
            - running
            - completed
            - failed
          description: 'Current status of the job.'
        input_file_path:
          $ref: '#/components/schemas/Path'
        success_file_path:
          $ref: '#/components/schemas/Path'
        error_file_path:
          $ref: '#/components/schemas/Path'
        metadata:
          type: object
          # Free-form object: any additional keys are accepted.
          additionalProperties: true
          description: 'Additional metadata related to the job.'

    # Generation options accepted by the submit-job request body.
    Options:
      type: object
      properties:
        logprobs:
          type: boolean
        max_tokens:
          type: integer
        temperature:
          type: number
        top_p:
          type: number

    # A location value together with a tag saying how to interpret it.
    Path:
      type: object
      properties:
        value:
          type: string
          description: 'The path value.'
        # `type` here is a property name, not the JSON Schema keyword. Its
        # allowed values repeat those listed under MediaAttachmentDataType.
        type:
          type: string
          enum:
            - raw_bytes
            - filepath
            - uri
          description: 'Data Type of the path.'