mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-30 11:50:14 +00:00
167 lines
4.6 KiB
YAML
167 lines
4.6 KiB
YAML
# OpenAPI document header: the specification version this file is written
# against, followed by the identity of the API being described.
openapi: '3.0.0'
info:
  # Quoted so version-like scalars are always loaded as strings.
  title: 'Batch Inference API'
  version: '0.0.1'
# Endpoints of the Batch Inference API: one operation submits a job, the
# other looks a job up by its ID. Both return a BatchInference object.
paths:
  /batch_inference/submit_job:
    post:
      summary: Submit a batch inference job
      description: |
        This endpoint allows clients to submit a batch inference job using a model and a prompt file.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                model:
                  type: string
                  description: "The model identifier to be used for inference."
                prompt_file_path:
                  # OpenAPI 3.0 ignores every keyword written next to `$ref`,
                  # so the reference is wrapped in `allOf` to keep the
                  # description visible to tooling.
                  allOf:
                    - $ref: '#/components/schemas/Path'
                  description: "Path to a JSONL file where each line is a JSON-encoded list of messages."
                options:
                  $ref: '#/components/schemas/Options'
                num_generations:
                  type: integer
                  description: "Number of generations to produce."
      responses:
        '200':
          description: Batch inference job successfully submitted
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchInference'

  /batch_inference/job_status:
    get:
      summary: Get status for an already submitted job
      description: |
        Retrieve the status and details of a previously submitted batch inference job using its unique job ID.
      parameters:
        # The job to look up is identified by a mandatory query parameter.
        - in: query
          name: job_id
          schema:
            type: string
          required: true
          description: "Unique identifier for the batch inference job."
      responses:
        '200':
          description: Batch inference job status retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchInference'
# Reusable schema definitions referenced from the path operations via `$ref`.
components:
  schemas:
    # A single chat message. No `$ref` in this file points at it.
    # NOTE(review): presumably the element type of the message lists stored
    # in the prompt JSONL file - confirm against the producer.
    Message:
      type: object
      properties:
        role:
          type: string
        text:
          type: string
        attachments:
          type: array
          items:
            $ref: '#/components/schemas/MediaAttachment'
        eot:
          type: boolean
          description: 'End of transmission flag.'
        tool_call:
          type: boolean
          # Double-quoted because the text contains an apostrophe.
          description: "Indicates if it's a tool call - builtin, custom, or ipython."
        is_complete:
          type: boolean
          description: 'For streaming, indicates if the message is complete.'
        is_header_complete:
          type: boolean
          description: 'For streaming, indicates if the header of the message is complete.'
        metadata:
          type: object
          additionalProperties: true
          description: 'Additional metadata as JSON.'

    # A media item attached to a message: what kind of media it is, how the
    # payload is represented, and the payload itself as a string.
    MediaAttachment:
      type: object
      properties:
        attachment_type:
          $ref: '#/components/schemas/MediaAttachmentType'
        data_type:
          $ref: '#/components/schemas/MediaAttachmentDataType'
        data:
          type: string

    MediaAttachmentType:
      type: string
      enum: [image, video, audio, text]
      description: 'Type of media attachment.'

    MediaAttachmentDataType:
      type: string
      enum: [raw_bytes, filepath, uri]
      description: 'Data type of the media attachment.'

    # The job record returned by both the submit and the status operations.
    BatchInference:
      type: object
      properties:
        job_id:
          type: string
          description: 'ID provided by the API for the job.'
        created:
          type: string
          format: date-time
          description: 'Timestamp when the job was created.'
        status:
          type: string
          enum: [validating, running, completed, failed]
          description: 'Current status of the job.'
        input_file_path:
          $ref: '#/components/schemas/Path'
        success_file_path:
          $ref: '#/components/schemas/Path'
        error_file_path:
          $ref: '#/components/schemas/Path'
        metadata:
          type: object
          additionalProperties: true
          description: 'Additional metadata related to the job.'

    # Generation options accepted by the submit operation.
    Options:
      type: object
      properties:
        logprobs:
          type: boolean
        max_tokens:
          type: integer
        temperature:
          type: number
        top_p:
          type: number

    # A location value tagged with how it should be interpreted. The `type`
    # enum lists the same three values as MediaAttachmentDataType.
    Path:
      type: object
      properties:
        value:
          type: string
          description: 'The path value.'
        type:
          type: string
          enum: [raw_bytes, filepath, uri]
          description: 'Data Type of the path.'