# Mirror of https://github.com/meta-llama/llama-stack.git
# Synced 2025-06-28 02:53:30 +00:00
# 115 lines, 3.6 KiB, YAML
openapi: 3.0.0
info:
  title: Bulk Generations as a Service
  version: 0.0.1
components:
  schemas:
    # Job record returned by both submit_job and job_status.
    BatchInference:
      type: object
      properties:
        job_id:
          type: string
          description: Unique identifier for the job
        created:
          type: string
          format: date-time
          description: Timestamp when the job was created
        status:
          type: string
          description: Current status of the job (running, completed)
        input_file_path:
          type: string
          # NOTE: original text duplicated the success_file_path description;
          # corrected to describe the input file.
          description: Path to the file containing the input inference requests
        success_file_path:
          type: string
          description: Path to the file containing successful results
        error_file_path:
          type: string
          description: Path to the file containing error logs
        metadata:
          type: object
          additionalProperties: true
          description: User provided metadata
paths:
  /bulk_inference/submit_job:
    post:
      summary: Submit a batch inference job
      description: Submit a batch inference job
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                model:
                  type: string
                  description: Model identifier
                prompts:
                  type: string
                  description: Path to a JSONL file where each line is a JSON for a single inference API call
                  format: path
                batch_size:
                  type: integer
                  description: Number of prompts to process in one batch
                temperature:
                  type: number
                  format: float
                  description: Temperature setting for the generation
                top_p:
                  type: number
                  format: float
                  description: Top p setting for the generation
                max_gen_len:
                  type: integer
                  description: Maximum generation length
                num_generations:
                  type: integer
                  description: Number of generations to produce
                logprobs:
                  type: boolean
                  description: Whether to include log probabilities in the output
                output:
                  type: string
                  description: Output path where results should be stored
                metadata:
                  type: object
                  additionalProperties: true
                  description: Additional metadata for the job
      responses:
        '200':
          description: Job successfully submitted
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchInference'
        '400':
          description: Invalid request parameters
        '500':
          description: Internal server error

  /bulk_inference/job_status:
    get:
      summary: Get the status of a submitted job
      description: Get the status of a submitted job
      parameters:
        - in: query
          name: job_id
          required: true
          schema:
            type: string
          description: Unique identifier for the job
      responses:
        '200':
          description: Job status retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchInference'
        '400':
          description: Invalid job ID provided
        '404':
          description: Job not found
        '500':
          description: Internal server error