# llama-stack-mirror/synthetic_data_generation.yaml
# 2024-06-26 16:48:52 -07:00
#
# 131 lines
# 4.1 KiB
# YAML

# OpenAPI document header: spec version plus API metadata.
openapi: 3.0.0
info:
  # title and version are children of the info object, so they are nested here.
  title: Synthetic Data Generation API
  version: 0.0.1
# Operations exposed by the API. Both operations answer with the same job
# record schema (SyntheticDataGenerationJob).
paths:
  # Job submission: JSON request body in, job record out.
  /synthetic_data_gen/submit_job:
    post:
      summary: Submit a job to generate synthetic data
      description: Submit a job to generate synthetic data using llm + reward model scoring + filtering
      requestBody:
        required: true
        content:
          application/json:
            schema:
              # No property is listed as required, so each field is optional
              # as far as schema validation is concerned.
              type: object
              properties:
                model:
                  type: string
                  description: Model used for batch inference
                prompt_file_path:
                  type: string
                  # 'path' is not a format defined by OpenAPI 3.0; it acts as
                  # a custom annotation on the string.
                  format: path
                  description: Path to the JSONL file containing message_lists and custom IDs
                # NOTE(review): Options, ScoringFunction and FilteringFunction
                # must all exist under components.schemas for these
                # references to resolve - verify.
                options:
                  $ref: '#/components/schemas/Options'
                num_generations:
                  type: integer
                  description: Number of generations to produce
                reward_model:
                  type: string
                  description: Model used for scoring
                scoring_function:
                  $ref: '#/components/schemas/ScoringFunction'
                filtering_function:
                  $ref: '#/components/schemas/FilteringFunction'
                metadata:
                  type: object
                  additionalProperties: true
                  description: Additional metadata for the job
      responses:
        '200':
          description: Job successfully submitted
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SyntheticDataGenerationJob'
  # Status lookup: the job is identified by the job_id query parameter.
  /synthetic_data_gen/job_status:
    get:
      summary: Get job status
      description: Get status for an already submitted job
      parameters:
        - in: query
          name: job_id
          schema:
            type: string
          required: true
          description: Unique identifier for the job
      responses:
        '200':
          description: Job status retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SyntheticDataGenerationJob'
# Reusable schemas addressed via '#/components/schemas/<Name>'.
components:
  schemas:
    # A filtering function selected by name, with free-form parameters.
    FilteringFunction:
      type: object
      properties:
        name:
          type: string
          description: Name of the filtering function
        params:
          type: object
          additionalProperties: true
          description: JSON object containing parameters for the filtering function
    # One generated sample: the prompt messages, the response message, and
    # its log probability and reward-model score.
    # NOTE(review): no operation in this spec references this schema;
    # presumably it describes the records in the result JSONL files - confirm.
    SyntheticDataPoint:
      type: object
      properties:
        custom_id:
          type: string
          description: Custom identifier for the data point
        index:
          type: integer
          description: Index of the data point
        prompt:
          type: array
          items:
            $ref: '#/components/schemas/Message'
          description: List of messages used as prompt
        response:
          $ref: '#/components/schemas/Message'
        logprob:
          type: number
          format: float
          description: Log probability of the response
        score:
          type: number
          format: float
          description: Score of the response based on the reward model
    # Job record returned by both the submit_job and job_status operations.
    SyntheticDataGenerationJob:
      type: object
      properties:
        job_id:
          type: string
          description: ID provided by the API
        created:
          type: string
          format: date-time
          description: Timestamp when the job was created
        status:
          type: string
          enum: [validating, running, completed, failed]
          description: Current status of the job
        input_file_path:
          type: string
          # 'path' is not a format defined by OpenAPI 3.0; it acts as a
          # custom annotation on the string.
          format: path
          description: Path to the input JSONL file
        success_file_path:
          type: string
          format: path
          description: Path to the JSONL file containing successful results
        error_file_path:
          type: string
          format: path
          description: Path to the JSONL file containing errors
        metadata:
          type: object
          additionalProperties: true
          description: Additional metadata about the job
    # ------------------------------------------------------------------
    # Placeholder schemas. Options, ScoringFunction and Message are
    # referenced by '$ref' in this spec but were never defined, which
    # leaves those references unresolved. Each placeholder accepts any
    # object so that no invented constraint is imposed.
    # TODO: replace each one with the real field definitions.
    # ------------------------------------------------------------------
    Options:
      type: object
      additionalProperties: true
      description: Options for the job (placeholder; fields not yet specified)
    # NOTE(review): presumably parallels FilteringFunction (name + params) -
    # confirm before tightening.
    ScoringFunction:
      type: object
      additionalProperties: true
      description: Scoring function for the job (placeholder; fields not yet specified)
    Message:
      type: object
      additionalProperties: true
      description: A single message (placeholder; fields not yet specified)