llama-stack-mirror/simple_view/synthetic_data_generation.yml
2024-06-26 16:48:52 -07:00

58 lines
1.4 KiB
YAML

# Synthetic Data Generation API
== Schema ==
# Reference to a filtering function: a name plus a JSON blob of parameters.
# Used as the type of `filtering_function` in the submit_job request.
FilteringFunction:
  name: str
  params: json
# One generated sample. Each line of a job's success file (jsonl) is one SyntheticDataPoint.
SyntheticDataPoint:
  custom_id: str # presumably the custom_id of the originating input row - TODO confirm
  index: int # NOTE(review): likely the generation index for this custom_id - confirm
  prompt: List[Message]
  response: Message
  logprob: float
  score: float # NOTE(review): presumably produced by the reward model scoring step - confirm
# Job record returned by both the submit_job and job_status callsites.
SyntheticDataGenerationJob:
  job_id: str # id provided by the api
  created: str # format - date-time
  status: str # enum (validating, running, completed, failed)
  input_file_path: Path # jsonl style file where each row contains custom_id and message_list
  success_file_path: Path # jsonl file where each line is a SyntheticDataPoint
  error_file_path: Path # custom_ids where we failed, with some info
  metadata: json
== Callsites ==
# Callsite: submit a synthetic data generation job.
# The request groups parameters by pipeline stage (batch inference, reward
# model scoring, filtering); the response carries the job record.
callsite:
  /synthetic_data_gen/submit_job
request_type:
  post
description:
  Submit a job to generate synthetic data using llm + reward model scoring + filtering
request:
  # batch inference params
  model: str
  prompt_file_path: Path # jsonl style file where each line is a json encoded List[Message] + custom_id
  # NOTE(review): Options is not defined in the Schema section visible here;
  # presumably declared in another spec - confirm
  options: Options
  num_generations: int
  # reward model scoring params
  reward_model: str
  # NOTE(review): ScoringFunction is not defined in the Schema section visible
  # here; presumably declared in another spec - confirm
  scoring_function: ScoringFunction
  # filtering params
  filtering_function: FilteringFunction
  metadata: json
response:
  synth_data_gen_job: SyntheticDataGenerationJob
# Callsite: look up a previously submitted job by its job_id.
# The response has the same shape as the submit_job response.
callsite:
  /synthetic_data_gen/job_status
request_type:
  get
description:
  Get status for an already submitted job
request:
  job_id: str # unique identifier for the job
response:
  synth_data_gen_job: SyntheticDataGenerationJob