mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-23 21:04:29 +00:00
58 lines
1.4 KiB
YAML
58 lines
1.4 KiB
YAML
# Synthetic Data Generation API
|
|
== Schema ==
|
|
|
|
FilteringFunction:
|
|
name: str
|
|
params: json
|
|
|
|
SyntheticDataPoint:
|
|
custom_id: str
|
|
index: int
|
|
prompt: List[Message]
|
|
response: Message
|
|
logprob: float
|
|
score: float
|
|
|
|
SyntheticDataGenerationJob:
|
|
job_id: str # ID provided by the API
|
|
created: str # format: date-time
|
|
status: str # enum: one of validating, running, completed, failed
|
|
input_file_path: Path # jsonl style file where each row contains custom_id and message_list
|
|
success_file_path: Path # jsonl file where each line is a SyntheticDataPoint
|
|
error_file_path: Path # custom_ids of the rows that failed, along with some information about each failure
|
|
metadata: json
|
|
|
|
== Callsites ==
|
|
|
|
callsite:
|
|
/synthetic_data_gen/submit_job
|
|
request_type:
|
|
post
|
|
description:
|
|
Submit a job to generate synthetic data using an LLM, followed by reward model scoring and filtering
|
|
request:
|
|
# batch inference params
|
|
model: str
|
|
prompt_file_path: Path # jsonl file where each line is a JSON-encoded List[Message] plus a custom_id
|
|
options: Options
|
|
num_generations: int
|
|
# reward model scoring params
|
|
reward_model: str
|
|
scoring_function: ScoringFunction
|
|
# filtering params
|
|
filtering_function: FilteringFunction
|
|
metadata: json
|
|
|
|
response:
|
|
synth_data_gen_job: SyntheticDataGenerationJob
|
|
|
|
callsite:
|
|
/synthetic_data_gen/job_status
|
|
request_type:
|
|
get
|
|
description:
|
|
Get the status of a previously submitted job
|
|
request:
|
|
job_id: str # unique identifier for the job
|
|
response:
|
|
synth_data_gen_job: SyntheticDataGenerationJob
|