llama-stack-mirror/simple_view/batch_inference.yml

== Schema ==
Message:
  role: str
  text: str
  attachments: List[MediaAttachment]
  eot: bool
  tool_call: bool  # if it's a tool call - builtin, custom, or ipython
  # for streaming
  is_complete: bool
  is_header_complete: bool
  metadata: json

MediaAttachment:
  attachment_type: MediaAttachmentType
  data_type: MediaAttachmentDataType
  data: str

MediaAttachmentType:  # enum [image, video, audio, text (or file)]
MediaAttachmentDataType:  # enum [raw_bytes, filepath, uri]
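
For concreteness, here is a minimal Python sketch of these message types using dataclasses and enums; the class and field names mirror the schema above, while the defaults are illustrative assumptions rather than part of the spec:

    from dataclasses import dataclass, field
    from enum import Enum
    from typing import Any, Dict, List

    class MediaAttachmentType(Enum):
        image = "image"
        video = "video"
        audio = "audio"
        text = "text"  # or file

    class MediaAttachmentDataType(Enum):
        raw_bytes = "raw_bytes"
        filepath = "filepath"
        uri = "uri"

    @dataclass
    class MediaAttachment:
        attachment_type: MediaAttachmentType
        data_type: MediaAttachmentDataType
        data: str

    @dataclass
    class Message:
        role: str
        text: str
        attachments: List[MediaAttachment] = field(default_factory=list)
        eot: bool = True         # end-of-turn marker (assumed default)
        tool_call: bool = False  # builtin, custom, or ipython tool call
        # streaming flags
        is_complete: bool = True
        is_header_complete: bool = True
        metadata: Dict[str, Any] = field(default_factory=dict)
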
BatchInference:
  job_id: str  # id provided by the api
  created: string  # format - date-time
  status: string  # enum [validating, running, completed, failed]
  input_file_path: Path  # jsonl style file where each line is a json encoded List[Message]
  success_file_path: Path
  error_file_path: Path
  metadata: json

Options:
  logprobs: bool
  max_tokens: int
  temperature: float
  top_p: float

Path:
  value: string
  type: string  # enum [raw_bytes, filepath, uri]
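
To make the input format concrete: each line of the prompt file is a JSON-encoded List[Message]. A hypothetical single-turn line, written from Python (the file name and contents are examples only, not prescribed by the spec):

    import json

    # One JSONL line: a JSON-encoded List[Message] (single user turn).
    line = json.dumps([
        {
            "role": "user",
            "text": "Summarize the attached file in two sentences.",
            "attachments": [
                {"attachment_type": "text", "data_type": "filepath", "data": "/data/report.txt"},
            ],
            "eot": True,
            "tool_call": False,
            "is_complete": True,
            "is_header_complete": True,
            "metadata": {},
        }
    ])

    with open("prompts.jsonl", "w") as f:
        f.write(line + "\n")
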
== Callsites ==

callsite:
  /batch_inference/submit_job
request_type:
  post
description:
  Submit a batch inference job
request:
  model: str
  prompt_file_path: Path  # jsonl style file where each line is a json encoded List[Message]
  options: Options
  num_generations: int
response:
  batch_inference_job: BatchInference
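
A client-side sketch of this call, assuming an HTTP server at localhost:5000 and the requests library; the host, port, and model name are assumptions, and only the route and payload shape come from the spec:

    import requests

    BASE_URL = "http://localhost:5000"  # hypothetical host; not part of the spec

    payload = {
        "model": "llama-3-8b",  # hypothetical model identifier
        "prompt_file_path": {"value": "prompts.jsonl", "type": "filepath"},
        "options": {"logprobs": False, "max_tokens": 256, "temperature": 0.7, "top_p": 0.9},
        "num_generations": 1,
    }

    resp = requests.post(f"{BASE_URL}/batch_inference/submit_job", json=payload)
    resp.raise_for_status()
    job = resp.json()["batch_inference_job"]
    print(job["job_id"], job["status"])  # e.g. "validating" right after submission
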
callsite:
  /batch_inference/job_status
request_type:
  get
description:
  Get status for an already submitted job
request:
  job_id: str  # unique identifier for the job
response:
  batch_inference_job: BatchInference
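
And a matching polling sketch against job_status, under the same assumptions as the submit example; the terminal states are taken from the status enum above:

    import time
    import requests

    def wait_for_job(base_url: str, job_id: str, interval_s: float = 10.0) -> dict:
        # Poll until the job leaves the non-terminal states (validating, running).
        while True:
            resp = requests.get(f"{base_url}/batch_inference/job_status",
                                params={"job_id": job_id})
            resp.raise_for_status()
            job = resp.json()["batch_inference_job"]
            if job["status"] in ("completed", "failed"):
                return job
            time.sleep(interval_s)

    finished = wait_for_job("http://localhost:5000", job["job_id"])  # job from the submit sketch
    print(finished["status"], finished["success_file_path"]["value"])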