llama-stack-mirror/simple_view/batch_inference.yml

== Schema ==
Message:
  role: str
  text: str
  attachments: List[MediaAttachment]
  eot: bool
  tool_call: bool  # if it's a tool call - builtin, custom, or ipython
  # for streaming
  is_complete: bool
  is_header_complete: bool
  metadata: json

MediaAttachment:
  attachment_type: MediaAttachmentType
  data_type: MediaAttachmentDataType
  data: str

MediaAttachmentType:  # enum [image, video, audio, text (or file)]
MediaAttachmentDataType:  # enum [raw_bytes, filepath, uri]
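
For concreteness, here is a minimal Python sketch of these message types using dataclasses and enums; the class and field names mirror the schema above, while the defaults are illustrative assumptions rather than part of the spec:

    from dataclasses import dataclass, field
    from enum import Enum
    from typing import Any, Dict, List

    class MediaAttachmentType(Enum):
        image = "image"
        video = "video"
        audio = "audio"
        text = "text"  # or file

    class MediaAttachmentDataType(Enum):
        raw_bytes = "raw_bytes"
        filepath = "filepath"
        uri = "uri"

    @dataclass
    class MediaAttachment:
        attachment_type: MediaAttachmentType
        data_type: MediaAttachmentDataType
        data: str

    @dataclass
    class Message:
        role: str
        text: str
        attachments: List[MediaAttachment] = field(default_factory=list)
        eot: bool = True         # end-of-turn marker (assumed default)
        tool_call: bool = False  # builtin, custom, or ipython tool call
        # streaming flags
        is_complete: bool = True
        is_header_complete: bool = True
        metadata: Dict[str, Any] = field(default_factory=dict)
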
BatchInference:
  job_id: str  # id provided by the api
  created: string  # format - date-time
  status: string  # enum [validating, running, completed, failed]
  input_file_path: Path  # jsonl style file where each line is a json encoded List[Message]
  success_file_path: Path
  error_file_path: Path
  metadata: json

Options:
  logprobs: bool
  max_tokens: int
  temperature: float
  top_p: float

Path:
  value: string
  type: string  # enum [raw_bytes, filepath, uri]
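
To make the input format concrete: each line of the prompt file is a JSON-encoded List[Message]. A hypothetical single-turn line, written from Python (the file name and contents are examples only, not prescribed by the spec):

    import json

    # One JSONL line: a JSON-encoded List[Message] (single user turn).
    line = json.dumps([
        {
            "role": "user",
            "text": "Summarize the attached file in two sentences.",
            "attachments": [
                {"attachment_type": "text", "data_type": "filepath", "data": "/data/report.txt"},
            ],
            "eot": True,
            "tool_call": False,
            "is_complete": True,
            "is_header_complete": True,
            "metadata": {},
        }
    ])

    with open("prompts.jsonl", "w") as f:
        f.write(line + "\n")
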
== Callsites ==

callsite:
  /batch_inference/submit_job
request_type:
  post
description:
  Submit a batch inference job
request:
  model: str
  prompt_file_path: Path  # jsonl style file where each line is a json encoded List[Message]
  options: Options
  num_generations: int
response:
  batch_inference_job: BatchInference
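
A client-side sketch of this call, assuming an HTTP server at localhost:5000 and the requests library; the host, port, and model name are assumptions, and only the route and payload shape come from the spec:

    import requests

    BASE_URL = "http://localhost:5000"  # hypothetical host; not part of the spec

    payload = {
        "model": "llama-3-8b",  # hypothetical model identifier
        "prompt_file_path": {"value": "prompts.jsonl", "type": "filepath"},
        "options": {"logprobs": False, "max_tokens": 256, "temperature": 0.7, "top_p": 0.9},
        "num_generations": 1,
    }

    resp = requests.post(f"{BASE_URL}/batch_inference/submit_job", json=payload)
    resp.raise_for_status()
    job = resp.json()["batch_inference_job"]
    print(job["job_id"], job["status"])  # e.g. "validating" right after submission
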
callsite:
  /batch_inference/job_status
request_type:
  get
description:
  Get status for an already submitted job
request:
  job_id: str  # unique identifier for the job
response:
  batch_inference_job: BatchInference
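
And a matching polling sketch against job_status, under the same assumptions as the submit example; the terminal states are taken from the status enum above:

    import time
    import requests

    def wait_for_job(base_url: str, job_id: str, interval_s: float = 10.0) -> dict:
        # Poll until the job leaves the non-terminal states (validating, running).
        while True:
            resp = requests.get(f"{base_url}/batch_inference/job_status",
                                params={"job_id": job_id})
            resp.raise_for_status()
            job = resp.json()["batch_inference_job"]
            if job["status"] in ("completed", "failed"):
                return job
            time.sleep(interval_s)

    finished = wait_for_job("http://localhost:5000", job["job_id"])  # job from the submit sketch
    print(finished["status"], finished["success_file_path"]["value"])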