mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-15 09:36:10 +00:00
65 lines
1.4 KiB
YAML
== Schema ==

Message:
  role: str
  text: str
  attachments: List[MediaAttachment]
  eot: bool
  tool_call: bool  # if it's a tool call - builtin or custom or ipython
  # for streaming
  is_complete: bool
  is_header_complete: bool
  metadata: json

MediaAttachment:
  attachment_type: MediaAttachmentType
  data_type: MediaAttachmentDataType
  data: str

MediaAttachmentType: # enum [image, video, audio, text(or file)]
MediaAttachmentDataType: # enum [raw_bytes, filepath, uri]

BatchInference:
  job_id: str  # id provided by the api
  created: string  # format - date-time
  status: string  # enum (validating, running, completed, failed)
  input_file_path: Path  # jsonl style file where each line is a json encoded List[Message]
  success_file_path: Path
  error_file_path: Path
  metadata: json

Options:
  logprobs: bool
  max_tokens: int
  temperature: float
  top_p: float

Path:
  value: string
  type: string  # enum [raw_bytes, filepath, uri]

== Callsites ==

callsite:
  /batch_inference/submit_job
request_type:
  post
description:
  Submit a batch inference job
request:
  model: str
  prompt_file_path: Path  # jsonl style file where each line is a json encoded List[Message]
  options: Options
  num_generations: int
response:
  batch_inference_job: BatchInference

callsite:
  /batch_inference/job_status
request_type:
  get
description:
  Get status for an already submitted job
request:
  job_id: str  # unique identifier for the job
response:
  batch_inference_job: BatchInference