mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-24 21:29:53 +00:00
updates to the batch inference apis
This commit is contained in:
parent
eb81ad1ffd
commit
04f89ad315
2 changed files with 193 additions and 76 deletions
65
simple_view/batch_inference.yml
Normal file
65
simple_view/batch_inference.yml
Normal file
|
@@ -0,0 +1,65 @@
|
|||
== Schema ==
|
||||
Message:
|
||||
role: str
|
||||
text: str
|
||||
attachments: List[MediaAttachment]
|
||||
eot: bool
|
||||
tool_call: bool # if it's a tool call - builtin or custom or ipython
|
||||
# for streaming
|
||||
is_complete: bool
|
||||
is_header_complete: bool
|
||||
metadata: json
|
||||
|
||||
MediaAttachment:
|
||||
attachment_type: MediaAttachmentType
|
||||
data_type: MediaAttachmentDataType
|
||||
data: str
|
||||
|
||||
MediaAttachmentType: # enum [image, video, audio, text(or file)]
|
||||
MediaAttachmentDataType: # enum [raw_bytes, filepath, uri]
|
||||
|
||||
BatchInference:
|
||||
job_id: str # id provided by the api
|
||||
created: string # format - date-time
|
||||
status: string # enum (validating, running, completed, failed)
|
||||
input_file_path: Path # jsonl style file where each line is a json encoded List[Message]
|
||||
success_file_path: Path
|
||||
error_file_path: Path
|
||||
metadata: json
|
||||
|
||||
Options:
|
||||
logprobs: bool
|
||||
max_tokens: int
|
||||
temperature: float
|
||||
top_p: float
|
||||
|
||||
Path:
|
||||
value: string
|
||||
type: string # enum [raw_bytes, filepath, uri]
|
||||
|
||||
== Callsites ==
|
||||
|
||||
callsite:
|
||||
/batch_inference/submit_job
|
||||
request_type:
|
||||
post
|
||||
description:
|
||||
Submit a batch inference job
|
||||
request:
|
||||
model: str
|
||||
prompt_file_path: Path # jsonl style file where each line is a json encoded List[Message]
|
||||
options: Options
|
||||
num_generations: int
|
||||
response:
|
||||
batch_inference_job: BatchInference
|
||||
|
||||
callsite:
|
||||
/batch_inference/job_status
|
||||
request_type:
|
||||
get
|
||||
description:
|
||||
Get status for an already submitted job
|
||||
request:
|
||||
job_id: str # unique identifier for the job
|
||||
response:
|
||||
batch_inference_job: BatchInference
|
Loading…
Add table
Add a link
Reference in a new issue