mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-31 09:33:54 +00:00
feat(api): define a more coherent jobs api across different flows
Signed-off-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com>
This commit is contained in:
parent
71ed47ea76
commit
0f50cfa561
15 changed files with 1864 additions and 1670 deletions
|
|
@ -4,9 +4,11 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import List, Optional, Protocol, runtime_checkable
|
||||
from typing import List, Literal, Optional, Protocol, runtime_checkable
|
||||
|
||||
from llama_stack.apis.common.job_types import Job
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.common.job_types import BaseJob
|
||||
from llama_stack.apis.inference import (
|
||||
InterleavedContent,
|
||||
LogProbConfig,
|
||||
|
|
@ -20,6 +22,14 @@ from llama_stack.apis.inference import (
|
|||
from llama_stack.schema_utils import webmethod
|
||||
|
||||
|
||||
class BatchInferenceJob(BaseJob, BaseModel):
    """Job model for batch inference.

    Combines the shared ``BaseJob`` definition with pydantic's ``BaseModel``.
    The ``type`` field is pinned to the literal ``"batch_inference"``, which
    is also its default value.
    """

    # Fixed tag for this job kind; no other value is accepted by the annotation.
    type: Literal["batch_inference"] = "batch_inference"
|
||||
|
||||
|
||||
class ListBatchInferenceJobsResponse(BaseModel):
    """Response model wrapping a collection of batch inference jobs."""

    # The jobs carried by this response, each a ``BatchInferenceJob``.
    data: list[BatchInferenceJob]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class BatchInference(Protocol):
|
||||
"""Batch inference API for generating completions and chat completions.
|
||||
|
|
@ -38,7 +48,7 @@ class BatchInference(Protocol):
|
|||
sampling_params: Optional[SamplingParams] = None,
|
||||
response_format: Optional[ResponseFormat] = None,
|
||||
logprobs: Optional[LogProbConfig] = None,
|
||||
) -> Job: ...
|
||||
) -> BatchInferenceJob: ...
|
||||
|
||||
@webmethod(route="/batch-inference/chat-completion", method="POST")
|
||||
async def chat_completion(
|
||||
|
|
@ -52,4 +62,4 @@ class BatchInference(Protocol):
|
|||
tool_prompt_format: Optional[ToolPromptFormat] = None,
|
||||
response_format: Optional[ResponseFormat] = None,
|
||||
logprobs: Optional[LogProbConfig] = None,
|
||||
) -> Job: ...
|
||||
) -> BatchInferenceJob: ...
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue