mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 12:07:34 +00:00
Added batch inference
This commit is contained in:
parent
22d6093258
commit
6fb69efbe5
4 changed files with 57 additions and 10 deletions
|
@ -27,6 +27,7 @@ from finetuning_types import (
|
|||
from model_types import (
|
||||
BuiltinTool,
|
||||
Content,
|
||||
Dialog,
|
||||
InstructModel,
|
||||
Message,
|
||||
PretrainedModel,
|
||||
|
@ -130,6 +131,45 @@ class Inference(Protocol):
|
|||
) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...
|
||||
|
||||
|
||||
@json_schema_type
@dataclass
class BatchCompletionRequest:
    """Request for a batch of independent text completions.

    Each entry in `content_batch` is completed against the same `model`
    with the same `sampling_params`.
    """

    content_batch: List[Content]
    model: PretrainedModel
    # default_factory gives every request its own SamplingParams instance.
    # The previous `= SamplingParams()` default was evaluated once at class
    # creation and shared by all instances (and @dataclass rejects such a
    # default outright when the type is unhashable).
    sampling_params: SamplingParams = field(default_factory=SamplingParams)
    # 0 presumably means "no explicit token limit" — TODO confirm against
    # the serving implementation.
    max_tokens: int = 0
    logprobs: bool = False
|
||||
|
||||
|
||||
@json_schema_type
@dataclass
class BatchChatCompletionRequest:
    """Request for a batch of independent chat completions.

    Each Dialog in `batch_messages` is completed against the same `model`
    with the same tools and `sampling_params`.
    """

    model: InstructModel
    batch_messages: List[Dialog]
    # default_factory gives every request its own SamplingParams instance;
    # the previous `= SamplingParams()` default was created once and shared
    # across all instances (and @dataclass rejects unhashable defaults).
    # This also matches how `available_tools` below is declared.
    sampling_params: SamplingParams = field(default_factory=SamplingParams)

    # zero-shot tool definitions as input to the model
    available_tools: List[Union[BuiltinTool, ToolDefinition]] = field(
        default_factory=list
    )

    # 0 presumably means "no explicit token limit" — TODO confirm against
    # the serving implementation.
    max_tokens: int = 0
    logprobs: bool = False
|
||||
|
||||
|
||||
class BatchInference(Protocol):
    """Batch inference calls.

    Structural (duck-typed) interface for endpoints that run inference over
    a whole batch of inputs in a single call, rather than one request per
    input as in the streaming `Inference` protocol above.
    """

    # Returns one CompletionResponse per entry in request.content_batch —
    # presumably in input order; confirm against the server implementation.
    def post_batch_completion(
        self,
        request: BatchCompletionRequest,
    ) -> List[CompletionResponse]: ...

    # Returns one ChatCompletionResponse per Dialog in
    # request.batch_messages — presumably in input order; confirm against
    # the server implementation.
    def post_batch_chat_completion(
        self,
        request: BatchChatCompletionRequest,
    ) -> List[ChatCompletionResponse]: ...
|
||||
|
||||
|
||||
@dataclass
|
||||
class AgenticSystemCreateRequest:
|
||||
instructions: str
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue