mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-29 23:29:43 +00:00
Added batch inference
This commit is contained in:
parent
22d6093258
commit
6fb69efbe5
4 changed files with 57 additions and 10 deletions
|
@ -27,6 +27,7 @@ from finetuning_types import (
|
||||||
from model_types import (
|
from model_types import (
|
||||||
BuiltinTool,
|
BuiltinTool,
|
||||||
Content,
|
Content,
|
||||||
|
Dialog,
|
||||||
InstructModel,
|
InstructModel,
|
||||||
Message,
|
Message,
|
||||||
PretrainedModel,
|
PretrainedModel,
|
||||||
|
@ -130,6 +131,45 @@ class Inference(Protocol):
|
||||||
) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...
|
) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
@dataclass
class BatchCompletionRequest:
    """Request payload for ``BatchInference.post_batch_completion``.

    Bundles several contents to be completed by one pretrained model under a
    single set of sampling parameters.
    """

    # Contents to complete. Presumably one CompletionResponse is returned per
    # entry, in the same order -- confirm against the implementation.
    content_batch: List[Content]
    model: PretrainedModel
    # Built per instance via default_factory: a bare ``SamplingParams()``
    # default would be one object shared by every request, and dataclasses
    # reject such unhashable defaults outright on Python 3.11+.
    sampling_params: SamplingParams = field(default_factory=SamplingParams)
    # NOTE(review): 0 presumably means "no explicit limit" -- confirm.
    max_tokens: int = 0
    logprobs: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
@dataclass
class BatchChatCompletionRequest:
    """Request payload for ``BatchInference.post_batch_chat_completion``.

    Bundles several dialogs to be answered by one instruct model under a
    single set of sampling parameters and tool definitions.
    """

    model: InstructModel
    # Dialogs to run. Presumably one ChatCompletionResponse is returned per
    # dialog, in the same order -- confirm against the implementation.
    batch_messages: List[Dialog]
    # Built per instance via default_factory: a bare ``SamplingParams()``
    # default would be one object shared by every request, and dataclasses
    # reject such unhashable defaults outright on Python 3.11+.
    sampling_params: SamplingParams = field(default_factory=SamplingParams)

    # zero-shot tool definitions as input to the model
    available_tools: List[Union[BuiltinTool, ToolDefinition]] = field(
        default_factory=list
    )

    # NOTE(review): 0 presumably means "no explicit limit" -- confirm.
    max_tokens: int = 0
    logprobs: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
class BatchInference(Protocol):
    """Batch inference calls"""

    def post_batch_completion(
        self, request: BatchCompletionRequest
    ) -> List[CompletionResponse]:
        """Run a batch completion request and return a list of responses."""
        ...

    def post_batch_chat_completion(
        self, request: BatchChatCompletionRequest
    ) -> List[ChatCompletionResponse]:
        """Run a batch chat completion request and return a list of responses."""
        ...
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class AgenticSystemCreateRequest:
|
class AgenticSystemCreateRequest:
|
||||||
instructions: str
|
instructions: str
|
||||||
|
|
|
@ -121,6 +121,13 @@ class Message:
|
||||||
tool_responses: List[ToolResponse] = field(default_factory=list)
|
tool_responses: List[ToolResponse] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
@dataclass
class Dialog:
    """A message paired with a list of accompanying history messages."""

    message: Message
    # Defaults to an empty list rather than None so the value always matches
    # the List[Message] annotation; default_factory gives each Dialog its own
    # list. Presumably holds the earlier turns of the conversation -- confirm.
    message_history: List[Message] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class SamplingParams:
|
class SamplingParams:
|
||||||
temperature: float = 0.0
|
temperature: float = 0.0
|
||||||
|
|
|
@ -2406,22 +2406,22 @@
|
||||||
],
|
],
|
||||||
"tags": [
|
"tags": [
|
||||||
{
|
{
|
||||||
"name": "RewardScoring"
|
"name": "SyntheticDataGeneration"
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Inference"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Datasets"
|
"name": "Datasets"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "SyntheticDataGeneration"
|
"name": "AgenticSystem"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Inference"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Finetuning"
|
"name": "Finetuning"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "AgenticSystem"
|
"name": "RewardScoring"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "ShieldConfig",
|
"name": "ShieldConfig",
|
||||||
|
|
|
@ -1469,12 +1469,12 @@ security:
|
||||||
servers:
|
servers:
|
||||||
- url: http://llama.meta.com
|
- url: http://llama.meta.com
|
||||||
tags:
|
tags:
|
||||||
- name: RewardScoring
|
|
||||||
- name: Inference
|
|
||||||
- name: Datasets
|
|
||||||
- name: SyntheticDataGeneration
|
- name: SyntheticDataGeneration
|
||||||
- name: Finetuning
|
- name: Datasets
|
||||||
- name: AgenticSystem
|
- name: AgenticSystem
|
||||||
|
- name: Inference
|
||||||
|
- name: Finetuning
|
||||||
|
- name: RewardScoring
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" />
|
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" />
|
||||||
name: ShieldConfig
|
name: ShieldConfig
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue