[1/n] migrate inference/chat_completion

This commit is contained in:
Xi Yan 2024-09-11 12:21:19 -07:00
parent 1433aaf9f7
commit 0c7c6b7e02
3 changed files with 35 additions and 7 deletions

View file

@@ -176,7 +176,15 @@ class Inference(Protocol):
@webmethod(route="/inference/chat_completion")
async def chat_completion(
    self,
    # NOTE(review): this listing is a diff hunk rendered without +/- markers;
    # `request` looks like the pre-migration parameter that the flattened
    # arguments below supersede -- confirm against the committed file.
    request: ChatCompletionRequest,
    model: str,
    messages: List[Message],
    # NOTE(review): this default instance is created once, when the signature
    # is evaluated, and is shared by every call that omits the argument --
    # confirm SamplingParams is never mutated in place by implementations.
    sampling_params: Optional[SamplingParams] = SamplingParams(),
    # zero-shot tool definitions as input to the model
    # Defaults to None (no tools supplied). The previous default was the
    # builtin `list` class itself, which does not match the annotation.
    tools: Optional[List[ToolDefinition]] = None,
    tool_choice: Optional[ToolChoice] = ToolChoice.auto,
    tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json,
    stream: Optional[bool] = False,
    logprobs: Optional[LogProbConfig] = None,
) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]:
    """Declare the chat-completion endpoint served at /inference/chat_completion.

    This is an interface stub: the body is empty and the behavior is supplied
    by whichever class implements the protocol.

    Args:
        request: Request object of type ChatCompletionRequest (see the review
            note on the parameter).
        model: Model to run, given as a string (presumably an identifier --
            confirm the expected format with the implementations).
        messages: The messages passed to the model.
        sampling_params: Sampling configuration; defaults to a
            default-constructed SamplingParams.
        tools: Zero-shot tool definitions given to the model, or None when no
            tools are supplied.
        tool_choice: Tool-choice setting; defaults to ToolChoice.auto.
        tool_prompt_format: Tool prompt format; defaults to
            ToolPromptFormat.json.
        stream: Streaming flag; defaults to False (presumably selects the
            stream-chunk return type -- confirm with the implementations).
        logprobs: Optional log-probability configuration; defaults to None.

    Returns:
        Either a ChatCompletionResponse or a ChatCompletionResponseStreamChunk.
    """
    ...
@webmethod(route="/inference/embeddings")