migrate inference/completion

This commit is contained in:
Xi Yan 2024-09-11 12:29:22 -07:00
parent 0c7c6b7e02
commit a7be58e4e1
2 changed files with 6 additions and 1 deletions

View file

@ -170,7 +170,11 @@ class Inference(Protocol):
@webmethod(route="/inference/completion")
async def completion(
    self,
    # NOTE(review): in this diff view `request` appears to be the parameter the
    # commit replaces with the flattened arguments below — confirm whether it
    # should still be present before relying on this signature.
    request: CompletionRequest,
    model: str,
    content: InterleavedTextMedia,
    # NOTE(review): the default instance is created once, when the signature is
    # evaluated — presumably fine for a Protocol stub; verify implementers do
    # not mutate it.
    sampling_params: Optional[SamplingParams] = SamplingParams(),
    stream: Optional[bool] = False,
    logprobs: Optional[LogProbConfig] = None,
) -> Union[CompletionResponse, CompletionResponseStreamChunk]:
    """Protocol stub for the ``/inference/completion`` web method.

    Declares the interface only; there is no implementation here.

    Args:
        request: Request object (see the review note on the parameter).
        model: Model identifier string.
        content: Content to complete, as ``InterleavedTextMedia``.
        sampling_params: Optional sampling configuration; defaults to a
            ``SamplingParams()`` instance.
        stream: Optional flag, ``False`` by default — presumably selects the
            stream-chunk return type; confirm against implementations.
        logprobs: Optional ``LogProbConfig``; ``None`` by default.

    Returns:
        A ``CompletionResponse`` or a ``CompletionResponseStreamChunk``.
    """
    ...
@webmethod(route="/inference/chat_completion")

View file

@ -65,6 +65,7 @@ class MetaReferenceInferenceImpl(Inference):
) -> AsyncIterator[
Union[ChatCompletionResponseStreamChunk, ChatCompletionResponse]
]:
# wrapper request to make it easier to pass around (internal only, not exposed to API)
request = ChatCompletionRequest(
model=model,
messages=messages,