Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-29 15:23:51 +00:00)
Commit a7be58e4e1 (parent 0c7c6b7e02): migrate inference/completion
2 changed files with 6 additions and 1 deletion
```diff
@@ -170,7 +170,11 @@ class Inference(Protocol):
     @webmethod(route="/inference/completion")
     async def completion(
         self,
-        request: CompletionRequest,
+        model: str,
+        content: InterleavedTextMedia,
+        sampling_params: Optional[SamplingParams] = SamplingParams(),
+        stream: Optional[bool] = False,
+        logprobs: Optional[LogProbConfig] = None,
     ) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ...

     @webmethod(route="/inference/chat_completion")
```
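For context, a hedged sketch of how a caller uses the new flattened signature: instead of constructing a `CompletionRequest` object, arguments are passed directly. The `FakeInferenceClient` class and the stand-in types below are illustrative assumptions, not llama-stack code; only the parameter names and defaults come from the diff above.

```python
# Minimal sketch of calling the flattened completion() signature.
import asyncio
from dataclasses import dataclass
from typing import Optional


@dataclass
class SamplingParams:
    # placeholder fields; the real class is defined in llama-stack
    temperature: float = 0.7
    top_p: float = 0.9


class FakeInferenceClient:
    async def completion(
        self,
        model: str,
        content: str,  # InterleavedTextMedia accepts plain text among other types
        sampling_params: Optional[SamplingParams] = None,
        stream: Optional[bool] = False,
        logprobs=None,
    ) -> str:
        return f"[{model}] echo: {content}"


async def main() -> None:
    client = FakeInferenceClient()
    # Callers now pass flat arguments instead of a CompletionRequest object.
    result = await client.completion(
        model="Llama3.1-8B",  # hypothetical model id
        content="Write a haiku about tensors.",
        sampling_params=SamplingParams(temperature=0.2),
    )
    print(result)


asyncio.run(main())
```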
```diff
@@ -65,6 +65,7 @@ class MetaReferenceInferenceImpl(Inference):
     ) -> AsyncIterator[
         Union[ChatCompletionResponseStreamChunk, ChatCompletionResponse]
     ]:
+        # wrapper request to make it easier to pass around (internal only, not exposed to API)
         request = ChatCompletionRequest(
             model=model,
             messages=messages,
```
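The second hunk documents the pattern the implementation relies on: the public method keeps the flat API signature and immediately bundles the arguments into an internal request object for plumbing. A minimal sketch of that pattern, using simplified stand-in types rather than the real llama-stack definitions:

```python
# Sketch of the internal "wrapper request" pattern from the hunk above.
import asyncio
from dataclasses import dataclass
from typing import AsyncIterator, List


@dataclass
class ChatCompletionRequest:
    model: str
    messages: List[dict]
    stream: bool = False


class MetaReferenceInferenceImplSketch:
    async def chat_completion(
        self, model: str, messages: List[dict], stream: bool = False
    ) -> AsyncIterator[str]:
        # wrapper request to make it easier to pass around (internal only,
        # not exposed to API)
        request = ChatCompletionRequest(model=model, messages=messages, stream=stream)
        async for chunk in self._generate(request):
            yield chunk

    async def _generate(self, request: ChatCompletionRequest) -> AsyncIterator[str]:
        # internal helpers receive one object instead of many loose arguments
        yield f"model={request.model}, turns={len(request.messages)}"


async def main() -> None:
    impl = MetaReferenceInferenceImplSketch()
    async for chunk in impl.chat_completion(
        "Llama3.1-8B", [{"role": "user", "content": "hi"}]
    ):
        print(chunk)


asyncio.run(main())
```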