split batch_inference from inference

2025-10-04 12:07:34 +00:00 · 2024-08-26 13:17:59 -07:00 · 2024-08-26 13:17:59 -07:00 · dc433f6c90
commit dc433f6c90
parent 986a865e62
5 changed files with 75 additions and 12 deletions
--- a/llama_toolchain/inference/api/api.py
+++ b/llama_toolchain/inference/api/api.py
@@ -185,15 +185,3 @@ class Inference(Protocol):
        model: str,
        contents: List[InterleavedTextMedia],
    ) -> EmbeddingsResponse: ...
-
-    @webmethod(route="/inference/batch_completion")
-    async def batch_completion(
-        self,
-        request: BatchCompletionRequest,
-    ) -> BatchCompletionResponse: ...
-
-    @webmethod(route="/inference/batch_chat_completion")
-    async def batch_chat_completion(
-        self,
-        request: BatchChatCompletionRequest,
-    ) -> BatchChatCompletionResponse: ...