split batch_inference from inference

This commit is contained in:
Ashwin Bharambe 2024-08-26 13:17:59 -07:00
parent 986a865e62
commit dc433f6c90
5 changed files with 75 additions and 12 deletions

View file

@@ -185,15 +185,3 @@ class Inference(Protocol):
model: str,
contents: List[InterleavedTextMedia],
) -> EmbeddingsResponse: ...
@webmethod(route="/inference/batch_completion")
async def batch_completion(self, request: BatchCompletionRequest) -> BatchCompletionResponse:
    """Interface stub for the ``/inference/batch_completion`` route.

    Args:
        request: The ``BatchCompletionRequest`` to process.

    Returns:
        A ``BatchCompletionResponse`` (per the annotation; this stub has no
        implementation of its own).
    """
    ...
@webmethod(route="/inference/batch_chat_completion")
async def batch_chat_completion(
    self, request: BatchChatCompletionRequest
) -> BatchChatCompletionResponse:
    """Interface stub for the ``/inference/batch_chat_completion`` route.

    Args:
        request: The ``BatchChatCompletionRequest`` to process.

    Returns:
        A ``BatchChatCompletionResponse`` (per the annotation; this stub has
        no implementation of its own).
    """
    ...