Make all API methods async def again

2025-12-14 18:42:39 +00:00 · 2024-10-18 16:50:57 -07:00 · 2024-10-18 16:50:57 -07:00 · 627edaf407
commit 627edaf407
parent 95a96afe34
17 changed files with 120 additions and 145 deletions
--- a/llama_stack/apis/inference/client.py
+++ b/llama_stack/apis/inference/client.py
@@ -42,10 +42,10 @@ class InferenceClient(Inference):
    async def shutdown(self) -> None:
        pass

-    def completion(self, request: CompletionRequest) -> AsyncGenerator:
+    async def completion(self, request: CompletionRequest) -> AsyncGenerator:
        raise NotImplementedError()

-    def chat_completion(
+    async def chat_completion(
        self,
        model: str,
        messages: List[Message],
@@ -139,7 +139,8 @@ async def run_main(
    else:
        logprobs_config = None

-    iterator = client.chat_completion(
+    assert stream, "Non streaming not supported here"
+    iterator = await client.chat_completion(
        model=model,
        messages=[message],
        stream=stream,