implement full-passthrough in the server

2025-12-04 02:03:44 +00:00 · 2024-08-03 14:15:20 -07:00 · 2024-08-03 14:15:20 -07:00 · 9dafa6ad94
commit 9dafa6ad94
parent 38fd76f85c
8 changed files with 69 additions and 71 deletions
--- a/llama_toolchain/inference/client.py
+++ b/llama_toolchain/inference/client.py
@@ -75,7 +75,7 @@ class InferenceClient(Inference):
 async def run_main(host: str, port: int, stream: bool):
    client = InferenceClient(f"http://{host}:{port}")

-    message = UserMessage(content="hello world, help me out here")
+    message = UserMessage(content="hello world, troll me in two-paragraphs about 42")
    cprint(f"User>{message.content}", "green")
    iterator = client.chat_completion(
        ChatCompletionRequest(