From f6146f8e58ad4eb1b24a415d52b09af98dd5d2f3 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Thu, 19 Sep 2024 21:44:12 -0700
Subject: [PATCH] 2 models routing client

---
 llama_stack/apis/inference/client.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llama_stack/apis/inference/client.py b/llama_stack/apis/inference/client.py
index 51cc586fe..cdcca8b6b 100644
--- a/llama_stack/apis/inference/client.py
+++ b/llama_stack/apis/inference/client.py
@@ -89,10 +89,11 @@ async def run_main(host: str, port: int, stream: bool):
     message = UserMessage(
         content="hello world, write me a 2 sentence poem about the moon"
     )
+    cprint(f"User>{message.content}", "green")

     iterator = client.chat_completion(
         ChatCompletionRequest(
-            model="Meta-Llama3.1-8B-Instruct",
+            model="Meta-Llama3.1-8B",
             messages=[message],
             stream=stream,
         )
@@ -103,7 +104,7 @@ async def run_main(host: str, port: int, stream: bool):
     cprint(f"User>{message.content}", "green")
     iterator = client.chat_completion(
         ChatCompletionRequest(
-            model="Meta-Llama3.1-8B",
+            model="Meta-Llama3.1-8B-Instruct",
             messages=[message],
             stream=stream,
         )
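
For context, a minimal sketch of what run_main looks like with this patch applied: the same prompt is sent twice, once per model, so a two-model routing setup can be exercised end to end. The request shapes, model names, and cprint calls come straight from the diff; the import paths, the InferenceClient name and constructor, and the response-printing loop are assumptions made for illustration, not the file's exact contents.

# Sketch only: assumed imports and client class, see note above.
import asyncio

from termcolor import cprint

from llama_stack.apis.inference import ChatCompletionRequest, UserMessage  # assumed import path
from llama_stack.apis.inference.client import InferenceClient  # hypothetical class name


async def run_main(host: str, port: int, stream: bool):
    # Point the client at a running llama-stack server.
    client = InferenceClient(f"http://{host}:{port}")

    message = UserMessage(
        content="hello world, write me a 2 sentence poem about the moon"
    )
    cprint(f"User>{message.content}", "green")

    # First completion is routed to the base model...
    iterator = client.chat_completion(
        ChatCompletionRequest(
            model="Meta-Llama3.1-8B",
            messages=[message],
            stream=stream,
        )
    )
    async for chunk in iterator:
        print(chunk)  # simplified: the real client pretty-prints streamed events

    # ...then the same prompt goes to the instruct-tuned model,
    # covering both routes of the "2 models routing" change.
    cprint(f"User>{message.content}", "green")
    iterator = client.chat_completion(
        ChatCompletionRequest(
            model="Meta-Llama3.1-8B-Instruct",
            messages=[message],
            stream=stream,
        )
    )
    async for chunk in iterator:
        print(chunk)


if __name__ == "__main__":
    asyncio.run(run_main("localhost", 5000, stream=True))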