From f6146f8e58ad4eb1b24a415d52b09af98dd5d2f3 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Thu, 19 Sep 2024 21:44:12 -0700
Subject: [PATCH] 2 models routing client

---
 llama_stack/apis/inference/client.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llama_stack/apis/inference/client.py b/llama_stack/apis/inference/client.py
index 51cc586fe..cdcca8b6b 100644
--- a/llama_stack/apis/inference/client.py
+++ b/llama_stack/apis/inference/client.py
@@ -89,10 +89,11 @@ async def run_main(host: str, port: int, stream: bool):
     message = UserMessage(
         content="hello world, write me a 2 sentence poem about the moon"
     )
+    cprint(f"User>{message.content}", "green")

     iterator = client.chat_completion(
         ChatCompletionRequest(
-            model="Meta-Llama3.1-8B-Instruct",
+            model="Meta-Llama3.1-8B",
             messages=[message],
             stream=stream,
         )
@@ -103,7 +104,7 @@ async def run_main(host: str, port: int, stream: bool):
     cprint(f"User>{message.content}", "green")
     iterator = client.chat_completion(
         ChatCompletionRequest(
-            model="Meta-Llama3.1-8B",
+            model="Meta-Llama3.1-8B-Instruct",
             messages=[message],
             stream=stream,
         )
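
For context, a minimal sketch of what run_main looks like with this patch applied: the same prompt is sent twice, once per model, so a two-model routing setup can be exercised end to end. The request shapes, model names, and cprint calls come straight from the diff; the import paths, the InferenceClient name and constructor, and the response-printing loop are assumptions made for illustration, not the file's exact contents.

# Sketch only: assumed imports and client class, see note above.
import asyncio

from termcolor import cprint

from llama_stack.apis.inference import ChatCompletionRequest, UserMessage  # assumed import path
from llama_stack.apis.inference.client import InferenceClient  # hypothetical class name


async def run_main(host: str, port: int, stream: bool):
    # Point the client at a running llama-stack server.
    client = InferenceClient(f"http://{host}:{port}")

    message = UserMessage(
        content="hello world, write me a 2 sentence poem about the moon"
    )
    cprint(f"User>{message.content}", "green")

    # First completion is routed to the base model...
    iterator = client.chat_completion(
        ChatCompletionRequest(
            model="Meta-Llama3.1-8B",
            messages=[message],
            stream=stream,
        )
    )
    async for chunk in iterator:
        print(chunk)  # simplified: the real client pretty-prints streamed events

    # ...then the same prompt goes to the instruct-tuned model,
    # covering both routes of the "2 models routing" change.
    cprint(f"User>{message.content}", "green")
    iterator = client.chat_completion(
        ChatCompletionRequest(
            model="Meta-Llama3.1-8B-Instruct",
            messages=[message],
            stream=stream,
        )
    )
    async for chunk in iterator:
        print(chunk)


if __name__ == "__main__":
    asyncio.run(run_main("localhost", 5000, stream=True))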