diff --git a/llama_stack/apis/inference/client.py b/llama_stack/apis/inference/client.py
index 15954ef57..f5321c628 100644
--- a/llama_stack/apis/inference/client.py
+++ b/llama_stack/apis/inference/client.py
@@ -90,7 +90,7 @@ async def run_main(host: str, port: int, stream: bool):
     cprint(f"User>{message.content}", "green")
     iterator = client.chat_completion(
         ChatCompletionRequest(
-            model="ollama-1",
+            model="Meta-Llama3.1-8B-Instruct",
             messages=[message],
             stream=stream,
         )