diff --git a/llama_stack/apis/inference/client.py b/llama_stack/apis/inference/client.py
index 15954ef57..f5321c628 100644
--- a/llama_stack/apis/inference/client.py
+++ b/llama_stack/apis/inference/client.py
@@ -90,7 +90,7 @@ async def run_main(host: str, port: int, stream: bool):
     cprint(f"User>{message.content}", "green")
     iterator = client.chat_completion(
         ChatCompletionRequest(
-            model="ollama-1",
+            model="Meta-Llama3.1-8B-Instruct",
             messages=[message],
             stream=stream,
         )