diff --git a/docs/source/inference-loop-history.py b/docs/source/inference-loop-history.py
new file mode 100644
index 000000000..5dc61fc51
--- /dev/null
+++ b/docs/source/inference-loop-history.py
@@ -0,0 +1,37 @@
+import asyncio
+
+from llama_stack_client import LlamaStackClient
+from llama_stack_client.types import UserMessage
+from termcolor import cprint
+
+client = LlamaStackClient(
+    base_url="http://localhost:5000",
+)
+
+
+async def chat_loop():
+    # Keep the full conversation so every request includes the prior turns.
+    conversation_history = []
+
+    while True:
+        user_input = input("User> ")
+        if user_input.lower() in ["exit", "quit", "bye"]:
+            cprint("Ending conversation. Goodbye!", "yellow")
+            break
+
+        user_message = UserMessage(content=user_input, role="user")
+        conversation_history.append(user_message)
+
+        response = client.inference.chat_completion(
+            messages=conversation_history,
+            model="Llama3.2-11B-Vision-Instruct",
+        )
+
+        cprint(f"> Response: {response.completion_message.content}", "cyan")
+
+        # Append the assistant's reply as-is so it keeps its "assistant" role,
+        # rather than re-wrapping it as a user message on the next request.
+        conversation_history.append(response.completion_message)
+
+
+asyncio.run(chat_loop())
diff --git a/docs/source/inference-loop.py b/docs/source/inference-loop.py
new file mode 100644
index 000000000..031f22d5e
--- /dev/null
+++ b/docs/source/inference-loop.py
@@ -0,0 +1,31 @@
+import asyncio
+
+from llama_stack_client import LlamaStackClient
+from llama_stack_client.types import UserMessage
+from termcolor import cprint
+
+client = LlamaStackClient(
+    base_url="http://localhost:5000",
+)
+
+
+async def chat_loop():
+    while True:
+
+        user_input = input("User> ")
+
+        if user_input.lower() in ["exit", "quit", "bye"]:
+            cprint("Ending conversation. Goodbye!", "yellow")
+            break
+
+        message = UserMessage(content=user_input, role="user")
+
+        response = client.inference.chat_completion(
+            messages=[message],
+            model="Llama3.2-11B-Vision-Instruct",
+        )
+
+        cprint(f"> Response: {response.completion_message.content}", "cyan")
+
+
+asyncio.run(chat_loop())