adding a few more inference examples

2025-10-14 22:33:48 +00:00 · 2024-10-31 15:46:47 -07:00 · 2024-10-31 15:46:47 -07:00 · 703d7ebb6e
commit 703d7ebb6e
parent 626dffa0d9
2 changed files with 69 additions and 0 deletions
--- a/docs/source/inference-loop-history.py
+++ b/docs/source/inference-loop-history.py
@ -0,0 +1,37 @@
+import asyncio
+
+from llama_stack_client import LlamaStackClient
+from llama_stack_client.types import UserMessage
+from termcolor import cprint
+
+client = LlamaStackClient(
+    base_url="http://localhost:5000",
+)
+
+
+async def chat_loop():
+    conversation_history = []
+
+    while True:
+        user_input = input("User> ")
+        if user_input.lower() in ["exit", "quit", "bye"]:
+            cprint("Ending conversation. Goodbye!", "yellow")
+            break
+
+        user_message = UserMessage(content=user_input, role="user")
+        conversation_history.append(user_message)
+
+        response = client.inference.chat_completion(
+            messages=conversation_history,
+            model="Llama3.2-11B-Vision-Instruct",
+        )
+
+        cprint(f"> Response: {response.completion_message.content}", "cyan")
+
+        assistant_message = UserMessage(
+            content=response.completion_message.content, role="user"
+        )
+        conversation_history.append(assistant_message)
+
+
+asyncio.run(chat_loop())
--- a/docs/source/inference-loop.py
+++ b/docs/source/inference-loop.py
@ -0,0 +1,32 @@
+import asyncio
+
+from llama_stack_client import LlamaStackClient
+from llama_stack_client.lib.inference.event_logger import EventLogger
+from llama_stack_client.types import UserMessage
+from termcolor import cprint
+
+client = LlamaStackClient(
+    base_url="http://localhost:5000",
+)
+
+
+async def chat_loop():
+    while True:
+
+        user_input = input("User> ")
+
+        if user_input.lower() in ["exit", "quit", "bye"]:
+            cprint("Ending conversation. Goodbye!", "yellow")
+            break
+
+        message = UserMessage(content=user_input, role="user")
+
+        response = client.inference.chat_completion(
+            messages=[message],
+            model="Llama3.2-11B-Vision-Instruct",
+        )
+
+        cprint(f"> Response: {response.completion_message.content}", "cyan")
+
+
+asyncio.run(chat_loop())