diff --git a/docs/source/inference-loop-history.py b/docs/source/inference-loop-history.py
new file mode 100644
index 000000000..5dc61fc51
--- /dev/null
+++ b/docs/source/inference-loop-history.py
@@ -0,0 +1,37 @@
+import asyncio
+
+from llama_stack_client import LlamaStackClient
+from llama_stack_client.types import UserMessage
+from termcolor import cprint
+
+client = LlamaStackClient(
+    base_url="http://localhost:5000",
+)
+
+
+async def chat_loop():
+    # Keep the full conversation so every request includes the prior turns.
+    conversation_history = []
+
+    while True:
+        user_input = input("User> ")
+        if user_input.lower() in ["exit", "quit", "bye"]:
+            cprint("Ending conversation. Goodbye!", "yellow")
+            break
+
+        user_message = UserMessage(content=user_input, role="user")
+        conversation_history.append(user_message)
+
+        response = client.inference.chat_completion(
+            messages=conversation_history,
+            model="Llama3.2-11B-Vision-Instruct",
+        )
+
+        cprint(f"> Response: {response.completion_message.content}", "cyan")
+
+        # Append the assistant's reply as-is so it keeps its "assistant" role,
+        # rather than re-wrapping it as a user message on the next request.
+        conversation_history.append(response.completion_message)
+
+
+asyncio.run(chat_loop())
diff --git a/docs/source/inference-loop.py b/docs/source/inference-loop.py
new file mode 100644
index 000000000..031f22d5e
--- /dev/null
+++ b/docs/source/inference-loop.py
@@ -0,0 +1,31 @@
+import asyncio
+
+from llama_stack_client import LlamaStackClient
+from llama_stack_client.types import UserMessage
+from termcolor import cprint
+
+client = LlamaStackClient(
+    base_url="http://localhost:5000",
+)
+
+
+async def chat_loop():
+    while True:
+
+        user_input = input("User> ")
+
+        if user_input.lower() in ["exit", "quit", "bye"]:
+            cprint("Ending conversation. Goodbye!", "yellow")
+            break
+
+        message = UserMessage(content=user_input, role="user")
+
+        response = client.inference.chat_completion(
+            messages=[message],
+            model="Llama3.2-11B-Vision-Instruct",
+        )
+
+        cprint(f"> Response: {response.completion_message.content}", "cyan")
+
+
+asyncio.run(chat_loop())