From 626dffa0d95c58c046b962a56f4926eb9851df29 Mon Sep 17 00:00:00 2001
From: Justin Lee
Date: Thu, 31 Oct 2024 15:45:47 -0700
Subject: [PATCH] added simple inferences

---
 docs/source/inference-streaming.py | 39 ++++++++++++++++++++++++++++++++
 docs/source/inference.py           | 21 ++++++++++++++++
 2 files changed, 60 insertions(+)
 create mode 100644 docs/source/inference-streaming.py
 create mode 100644 docs/source/inference.py

diff --git a/docs/source/inference-streaming.py b/docs/source/inference-streaming.py
new file mode 100644
index 000000000..85afbb4af
--- /dev/null
+++ b/docs/source/inference-streaming.py
@@ -0,0 +1,39 @@
+import asyncio
+
+from llama_stack_client import LlamaStackClient
+from llama_stack_client.lib.inference.event_logger import EventLogger
+from llama_stack_client.types import UserMessage
+from termcolor import cprint
+
+
+async def run_main(stream: bool = True):
+    # Point the client at a locally running Llama Stack server
+    client = LlamaStackClient(
+        base_url="http://localhost:5000",
+    )
+
+    message = UserMessage(
+        content="hello world, write me a 2 sentence poem about the moon", role="user"
+    )
+    cprint(f"User>{message.content}", "green")
+
+    response = client.inference.chat_completion(
+        messages=[message],
+        model="Llama3.2-11B-Vision-Instruct",
+        stream=stream,
+    )
+
+    if not stream:
+        cprint(f"> Response: {response.completion_message.content}", "cyan")
+    else:
+        # Pretty-print each streamed chunk as it arrives
+        async for log in EventLogger().log(response):
+            log.print()
+
+    # List the models registered with the server
+    models_response = client.models.list()
+    print(models_response)
+
+
+if __name__ == "__main__":
+    asyncio.run(run_main())
diff --git a/docs/source/inference.py b/docs/source/inference.py
new file mode 100644
index 000000000..82f014887
--- /dev/null
+++ b/docs/source/inference.py
@@ -0,0 +1,21 @@
+from llama_stack_client import LlamaStackClient
+from llama_stack_client.types import UserMessage
+from termcolor import cprint
+
+
+# Point the client at a locally running Llama Stack server
+client = LlamaStackClient(
+    base_url="http://localhost:5000",
+)
+message = UserMessage(
+    content="hello world, write me a 2 sentence poem about the moon", role="user"
+)
+
+cprint(f"User>{message.content}", "green")
+# Non-streaming call: the full completion is returned in a single response
+response = client.inference.chat_completion(
+    messages=[message],
+    model="Llama3.2-11B-Vision-Instruct",
+)
+
+cprint(f"> Response: {response.completion_message.content}", "cyan")
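
Usage note on the streaming example: run_main() accepts a stream flag, so the same script can exercise both code paths. A minimal sketch of an alternate entry point, assuming run_main from inference-streaming.py is in scope (the hyphenated filename means it cannot be imported directly; rename the file or copy the function to try this):

    import asyncio

    # Streaming (the default): chunks are pretty-printed as they arrive.
    asyncio.run(run_main())

    # Non-streaming: the full completion is printed in one shot.
    asyncio.run(run_main(stream=False))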