From 626dffa0d95c58c046b962a56f4926eb9851df29 Mon Sep 17 00:00:00 2001
From: Justin Lee
Date: Thu, 31 Oct 2024 15:45:47 -0700
Subject: [PATCH] added simple inferences

---
 docs/source/inference-streaming.py | 39 ++++++++++++++++++++++++++++++++
 docs/source/inference.py           | 21 ++++++++++++++++
 2 files changed, 60 insertions(+)
 create mode 100644 docs/source/inference-streaming.py
 create mode 100644 docs/source/inference.py

diff --git a/docs/source/inference-streaming.py b/docs/source/inference-streaming.py
new file mode 100644
index 000000000..85afbb4af
--- /dev/null
+++ b/docs/source/inference-streaming.py
@@ -0,0 +1,39 @@
+import asyncio
+
+from llama_stack_client import LlamaStackClient
+from llama_stack_client.lib.inference.event_logger import EventLogger
+from llama_stack_client.types import UserMessage
+from termcolor import cprint
+
+
+async def run_main(stream: bool = True):
+    # Point the client at a locally running Llama Stack server
+    client = LlamaStackClient(
+        base_url="http://localhost:5000",
+    )
+
+    message = UserMessage(
+        content="hello world, write me a 2 sentence poem about the moon", role="user"
+    )
+    cprint(f"User>{message.content}", "green")
+
+    response = client.inference.chat_completion(
+        messages=[message],
+        model="Llama3.2-11B-Vision-Instruct",
+        stream=stream,
+    )
+
+    if not stream:
+        cprint(f"> Response: {response.completion_message.content}", "cyan")
+    else:
+        # Pretty-print each streamed chunk as it arrives
+        async for log in EventLogger().log(response):
+            log.print()
+
+    # List the models registered with the server
+    models_response = client.models.list()
+    print(models_response)
+
+
+if __name__ == "__main__":
+    asyncio.run(run_main())
diff --git a/docs/source/inference.py b/docs/source/inference.py
new file mode 100644
index 000000000..82f014887
--- /dev/null
+++ b/docs/source/inference.py
@@ -0,0 +1,21 @@
+from llama_stack_client import LlamaStackClient
+from llama_stack_client.types import UserMessage
+from termcolor import cprint
+
+
+# Point the client at a locally running Llama Stack server
+client = LlamaStackClient(
+    base_url="http://localhost:5000",
+)
+message = UserMessage(
+    content="hello world, write me a 2 sentence poem about the moon", role="user"
+)
+
+cprint(f"User>{message.content}", "green")
+# Non-streaming call: the full completion is returned in a single response
+response = client.inference.chat_completion(
+    messages=[message],
+    model="Llama3.2-11B-Vision-Instruct",
+)
+
+cprint(f"> Response: {response.completion_message.content}", "cyan")
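
Usage note on the streaming example: run_main() accepts a stream flag, so the same script can exercise both code paths. A minimal sketch of an alternate entry point, assuming run_main from inference-streaming.py is in scope (the hyphenated filename means it cannot be imported directly; rename the file or copy the function to try this):

    import asyncio

    # Streaming (the default): chunks are pretty-printed as they arrive.
    asyncio.run(run_main())

    # Non-streaming: the full completion is printed in one shot.
    asyncio.run(run_main(stream=False))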