mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-07-30 07:39:38 +00:00
adding a few more inference examples
This commit is contained in:
parent 626dffa0d9
commit 703d7ebb6e
2 changed files with 69 additions and 0 deletions
docs/source/inference-loop-history.py (new file, +37 lines)
@@ -0,0 +1,37 @@
import asyncio

from llama_stack_client import LlamaStackClient
from llama_stack_client.types import UserMessage
from termcolor import cprint

client = LlamaStackClient(
    base_url="http://localhost:5000",
)


async def chat_loop():
    conversation_history = []

    while True:
        user_input = input("User> ")
        if user_input.lower() in ["exit", "quit", "bye"]:
            cprint("Ending conversation. Goodbye!", "yellow")
            break

        user_message = UserMessage(content=user_input, role="user")
        conversation_history.append(user_message)

        # Send the accumulated history so earlier turns stay in context
        response = client.inference.chat_completion(
            messages=conversation_history,
            model="Llama3.2-11B-Vision-Instruct",
        )

        cprint(f"> Response: {response.completion_message.content}", "cyan")

        # Store the assistant's reply back into the history so it is included
        # in the next request
        assistant_message = UserMessage(
            content=response.completion_message.content, role="user"
        )
        conversation_history.append(assistant_message)


asyncio.run(chat_loop())
docs/source/inference-loop.py (new file, +32 lines)
@@ -0,0 +1,32 @@
import asyncio

from llama_stack_client import LlamaStackClient
from llama_stack_client.lib.inference.event_logger import EventLogger  # for streaming output (unused here)
from llama_stack_client.types import UserMessage
from termcolor import cprint

client = LlamaStackClient(
    base_url="http://localhost:5000",
)


async def chat_loop():
    while True:

        user_input = input("User> ")

        if user_input.lower() in ["exit", "quit", "bye"]:
            cprint("Ending conversation. Goodbye!", "yellow")
            break

        message = UserMessage(content=user_input, role="user")

        # Each request is independent; no conversation history is carried over
        response = client.inference.chat_completion(
            messages=[message],
            model="Llama3.2-11B-Vision-Instruct",
        )

        cprint(f"> Response: {response.completion_message.content}", "cyan")


asyncio.run(chat_loop())
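For reference, inference-loop.py imports EventLogger but never calls it. Below is a minimal standalone sketch of how a streamed reply could be printed; it is not part of this commit, the helper name stream_once is illustrative, and the stream=True flag together with the EventLogger().log(...) / log.print() iteration are assumed from other llama-stack-client examples of the same vintage rather than taken from these files.

import asyncio

from llama_stack_client import LlamaStackClient
from llama_stack_client.lib.inference.event_logger import EventLogger
from llama_stack_client.types import UserMessage

client = LlamaStackClient(base_url="http://localhost:5000")


async def stream_once(prompt: str) -> None:
    # Hypothetical helper (not part of this commit): request a streamed reply
    # and let EventLogger print each chunk as it arrives.
    message = UserMessage(content=prompt, role="user")
    response = client.inference.chat_completion(
        messages=[message],
        model="Llama3.2-11B-Vision-Instruct",
        stream=True,  # assumed flag, as used in other llama-stack-client examples
    )
    # Assumed iteration pattern for the event stream; verify against the
    # installed llama-stack-client version before relying on it.
    async for log in EventLogger().log(response):
        log.print()


asyncio.run(stream_once("Hello!"))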