added simple inferences

2025-10-15 06:37:58 +00:00 · 2024-10-31 15:45:47 -07:00 · 2024-10-31 15:45:47 -07:00 · 626dffa0d9
commit 626dffa0d9
parent e4560a5e74
2 changed files with 58 additions and 0 deletions
--- a/docs/source/inference-streaming.py
+++ b/docs/source/inference-streaming.py
@ -0,0 +1,36 @@
 import asyncio
 from llama_stack_client import LlamaStackClient
 from llama_stack_client.lib.inference.event_logger import EventLogger
 from llama_stack_client.types import UserMessage
 from termcolor import cprint
 async def run_main(stream: bool = True) -> None:
     """Send one chat prompt to a Llama Stack server and print the reply.

     The client is pointed at ``http://localhost:5000``. After the completion
     has been printed, the result of ``client.models.list()`` is printed too.

     Args:
         stream: Forwarded to ``chat_completion``. When true, every event
             yielded by ``EventLogger().log`` is printed as it arrives;
             otherwise the whole response object is printed once.
     """
     client = LlamaStackClient(
         base_url="http://localhost:5000",
     )
     message = UserMessage(
         content="hello world, write me a 2 sentence poem about the moon", role="user"
     )
     # Use cprint (not print) so "green" is applied as the colour instead of
     # being echoed to the terminal as a second positional argument.
     cprint(f"User>{message.content}", "green")
     response = client.inference.chat_completion(
         messages=[message],
         model="Llama3.2-11B-Vision-Instruct",
         stream=stream,
     )
     if not stream:
         cprint(f"> Response: {response}", "cyan")
     else:
         # NOTE(review): this relies on EventLogger().log being an async
         # generator in the installed client version - confirm when upgrading.
         async for log in EventLogger().log(response):
             log.print()
     models_response = client.models.list()
     print(models_response)
 if __name__ == "__main__":
     # Script entry point: build the coroutine with its default arguments and
     # run it to completion on a fresh event loop.
     entry_coro = run_main()
     asyncio.run(entry_coro)
--- a/docs/source/inference.py
+++ b/docs/source/inference.py
@ -0,0 +1,22 @@
 import asyncio
 from llama_stack_client import LlamaStackClient
 from llama_stack_client.lib.inference.event_logger import EventLogger
 from llama_stack_client.types import UserMessage
 from termcolor import cprint
 # Minimal non-streaming example: send a single user prompt to the server at
 # localhost:5000 and print the text of the returned completion.
 client = LlamaStackClient(
     base_url="http://localhost:5000",
 )
 message = UserMessage(
     content="hello world, write me a 2 sentence poem about the moon", role="user"
 )
 cprint(f"User>{message.content}", "green")
 # No `stream` argument is passed here; the result is read below as a single
 # response object carrying `completion_message`.
 response = client.inference.chat_completion(
     messages=[message],
     model="Llama3.2-11B-Vision-Instruct",
 )
 cprint(f"> Response: {response.completion_message.content}", "cyan")