llama-stack-mirror/llama_toolchain/inference/client_sdk.py

import fire

from llama_stack import LlamaStack
from llama_stack.types import UserMessage


def main(host: str, port: int):
    client = LlamaStack(
        base_url=f"http://{host}:{port}",
    )

    # Need something like this to work with the server; however, this is not
    # what the SDK generated (?)
    response = client.inference.chat_completion(
        request={
            "messages": [
                UserMessage(
                    content="hello world, troll me in two-paragraphs about 42",
                    role="user",
                ),
            ],
            "model": "Meta-Llama3.1-8B-Instruct",
            "stream": False,
        },
    )
    print(response)
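
    # If the SDK parses the response into a typed object, the generated text is
    # presumably available via something like response.completion_message.content;
    # that attribute path is an assumption for this SDK snapshot, hence the guard:
    # if hasattr(response, "completion_message"):
    #     print(response.completion_message.content)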

    # This does not work with the current server:
    # response = client.inference.chat_completion(
    #     messages=[
    #         UserMessage(
    #             content="hello world, troll me in two-paragraphs about 42",
    #             role="user",
    #         ),
    #     ],
    #     model="Meta-Llama3.1-8B-Instruct",
    #     stream=True,
    # )
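
    # A hypothetical sketch of consuming the streaming variant if/when the
    # keyword-style call above works: iterate the response and print each
    # chunk. The chunk iteration protocol is an assumption, not confirmed by
    # this SDK version:
    # for chunk in response:
    #     print(chunk)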


if __name__ == "__main__":
    fire.Fire(main)