mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-03 19:57:35 +00:00
32 lines
971 B
Python
32 lines
971 B
Python
import fire
|
|
from llama_stack import LlamaStack
|
|
from llama_stack.types import UserMessage
|
|
|
|
def main(host: str, port: int):
    """Send a single non-streaming chat completion request and print the response.

    Args:
        host: Host portion of the server's base URL.
        port: Port portion of the server's base URL.
    """
    endpoint = f"http://{host}:{port}"
    client = LlamaStack(base_url=endpoint)

    prompt = UserMessage(
        content="hello world, troll me in two-paragraphs about 42",
        role="user",
    )

    # The server needs the payload wrapped in a single `request` argument;
    # this is apparently not the call shape the SDK generated (unconfirmed).
    payload = {
        "messages": [prompt],
        "model": "Meta-Llama3.1-8B-Instruct",
        "stream": False,
    }
    response = client.inference.chat_completion(request=payload)

    print(response)
|
|
# NOTE: The keyword-argument form of the call below does not work with the current server.
|
|
# response = client.inference.chat_completion(
|
|
# messages=[
|
|
# UserMessage(content="hello world, troll me in two-paragraphs about 42", role="user"),
|
|
# ],
|
|
# model="Meta-Llama3.1-8B-Instruct",
|
|
# stream=True,
|
|
# )
|
|
|
|
if __name__ == "__main__":
    # Hand `main` to python-fire so that `host` and `port` are taken from the command line.
    fire.Fire(main)
|