mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 20:14:13 +00:00
49 lines
1.6 KiB
Python
49 lines
1.6 KiB
Python
import fire
|
|
from llama_stack import LlamaStack
|
|
from llama_stack.types import UserMessage
|
|
|
|
def main(host: str, port: int):
    """Exercise the agentic-system endpoints of a Llama Stack server.

    Builds a client for ``http://{host}:{port}``, creates an agent, opens a
    session named ``test_session`` for that agent, and submits a single turn
    with ``stream=True``. The agent and session creation responses are
    printed; the turn response is assigned but not printed.

    Args:
        host: Host name or address used to build the client's base URL.
        port: Port used to build the client's base URL.
    """
    endpoint = f"http://{host}:{port}"
    client = LlamaStack(base_url=endpoint)

    # Something like the call below is needed to work with the server;
    # however, this is not what the SDK generated:
    #
    # response = client.inference.chat_completion(
    #     request={
    #         "messages": [
    #             UserMessage(content="hello world, troll me in two-paragraphs about 42", role="user"),
    #         ],
    #         "model": "Meta-Llama3.1-8B-Instruct",
    #         "stream": True,
    #     },
    # )

    agent_settings = {
        "instructions": "You are a helpful assistant",
        "model": "Meta-Llama3.1-8B-Instruct",
    }
    agent = client.agentic_system.create(agent_config=agent_settings)
    print(agent)

    session = client.agentic_system.sessions.create(
        agent_id=agent.agent_id,
        session_name="test_session",
    )
    print(session)

    # TODO(xiyan): This does not work with the current server; the arguments
    # need to be wrapped in a request (similar to AgentConfig?).
    turn_response = client.agentic_system.turns.create(
        agent_id=agent.agent_id,
        session_id=session.session_id,
        messages=[
            UserMessage(content="What is the capital of France?", role="user"),
        ],
        stream=True,
    )

    # print(turn_response)
|
|
|
|
|
|
# Script entry point: hand `main` to Fire so that `host` and `port` are
# supplied from the command line.
if __name__ == "__main__":
    fire.Fire(main)
|