# llama-stack-mirror/llama_toolchain/agentic_system/client_sdk.py

import fire
from llama_stack import LlamaStack
from llama_stack.types import UserMessage

def main(host: str, port: int):
    """Exercise the agentic-system endpoints of a Llama Stack server.

    Builds a client for ``http://{host}:{port}``, creates an agent, opens a
    session named "test_session" for that agent, and submits a single
    streaming turn. The agent-creation and session-creation responses are
    printed; the turn response is requested but neither consumed nor printed.
    """
    server_url = f"http://{host}:{port}"
    client = LlamaStack(base_url=server_url)

    # NOTE: for the inference endpoint the server currently expects the
    # arguments wrapped in a `request=` payload, as sketched below. That is
    # not the call shape the generated SDK produces.
    #
    # response = client.inference.chat_completion(
    #     request={
    #         "messages": [
    #             UserMessage(content="hello world, troll me in two-paragraphs about 42", role="user"),
    #         ],
    #         "model": "Meta-Llama3.1-8B-Instruct",
    #         "stream": True,
    #     },
    # )

    agent_settings = {
        "instructions": "You are a helpful assistant",
        "model": "Meta-Llama3.1-8B-Instruct",
    }
    agent = client.agentic_system.create(agent_config=agent_settings)
    print(agent)

    session = client.agentic_system.sessions.create(
        agent_id=agent.agent_id,
        session_name="test_session",
    )
    print(session)

    # TODO(xiyan): the current server rejects this call as-is; the arguments
    # need to be wrapped in a request object (similar to AgentConfig?).
    turn_response = client.agentic_system.turns.create(
        agent_id=agent.agent_id,
        session_id=session.session_id,
        messages=[
            UserMessage(content="What is the capital of France?", role="user"),
        ],
        stream=True,
    )

    # print(turn_response)


# Script entry point: hand `main` to python-fire so that its `host` and `port`
# parameters are supplied from the command line.
if __name__ == "__main__":
    fire.Fire(main)