diff --git a/llama_toolchain/agentic_system/client_sdk.py b/llama_toolchain/agentic_system/client_sdk.py
new file mode 100644
index 000000000..18c83e83c
--- /dev/null
+++ b/llama_toolchain/agentic_system/client_sdk.py
@@ -0,0 +1,49 @@
+import fire
+from llama_stack import LlamaStack
+from llama_stack.types import UserMessage
+
+def main(host: str, port: int):
+    client = LlamaStack(
+        base_url=f"http://{host}:{port}",
+    )
+
+    # Need something like this to work with the server; however, this is not what the SDK generated
+    # response = client.inference.chat_completion(
+    #     request={
+    #         "messages": [
+    #             UserMessage(content="hello world, troll me in two-paragraphs about 42", role="user"),
+    #         ],
+    #         "model": "Meta-Llama3.1-8B-Instruct",
+    #         "stream": True,
+    #     },
+    # )
+
+    agentic_system_create_response = client.agentic_system.create(
+        agent_config={
+            "instructions": "You are a helpful assistant",
+            "model": "Meta-Llama3.1-8B-Instruct",
+        },
+    )
+    print(agentic_system_create_response)
+
+    agentic_system_create_session_response = client.agentic_system.sessions.create(
+        agent_id=agentic_system_create_response.agent_id,
+        session_name="test_session",
+    )
+    print(agentic_system_create_session_response)
+
+    # TODO(xiyan): This does not work with the current server; it needs to be wrapped in a request (similar to AgentConfig?)
+    response = client.agentic_system.turns.create(
+        agent_id=agentic_system_create_response.agent_id,
+        session_id=agentic_system_create_session_response.session_id,
+        messages=[
+            UserMessage(content="What is the capital of France?", role="user"),
+        ],
+        stream=True,
+    )
+
+    # print(response)
+
+
+if __name__ == "__main__":
+    fire.Fire(main)
diff --git a/llama_toolchain/inference/client_sdk.py b/llama_toolchain/inference/client_sdk.py
new file mode 100644
index 000000000..e0cf77f78
--- /dev/null
+++ b/llama_toolchain/inference/client_sdk.py
@@ -0,0 +1,32 @@
+import fire
+from llama_stack import LlamaStack
+from llama_stack.types import UserMessage
+
+def main(host: str, port: int):
+    client = LlamaStack(
+        base_url=f"http://{host}:{port}",
+    )
+
+    # Need something like this to work with the server; however, this is not what the SDK generated (?)
+    response = client.inference.chat_completion(
+        request={
+            "messages": [
+                UserMessage(content="hello world, troll me in two-paragraphs about 42", role="user"),
+            ],
+            "model": "Meta-Llama3.1-8B-Instruct",
+            "stream": False,
+        },
+    )
+
+    print(response)
+    # This does not work with the current server
+    # response = client.inference.chat_completion(
+    #     messages=[
+    #         UserMessage(content="hello world, troll me in two-paragraphs about 42", role="user"),
+    #     ],
+    #     model="Meta-Llama3.1-8B-Instruct",
+    #     stream=True,
+    # )
+
+if __name__ == "__main__":
+    fire.Fire(main)
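Note: both scripts wrap main(host, port) with fire.Fire, so they can be run from the command line against an already-running Llama Stack server. A minimal sketch of how one might invoke them, assuming the server listens on localhost:5000 (the host and port here are placeholders, not values taken from this diff):

    python llama_toolchain/inference/client_sdk.py localhost 5000
    python llama_toolchain/agentic_system/client_sdk.py localhost 5000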