diff --git a/llama_toolchain/agentic_system/client_sdk.py b/llama_toolchain/agentic_system/client_sdk.py
deleted file mode 100644
index f8eb5550d..000000000
--- a/llama_toolchain/agentic_system/client_sdk.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import fire
-from llama_stack import LlamaStack
-from llama_stack.types import UserMessage
-
-def main(host: str, port: int):
-    client = LlamaStack(
-        base_url=f"http://{host}:{port}",
-    )
-
-    agentic_system_create_response = client.agentic_system.create(
-        agent_config={
-            "instructions": "You are a helpful assistant",
-            "model": "Meta-Llama3.1-8B-Instruct",
-        },
-    )
-    print(agentic_system_create_response)
-
-    agentic_system_create_session_response = client.agentic_system.sessions.create(
-        agent_id=agentic_system_create_response.agent_id,
-        session_name="test_session",
-    )
-    print(agentic_system_create_session_response)
-
-    # TODO(xiyan): remove request wrapper
-    response = client.agentic_system.turns.create(
-        request={
-            "agent_id": agentic_system_create_response.agent_id,
-            "session_id": agentic_system_create_session_response.session_id,
-            "messages": [
-                UserMessage(content="What is the capital of France?", role="user"),
-            ],
-            "stream": False,
-        }
-    )
-
-    print(response)
-
-
-if __name__ == "__main__":
-    fire.Fire(main)
diff --git a/llama_toolchain/inference/client_sdk.py b/llama_toolchain/inference/client_sdk.py
deleted file mode 100644
index 3abcd5d51..000000000
--- a/llama_toolchain/inference/client_sdk.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import fire
-from llama_stack import LlamaStack
-from llama_stack.types import UserMessage
-
-def main(host: str, port: int):
-    client = LlamaStack(
-        base_url=f"http://{host}:{port}",
-    )
-
-    response = client.inference.chat_completion(
-        request={
-            "messages": [
-                UserMessage(content="hello world, troll me in two-paragraphs about 42", role="user"),
-            ],
-            "model": "Meta-Llama3.1-8B-Instruct",
-            "stream": False,
-        },
-    )
-
-    print(response)
-    # TODO (xiyan). This does not work with current server, need to fix
-    # response = client.inference.chat_completion(
-    #     messages=[
-    #         UserMessage(content="hello world, troll me in two-paragraphs about 42", role="user"),
-    #     ],
-    #     model="Meta-Llama3.1-8B-Instruct",
-    #     stream=True,
-    # )
-
-if __name__ == "__main__":
-    fire.Fire(main)