diff --git a/llama_toolchain/configs/ashwin.yaml b/llama_toolchain/configs/ashwin.yaml
deleted file mode 100644
index 21ab6b880..000000000
--- a/llama_toolchain/configs/ashwin.yaml
+++ /dev/null
@@ -1,11 +0,0 @@
-inference_config:
-  impl_type: "inline"
-  inline_config:
-    checkpoint_type: "pytorch"
-    checkpoint_dir: /home/ashwin/local/checkpoints/Meta-Llama-3.1-70B-Instruct-20240710150000
-    tokenizer_path: /home/ashwin/local/checkpoints/Meta-Llama-3.1-70B-Instruct-20240710150000/tokenizer.model
-    model_parallel_size: 8
-    max_seq_len: 2048
-    max_batch_size: 1
-    quantization:
-      type: "fp8"
diff --git a/llama_toolchain/configs/chrisluc.yaml b/llama_toolchain/configs/chrisluc.yaml
deleted file mode 100644
index c44f9524e..000000000
--- a/llama_toolchain/configs/chrisluc.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-inference_config:
-  impl_type: "inline"
-  inline_config:
-    checkpoint_type: "pytorch"
-    checkpoint_dir: /home/chrisluc/models/Meta-Llama-3.1-8B-Instruct-20240710150000
-    tokenizer_path: /home/chrisluc/models/Meta-Llama-3.1-8B-Instruct-20240710150000/tokenizer.model
-    model_parallel_size: 1
-    max_seq_len: 2048
-    max_batch_size: 1
diff --git a/llama_toolchain/configs/cyni.yaml b/llama_toolchain/configs/cyni.yaml
deleted file mode 100644
index e8edbf036..000000000
--- a/llama_toolchain/configs/cyni.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-inference_config:
-  impl_type: "inline"
-  inline_config:
-    checkpoint_type: "pytorch"
-    checkpoint_dir: /home/cyni/local/llama-3
-    tokenizer_path: /home/cyni/local/llama-3/cl_toplang_128k
-    model_parallel_size: 1
-    max_seq_len: 2048
-    max_batch_size: 1
diff --git a/llama_toolchain/configs/default.yaml b/llama_toolchain/configs/default.yaml
deleted file mode 100644
index d13f37226..000000000
--- a/llama_toolchain/configs/default.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-inference_config:
-  impl_type: "inline"
-  inline_config:
-    checkpoint_type: "pytorch"
-    checkpoint_dir: /home/dalton/models/Meta-Llama-3.1-8B-Instruct-20240710150000
-    tokenizer_path: /home/dalton/models/Meta-Llama-3.1-8B-Instruct-20240710150000/tokenizer.model
-    model_parallel_size: 1
-    max_seq_len: 2048
-    max_batch_size: 1
diff --git a/llama_toolchain/configs/hjshah.yaml b/llama_toolchain/configs/hjshah.yaml
deleted file mode 100644
index 089ab1b5a..000000000
--- a/llama_toolchain/configs/hjshah.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-inference_config:
-  impl_type: "inline"
-  inline_config:
-    checkpoint_type: "pytorch"
-    checkpoint_dir: /home/hjshah/local/checkpoints/Meta-Llama-3.1-8B-Instruct-20240710150000
-    tokenizer_path: /home/hjshah/local/checkpoints/Meta-Llama-3.1-8B-Instruct-20240710150000/tokenizer.model
-    model_parallel_size: 1
-    max_seq_len: 2048
-    max_batch_size: 1
diff --git a/llama_toolchain/configs/long_seqlen.yaml b/llama_toolchain/configs/long_seqlen.yaml
deleted file mode 100644
index 9eaeab1bd..000000000
--- a/llama_toolchain/configs/long_seqlen.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-inference_config:
-  impl_type: "inline"
-  inline_config:
-    checkpoint_type: "pytorch"
-    checkpoint_dir: /home/hjshah/local/checkpoints/Meta-Llama-3.1-8B-Instruct-20240710150000
-    tokenizer_path: /home/hjshah/local/checkpoints/Meta-Llama-3.1-8B-Instruct-20240710150000/tokenizer.model
-    model_parallel_size: 1
-    max_seq_len: 8192
-    max_batch_size: 1
diff --git a/llama_toolchain/inference/client.py b/llama_toolchain/inference/client.py
index 2e8a36161..c798ed6fe 100644
--- a/llama_toolchain/inference/client.py
+++ b/llama_toolchain/inference/client.py
@@ -18,6 +18,7 @@ from .event_logger import EventLogger
 
 class InferenceClient(Inference):
     def __init__(self, base_url: str):
+        print(f"Initializing client for {base_url}")
         self.base_url = base_url
 
     async def initialize(self) -> None: