Remove configurations

This commit is contained in:
Ashwin Bharambe 2024-07-22 16:03:37 -07:00
parent bbfd8a587e
commit acb2a91872
7 changed files with 1 addition and 56 deletions

View file

@ -1,11 +0,0 @@
inference_config:
impl_type: "inline"
inline_config:
checkpoint_type: "pytorch"
checkpoint_dir: /home/ashwin/local/checkpoints/Meta-Llama-3.1-70B-Instruct-20240710150000
tokenizer_path: /home/ashwin/local/checkpoints/Meta-Llama-3.1-70B-Instruct-20240710150000/tokenizer.model
model_parallel_size: 8
max_seq_len: 2048
max_batch_size: 1
quantization:
type: "fp8"

View file

@ -1,9 +0,0 @@
inference_config:
impl_type: "inline"
inline_config:
checkpoint_type: "pytorch"
checkpoint_dir: /home/chrisluc/models/Meta-Llama-3.1-8B-Instruct-20240710150000
tokenizer_path: /home/chrisluc/models/Meta-Llama-3.1-8B-Instruct-20240710150000/tokenizer.model
model_parallel_size: 1
max_seq_len: 2048
max_batch_size: 1

View file

@ -1,9 +0,0 @@
inference_config:
impl_type: "inline"
inline_config:
checkpoint_type: "pytorch"
checkpoint_dir: /home/cyni/local/llama-3
tokenizer_path: /home/cyni/local/llama-3/cl_toplang_128k
model_parallel_size: 1
max_seq_len: 2048
max_batch_size: 1

View file

@ -1,9 +0,0 @@
inference_config:
impl_type: "inline"
inline_config:
checkpoint_type: "pytorch"
checkpoint_dir: /home/dalton/models/Meta-Llama-3.1-8B-Instruct-20240710150000
tokenizer_path: /home/dalton/models/Meta-Llama-3.1-8B-Instruct-20240710150000/tokenizer.model
model_parallel_size: 1
max_seq_len: 2048
max_batch_size: 1

View file

@ -1,9 +0,0 @@
inference_config:
impl_type: "inline"
inline_config:
checkpoint_type: "pytorch"
checkpoint_dir: /home/hjshah/local/checkpoints/Meta-Llama-3.1-8B-Instruct-20240710150000
tokenizer_path: /home/hjshah/local/checkpoints/Meta-Llama-3.1-8B-Instruct-20240710150000/tokenizer.model
model_parallel_size: 1
max_seq_len: 2048
max_batch_size: 1

View file

@ -1,9 +0,0 @@
inference_config:
impl_type: "inline"
inline_config:
checkpoint_type: "pytorch"
checkpoint_dir: /home/hjshah/local/checkpoints/Meta-Llama-3.1-8B-Instruct-20240710150000
tokenizer_path: /home/hjshah/local/checkpoints/Meta-Llama-3.1-8B-Instruct-20240710150000/tokenizer.model
model_parallel_size: 1
max_seq_len: 8192
max_batch_size: 1

View file

@ -18,6 +18,7 @@ from .event_logger import EventLogger
class InferenceClient(Inference):
    def __init__(self, base_url: str):
        print(f"Initializing client for {base_url}")
        self.base_url = base_url

    async def initialize(self) -> None: