Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-29 15:23:51 +00:00)
Remove configurations

parent bbfd8a587e
commit acb2a91872

7 changed files with 1 addition and 56 deletions
@@ -1,11 +0,0 @@
-inference_config:
-  impl_type: "inline"
-  inline_config:
-    checkpoint_type: "pytorch"
-    checkpoint_dir: /home/ashwin/local/checkpoints/Meta-Llama-3.1-70B-Instruct-20240710150000
-    tokenizer_path: /home/ashwin/local/checkpoints/Meta-Llama-3.1-70B-Instruct-20240710150000/tokenizer.model
-    model_parallel_size: 8
-    max_seq_len: 2048
-    max_batch_size: 1
-    quantization:
-      type: "fp8"
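Every deleted file shares the same schema: a top-level inference_config selecting the "inline" implementation, with an inline_config pointing at a local PyTorch checkpoint (the 70B config above additionally enables fp8 quantization). As a minimal sketch, not part of the commit, this is how such a file could be read with PyYAML; the file name is hypothetical:

```python
# Sketch: load one of the removed inline inference configs.
# Assumes PyYAML is installed; "inference_config.yaml" is a placeholder path.
import yaml

with open("inference_config.yaml") as f:
    cfg = yaml.safe_load(f)

inline = cfg["inference_config"]["inline_config"]
print(inline["checkpoint_dir"])
print(inline["model_parallel_size"])
# quantization is optional; only the 70B config above sets it ("fp8")
print(inline.get("quantization", {}).get("type", "none"))
```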
@@ -1,9 +0,0 @@
-inference_config:
-  impl_type: "inline"
-  inline_config:
-    checkpoint_type: "pytorch"
-    checkpoint_dir: /home/chrisluc/models/Meta-Llama-3.1-8B-Instruct-20240710150000
-    tokenizer_path: /home/chrisluc/models/Meta-Llama-3.1-8B-Instruct-20240710150000/tokenizer.model
-    model_parallel_size: 1
-    max_seq_len: 2048
-    max_batch_size: 1
@@ -1,9 +0,0 @@
-inference_config:
-  impl_type: "inline"
-  inline_config:
-    checkpoint_type: "pytorch"
-    checkpoint_dir: /home/cyni/local/llama-3
-    tokenizer_path: /home/cyni/local/llama-3/cl_toplang_128k
-    model_parallel_size: 1
-    max_seq_len: 2048
-    max_batch_size: 1
@@ -1,9 +0,0 @@
-inference_config:
-  impl_type: "inline"
-  inline_config:
-    checkpoint_type: "pytorch"
-    checkpoint_dir: /home/dalton/models/Meta-Llama-3.1-8B-Instruct-20240710150000
-    tokenizer_path: /home/dalton/models/Meta-Llama-3.1-8B-Instruct-20240710150000/tokenizer.model
-    model_parallel_size: 1
-    max_seq_len: 2048
-    max_batch_size: 1
@@ -1,9 +0,0 @@
-inference_config:
-  impl_type: "inline"
-  inline_config:
-    checkpoint_type: "pytorch"
-    checkpoint_dir: /home/hjshah/local/checkpoints/Meta-Llama-3.1-8B-Instruct-20240710150000
-    tokenizer_path: /home/hjshah/local/checkpoints/Meta-Llama-3.1-8B-Instruct-20240710150000/tokenizer.model
-    model_parallel_size: 1
-    max_seq_len: 2048
-    max_batch_size: 1
@@ -1,9 +0,0 @@
-inference_config:
-  impl_type: "inline"
-  inline_config:
-    checkpoint_type: "pytorch"
-    checkpoint_dir: /home/hjshah/local/checkpoints/Meta-Llama-3.1-8B-Instruct-20240710150000
-    tokenizer_path: /home/hjshah/local/checkpoints/Meta-Llama-3.1-8B-Instruct-20240710150000/tokenizer.model
-    model_parallel_size: 1
-    max_seq_len: 8192
-    max_batch_size: 1
@@ -18,6 +18,7 @@ from .event_logger import EventLogger
 
 class InferenceClient(Inference):
     def __init__(self, base_url: str):
+        print(f"Initializing client for {base_url}")
         self.base_url = base_url
 
     async def initialize(self) -> None:
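The commit's single addition is the print in InferenceClient.__init__, so constructing the client now logs its target URL before any request is made. A small usage sketch, assuming InferenceClient from the file above is in scope; the URL is a placeholder, not taken from the commit:

```python
# Sketch: exercising the new log line. Assumes InferenceClient (defined in the
# file shown above) has been imported; the server URL is a placeholder.
import asyncio

async def main() -> None:
    client = InferenceClient("http://localhost:5000")
    # -> prints "Initializing client for http://localhost:5000"
    await client.initialize()

asyncio.run(main())
```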