From 486c0bc9c8098de11f76c8a28cb575658add8d06 Mon Sep 17 00:00:00 2001 From: Botao Chen Date: Tue, 17 Dec 2024 13:41:36 -0800 Subject: [PATCH] refine --- llama_stack/templates/meta-reference-gpu/run.yaml | 2 +- llama_stack/templates/meta-reference-quantized-gpu/run.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml index 0763d0c36..c19066664 100644 --- a/llama_stack/templates/meta-reference-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-gpu/run.yaml @@ -16,7 +16,7 @@ providers: - provider_id: meta-reference-inference provider_type: inline::meta-reference config: - model: ${env.INFERENCE_MODEL} # please make sure your inference model here is added as resource + model: ${env.INFERENCE_MODEL} max_seq_len: 4096 checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null} - provider_id: sentence-transformers diff --git a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml index ea34d3424..550170a00 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml @@ -16,7 +16,7 @@ providers: - provider_id: meta-reference-inference provider_type: inline::meta-reference-quantized config: - model: ${env.INFERENCE_MODEL} # please make sure your inference model here is added as resource + model: ${env.INFERENCE_MODEL} max_seq_len: 4096 checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null} quantization: