diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml index 0763d0c36..c19066664 100644 --- a/llama_stack/templates/meta-reference-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-gpu/run.yaml @@ -16,7 +16,7 @@ providers: - provider_id: meta-reference-inference provider_type: inline::meta-reference config: - model: ${env.INFERENCE_MODEL} # please make sure your inference model here is added as resource + model: ${env.INFERENCE_MODEL} max_seq_len: 4096 checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null} - provider_id: sentence-transformers diff --git a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml index ea34d3424..550170a00 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml @@ -16,7 +16,7 @@ providers: - provider_id: meta-reference-inference provider_type: inline::meta-reference-quantized config: - model: ${env.INFERENCE_MODEL} # please make sure your inference model here is added as resource + model: ${env.INFERENCE_MODEL} max_seq_len: 4096 checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null} quantization: