diff --git a/llama_stack/providers/inline/inference/vllm/vllm.py b/llama_stack/providers/inline/inference/vllm/vllm.py
index 06abd0290..59ec0634b 100644
--- a/llama_stack/providers/inline/inference/vllm/vllm.py
+++ b/llama_stack/providers/inline/inference/vllm/vllm.py
@@ -172,8 +172,6 @@ def _convert_sampling_params(
     # vLLM allows top-p and top-k at the same time.
     vllm_sampling_params = vllm.SamplingParams.from_optional(
         max_tokens=(None if sampling_params.max_tokens == 0 else sampling_params.max_tokens),
-        # Assume that vLLM's default stop token will work
-        # stop_token_ids=[tokenizer.eos_token_id],
         temperature=vllm_temperature,
         top_p=vllm_top_p,
         top_k=vllm_top_k,
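
For context, a minimal sketch of the conversion this hunk touches, assuming the surrounding _convert_sampling_params helper just builds vLLM sampling parameters from the request's values; only the names visible in the diff (vllm.SamplingParams.from_optional, sampling_params.max_tokens, vllm_temperature, vllm_top_p, vllm_top_k) come from the change itself, and the function signature here is illustrative, not the actual one in the file.

    import vllm


    def _convert_sampling_params_sketch(sampling_params, vllm_temperature, vllm_top_p, vllm_top_k):
        # Illustrative signature; the real helper takes different arguments.
        # vLLM allows top-p and top-k at the same time.
        # With the commented-out stop_token_ids override removed, vLLM falls back
        # to its default stop handling (the model's EOS token).
        return vllm.SamplingParams.from_optional(
            max_tokens=(None if sampling_params.max_tokens == 0 else sampling_params.max_tokens),
            temperature=vllm_temperature,
            top_p=vllm_top_p,
            top_k=vllm_top_k,
        )

The net effect of the change is to drop dead, commented-out code rather than alter behavior: the stop_token_ids override was never active, so vLLM's default stop-token handling was already in use.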