# Qwen3 performs better in tool calling and structured tests.
name: Setup VLLM
description: Start VLLM
runs:
  using: "composite"
  steps:
    - name: Start VLLM
      shell: bash
      run: |
        # Start vllm container
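        # -d runs the container detached so this step returns immediately;
        # -p 8000:8000 publishes the server's HTTP port on the runner.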
        docker run -d \
          --name vllm \
          -p 8000:8000 \
          --privileged=true \
          quay.io/higginsd/vllm-cpu:65393ee064-qwen3 \
          --host 0.0.0.0 \
          --port 8000 \
          --enable-auto-tool-choice \
          --tool-call-parser hermes \
          --model /root/.cache/Qwen3-0.6B \
          --served-model-name Qwen/Qwen3-0.6B \
          --max-model-len 8192
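
        # Everything after the image name is passed through to the vLLM
        # server inside the container; --enable-auto-tool-choice together
        # with --tool-call-parser hermes lets vLLM parse the Hermes-format
        # tool calls that Qwen models emit into structured tool calls.
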
        # Wait for vllm to be ready
        echo "Waiting for vllm to be ready..."
        timeout 900 bash -c 'until curl -f http://localhost:8000/health; do
          echo "Waiting for vllm..."
          sleep 5
        done'
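
# A sketch of how a workflow job might consume this composite action; the
# checkout step and the action path .github/actions/setup-vllm are
# assumptions for illustration, not taken from this file:
#
#   jobs:
#     test:
#       runs-on: ubuntu-latest
#       steps:
#         - uses: actions/checkout@v4
#         - name: Setup VLLM
#           uses: ./.github/actions/setup-vllm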