# What does this PR do?

- Mostly AI-generated scripts to run guidellm (https://github.com/vllm-project/guidellm) benchmarks on a k8s setup
- Stack is using an image built from main on 9/11

## Test Plan

See updated README.md
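As a rough illustration (not part of the PR itself), a single configuration can be smoke-tested by calling the inner benchmark script directly; the `--target` and `--output-file` flags are the same ones the sweep script below passes, while the output path here is only a placeholder:

```bash
# Illustrative single run, assuming kubectl already points at the benchmark cluster.
# --target and --output-file are the flags the sweep script uses; the path is an example.
mkdir -p results
./run-guidellm-benchmark.sh --target stack --output-file results/manual-smoke-test.txt
```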
#!/usr/bin/env bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

# Define benchmark configurations: (target, stack_replicas, vllm_replicas, stack_workers)
configs=(
  "stack 1 1 1"
  "stack 1 1 2"
  "stack 1 1 4"
  "vllm 1 1 -"
)

set -euo pipefail

# Get the directory where this script is located
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

echo "Running comprehensive GuideLLM benchmark suite..."
echo "Start time: $(date)"

# Default deployment names
STACK_DEPLOYMENT="llama-stack-benchmark-server"
VLLM_DEPLOYMENT="vllm-server"

# Scaling function
scale_deployments() {
  local stack_replicas=$1
  local vllm_replicas=$2
  local workers=$3

  echo "Scaling deployments..."

  if [[ "$vllm_replicas" != "-" ]]; then
    echo "Scaling $VLLM_DEPLOYMENT to $vllm_replicas replicas..."
    kubectl scale deployment $VLLM_DEPLOYMENT --replicas=$vllm_replicas
    kubectl rollout status deployment $VLLM_DEPLOYMENT --timeout=600s
  fi

  # Note: $target is the global set by the main loop before this function is called
  if [[ "$target" == "stack" ]]; then
    if [[ "$stack_replicas" != "-" ]]; then
      echo "Scaling $STACK_DEPLOYMENT to $stack_replicas replicas..."
      kubectl scale deployment $STACK_DEPLOYMENT --replicas=$stack_replicas
      kubectl rollout status deployment $STACK_DEPLOYMENT --timeout=600s
    fi

    if [[ "$workers" != "-" ]]; then
      echo "Updating $STACK_DEPLOYMENT to use $workers workers..."
      kubectl set env deployment/$STACK_DEPLOYMENT LLAMA_STACK_WORKERS=$workers
      kubectl rollout status deployment $STACK_DEPLOYMENT --timeout=600s
    fi
  fi

  echo "All scaling operations completed. Waiting additional 30s for services to stabilize..."
  sleep 30
}

for config in "${configs[@]}"; do
  read -r target stack_replicas vllm_replicas workers <<< "$config"

  echo ""
  echo "=========================================="
  if [[ "$workers" != "-" ]]; then
    echo "Running benchmark: $target (stack=$stack_replicas, vllm=$vllm_replicas, workers=$workers)"
  else
    echo "Running benchmark: $target (stack=$stack_replicas, vllm=$vllm_replicas)"
  fi
  echo "Start: $(date)"
  echo "=========================================="

  # Scale deployments before running benchmark
  scale_deployments "$stack_replicas" "$vllm_replicas" "$workers"

  # Generate output filename with setup info
  TIMESTAMP=$(date +%Y%m%d-%H%M%S)
  if [[ "$target" == "stack" ]]; then
    OUTPUT_FILE="results/guidellm-benchmark-${target}-s${stack_replicas}-sw${workers}-v${vllm_replicas}-${TIMESTAMP}.txt"
  else
    OUTPUT_FILE="results/guidellm-benchmark-${target}-v${vllm_replicas}-${TIMESTAMP}.txt"
  fi

  # Run the benchmark with the cluster as configured
  "$SCRIPT_DIR/run-guidellm-benchmark.sh" \
    --target "$target" \
    --output-file "$OUTPUT_FILE"

  echo "Completed: $(date)"
  echo "Waiting 30 seconds before next benchmark..."
  sleep 30
done

echo ""
echo "=========================================="
echo "All benchmarks completed!"
echo "End time: $(date)"
echo "=========================================="
echo ""
echo "Results files generated:"
ls -la results/guidellm-*.txt results/guidellm-*.json 2>/dev/null || echo "No result files found"