mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
Introduce GitHub Actions Workflow for Llama Stack Tests (#523)
# What does this PR do?
Initial implementation of a GitHub Actions workflow for automated testing of Llama Stack.
## Key Features
- Automatically runs tests on pull requests and manual dispatch
- Provides support for model tests that require a GPU
- Reports test results and uploads summaries
This commit is contained in:
parent
fcd6449519
commit
144abd2e71
1 changed files with 355 additions and 0 deletions
355
.github/workflows/gha_workflow_llama_stack_tests.yml
vendored
Normal file
355
.github/workflows/gha_workflow_llama_stack_tests.yml
vendored
Normal file
|
@ -0,0 +1,355 @@
|
|||
name: 'Run Llama-stack Tests'

on:
  # Pull-request triggering is switched off until the tests run as intended
  # within mainline.
  # TODO: Add this back.
  # pull_request_target:
  #   types: ["opened"]
  #   branches:
  #     - 'main'
  #   paths:
  #     - 'llama_stack/**/*.py'
  #     - 'tests/**/*.py'

  # Manual trigger; every input below is delivered to the workflow as a string.
  workflow_dispatch:
    inputs:
      runner:
        description: "GHA Runner Scale Set label to run workflow on."
        required: true
        default: 'llama-stack-gha-runner-gpu'

      checkout_reference:
        description: 'The branch, tag, or SHA to checkout'
        required: true
        default: 'main'

      # Steps compare this against the string 'true' to enable "[DEBUG]" steps.
      debug:
        description: "Run debugging steps?"
        required: false
        default: 'true'

      sleep_time:
        description: "[DEBUG] sleep time for debugging"
        required: true
        default: '0'

      provider_id:
        description: "ID of your provider"
        required: true
        default: 'meta_reference'

      model_id:
        description: "Shorthand name for target model ID (llama_3b or llama_8b)"
        required: true
        default: 'llama_3b'

      model_override_3b:
        description: "Specify shorthand model for <llama_3b> "
        required: false
        default: 'Llama3.2-3B-Instruct'

      model_override_8b:
        description: "Specify shorthand model for <llama_8b> "
        required: false
        default: 'Llama3.1-8B-Instruct'
|
||||
|
||||
# Workflow-wide environment. Each `inputs.* || '<default>'` expression falls
# back to the literal default when the corresponding input is empty.
env:
  # Provider ID placed in each test's provider config
  PROVIDER_ID: "${{ inputs.provider_id || 'meta_reference' }}"

  # Location of the model checkpoints within the EFS volume
  MODEL_CHECKPOINT_DIR: '/data/llama'

  # Directory the tests are run from
  TESTS_PATH: "${{ github.workspace }}/llama_stack/providers/tests"

  # Space-separated list of model IDs that are valid within pytest fixture marks
  AVAILABLE_MODEL_IDs: 'llama_3b llama_8b'

  # Shorthand model ID, used in pytest fixture marks
  MODEL_ID: "${{ inputs.model_id || 'llama_3b' }}"

  # Override the `llama_3b` / `llama_8b` models, else use the default.
  LLAMA_3B_OVERRIDE: "${{ inputs.model_override_3b || 'Llama3.2-3B-Instruct' }}"
  LLAMA_8B_OVERRIDE: "${{ inputs.model_override_8b || 'Llama3.1-8B-Instruct' }}"

  # Directories under TESTS_PATH that the test loop skips
  EXCLUDED_DIRS: '__pycache__'

  # XML report files produced by the test loop; starts empty and is filled in
  # by a later step
  REPORTS_GEN: ''
|
||||
|
||||
jobs:
  # Single job that runs every step of the test workflow on the selected runner.
  execute_workflow:
    name: Execute workload on Self-Hosted GPU k8s runner
    # Listing any permission sets all unlisted scopes to none, so the read
    # access used by actions/checkout is granted explicitly alongside the
    # write access needed to comment on pull requests.
    permissions:
      contents: read
      pull-requests: write
    # Every `run:` step in this job uses bash.
    defaults:
      run:
        shell: bash
    # Use the dispatch-provided runner label, or the default GPU runner label
    # when no label is supplied.
    runs-on: ${{ inputs.runner != '' && inputs.runner || 'llama-stack-gha-runner-gpu' }}
    if: always()
    steps:
|
||||
|
||||
##############################
#### INITIAL DEBUG CHECKS ####
##############################
# Debug aid: list the contents of the model checkpoint directory.
# A failing listing is tolerated (continue-on-error) and does not stop the job.
- name: '[DEBUG] Check content of the EFS mount'
  id: debug_efs_volume
  if: ${{ inputs.debug == 'true' }}
  continue-on-error: true
  run: |
    echo "========= Content of the EFS mount ============="
    ls -la ${MODEL_CHECKPOINT_DIR}
|
||||
|
||||
# Debug aid: print the OS release file of the environment the job runs in.
- name: '[DEBUG] Get runner container OS information'
  id: debug_os_info
  if: inputs.debug == 'true'
  run: cat /etc/os-release
|
||||
|
||||
# Debug aid: dump the workflow-level environment variables, one per line.
- name: '[DEBUG] Print environment variables'
  id: debug_env_vars
  if: inputs.debug == 'true'
  run: |
    # The unquoted heredoc delimiter lets the shell expand each variable.
    cat <<EOF
    PROVIDER_ID = ${PROVIDER_ID}
    MODEL_CHECKPOINT_DIR = ${MODEL_CHECKPOINT_DIR}
    AVAILABLE_MODEL_IDs = ${AVAILABLE_MODEL_IDs}
    MODEL_ID = ${MODEL_ID}
    LLAMA_3B_OVERRIDE = ${LLAMA_3B_OVERRIDE}
    LLAMA_8B_OVERRIDE = ${LLAMA_8B_OVERRIDE}
    EXCLUDED_DIRS = ${EXCLUDED_DIRS}
    REPORTS_GEN = ${REPORTS_GEN}
    EOF
|
||||
|
||||
############################
#### MODEL INPUT CHECKS ####
############################

# Abort early unless MODEL_ID appears in the space-separated AVAILABLE_MODEL_IDs.
- name: 'Check if env.model_id is valid'
  id: check_model_id
  run: |
    # Both sides are padded with spaces so a partial ID (e.g. "llama") cannot
    # match an entry of the list.
    case " ${AVAILABLE_MODEL_IDs} " in
      *" ${MODEL_ID} "*)
        echo "Model ID '${MODEL_ID}' is valid."
        ;;
      *)
        echo "Model ID '${MODEL_ID}' is invalid. Terminating workflow."
        exit 1
        ;;
    esac
|
||||
|
||||
#######################
#### CODE CHECKOUT ####
#######################
# Check out the ref requested through the `checkout_reference` dispatch input.
# The previous expression read `inputs.branch`, which is not a declared input,
# so the requested ref was never applied. When the input is empty,
# actions/checkout falls back to its default ref for the triggering event.
- name: "Checkout 'meta-llama/llama-stack' repository"
  id: checkout_repo
  uses: actions/checkout@v4
  with:
    ref: ${{ inputs.checkout_reference }}
|
||||
|
||||
# Debug aid: list the workspace directory once the checkout step has run.
- name: '[DEBUG] Content of the repository after checkout'
  id: debug_content_after_checkout
  if: inputs.debug == 'true'
  run: ls -la ${GITHUB_WORKSPACE}
|
||||
|
||||
##########################################################
####              OPTIONAL SLEEP DEBUG                ####
#                                                        #
# Use to "exec" into the test k8s POD and run tests      #
# manually to identify what dependencies are being used. #
#                                                        #
##########################################################
# Pauses the job for `sleep_time` when debugging is enabled.
- name: "[DEBUG] sleep"
  id: debug_sleep
  if: ${{ inputs.debug == 'true' && inputs.sleep_time != '' }}
  env:
    # The input is handed over as an environment variable rather than being
    # expanded into the script text, so its content cannot be run as shell code.
    SLEEP_TIME: ${{ inputs.sleep_time }}
  run: |
    sleep "${SLEEP_TIME}"
|
||||
|
||||
############################
#### UPDATE SYSTEM PATH ####
############################
# Creates ~/.local/bin and registers it on PATH for the steps that follow.
- name: 'Update path: execute'
  id: path_update_exec
  run: |
    # Certain libraries installed below are only recognized through .local/bin
    # when their executables are called to install sub-dependencies.
    local_bin=${HOME}/.local/bin
    mkdir -p ${local_bin}
    echo "${local_bin}" >> "${GITHUB_PATH}"
|
||||
|
||||
#####################################
#### UPDATE CHECKPOINT DIRECTORY ####
#####################################
# Points MODEL_CHECKPOINT_DIR at the sub-directory of the selected model and
# exports it for later steps; fails when that directory does not exist.
- name: "Update checkpoint directory"
  id: checkpoint_update
  run: |
    # Map the shorthand model ID to the directory name configured for it.
    case "${MODEL_ID}" in
      llama_3b) model_dir_name="${LLAMA_3B_OVERRIDE}" ;;
      llama_8b) model_dir_name="${LLAMA_8B_OVERRIDE}" ;;
      *)        model_dir_name="" ;;
    esac
    checkpoint_dir="${MODEL_CHECKPOINT_DIR}/${model_dir_name}"

    # Log the directory of the model actually selected (the earlier log line
    # always showed the 3B override, even for llama_8b).
    echo "Checkpoint directory: ${checkpoint_dir}"

    if [ -n "${model_dir_name}" ] && [ -d "${checkpoint_dir}" ]; then
      echo "MODEL_CHECKPOINT_DIR=${checkpoint_dir}" >> "$GITHUB_ENV"
    else
      echo "MODEL_ID & LLAMA_*B_OVERRIDE are not a valid pairing. Terminating workflow."
      exit 1
    fi
|
||||
|
||||
# Debug aid: show MODEL_CHECKPOINT_DIR as seen by steps after the update step.
- name: '[DEBUG] Checkpoint update check'
  id: debug_checkpoint_update
  if: inputs.debug == 'true'
  run: echo "MODEL_CHECKPOINT_DIR (after update) = ${MODEL_CHECKPOINT_DIR}"
|
||||
|
||||
##################################
#### DEPENDENCY INSTALLATIONS ####
##################################
# Installs the system packages the later steps rely on.
- name: "Installing 'apt' required packages"
  id: install_apt
  run: |
    echo "[STEP] Installing 'apt' required packages"
    # apt-get is used instead of apt: apt's command line is aimed at
    # interactive use and is not guaranteed to be stable for scripts.
    sudo apt-get update -y
    # curl is installed here because a later step downloads an installer with it.
    sudo apt-get install -y python3 python3-pip npm wget curl
|
||||
|
||||
# Downloads the Ollama install script and pipes it straight into `sh`.
- name: "Installing packages with 'curl'"
  id: install_curl
  run: curl -fsSL https://ollama.com/install.sh | sh
|
||||
|
||||
# Downloads and installs Miniconda, then puts its bin directory on PATH.
- name: "Installing packages with 'wget'"
  id: install_wget
  run: |
    wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
    chmod +x Miniconda3-latest-Linux-x86_64.sh
    # -b runs the installer unattended; -p pins the install prefix so it
    # matches the PATH entry added below. The `conda install` style arguments
    # previously passed here are not installer options and were dropped;
    # faiss-gpu is installed by the dedicated conda step.
    ./Miniconda3-latest-Linux-x86_64.sh -b -p "${HOME}/miniconda3"
    # Add miniconda3 bin to system path
    echo "${HOME}/miniconda3/bin" >> "$GITHUB_PATH"
|
||||
|
||||
# Installs the junit-merge CLI globally; the merge step invokes it later.
- name: "Installing packages with 'npm'"
  id: install_npm_generic
  run: sudo npm install -g junit-merge
|
||||
|
||||
# Installs the repository's requirements, the repository itself in editable
# mode, and an extra list of Python packages.
- name: 'Installing pip dependencies'
  id: install_pip_generic
  run: |
    echo "[STEP] Installing 'llama-stack' models"
    pip install -U pip setuptools
    pip install -r requirements.txt
    pip install -e .

    # Additional packages, installed/upgraded in a single pip invocation.
    extra_packages=(
      torch torchvision
      pytest pytest_asyncio
      fairscale lm-format-enforcer
      zmq chardet pypdf
      pandas sentence_transformers together
      aiosqlite
    )
    pip install -U "${extra_packages[@]}"
|
||||
# Installs faiss-gpu from the pytorch/nvidia conda channels.
- name: "Installing packages with conda"
  id: install_conda_generic
  run: |
    # -y answers the confirmation prompt automatically; the job has no
    # interactive terminal to answer it.
    conda install -y -q -c pytorch -c nvidia faiss-gpu=1.9.0
|
||||
|
||||
#############################################################
#### TESTING TO BE DONE FOR BOTH PRS AND MANUAL DISPATCH ####
#############################################################
# Runs every test_*.py file found in each non-excluded sub-directory of
# llama_stack/providers/tests, writing one JUnit XML report per file, and
# exports the space-separated list of written reports as REPORTS_GEN.
- name: "Run Tests: Loop"
  id: run_tests_loop
  working-directory: "${{ github.workspace }}"
  run: |
    pattern=""
    for dir in llama_stack/providers/tests/*; do
      [ -d "$dir" ] || continue
      dir_name=$(basename "$dir")

      # Skip directories listed in the space-separated EXCLUDED_DIRS.
      if [[ " $EXCLUDED_DIRS " =~ " $dir_name " ]]; then
        continue
      fi

      for file in "$dir"/test_*.py; do
        # A glob without matches is left as the literal pattern; skip it
        # instead of handing a non-existent path to pytest.
        [ -e "$file" ] || continue

        test_name=$(basename "$file")
        new_file="result-${dir_name}-${test_name}.xml"

        # A failing run is reported but does not stop the loop.
        if torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "${PROVIDER_ID} and ${MODEL_ID}" \
          --junitxml="${{ github.workspace }}/${new_file}"; then
          echo "Ran test: ${test_name}"
        else
          echo "Did NOT run test: ${test_name}"
        fi

        # Only record reports that were actually written, so the merge step
        # is not handed missing files.
        if [ -f "${{ github.workspace }}/${new_file}" ]; then
          pattern+="${new_file} "
        fi
      done
    done
    echo "REPORTS_GEN=$pattern" >> "$GITHUB_ENV"
|
||||
|
||||
# Merges the per-file JUnit reports listed in REPORTS_GEN into a single file.
- name: "Test Summary: Merge"
  id: test_summary_merge
  working-directory: "${{ github.workspace }}"
  run: |
    echo "Merging the following test result files: ${REPORTS_GEN}"

    # Fail with a clear message when no report was produced.
    if [ -z "${REPORTS_GEN// /}" ]; then
      echo "No test result files were generated; nothing to merge."
      exit 1
    fi

    # Defaults to merging them into 'merged-test-results.xml'
    # REPORTS_GEN is read as a shell variable (not expanded into the script
    # text) and left unquoted on purpose: it is a space-separated file list.
    junit-merge ${REPORTS_GEN}
|
||||
|
||||
############################################
#### AUTOMATIC TESTING ON PULL REQUESTS ####
############################################

#### Run tests ####

# Placeholder for PR-only tests; currently it only logs the workspace path.
- name: 'PR - Run Tests'
  id: pr_run_tests
  if: ${{ github.event_name == 'pull_request_target' }}
  working-directory: ${{ github.workspace }}
  run: |
    echo "[STEP] Running PyTest tests at 'GITHUB_WORKSPACE' path: ${GITHUB_WORKSPACE} | path: ${{ github.workspace }}"
    # (Optional) Add more tests here.

    # Merge test results with 'merged-test-results.xml' from above.
    # junit-merge <new-test-results> merged-test-results.xml
|
||||
|
||||
#### Create test summary ####

# Renders the merged JUnit report into a markdown summary file (PR runs only).
- name: 'PR - Test Summary'
  id: pr_test_summary_create
  if: ${{ github.event_name == 'pull_request_target' }}
  uses: test-summary/action@v2
  with:
    paths: ${{ github.workspace }}/merged-test-results.xml
    output: 'test-summary.md'
|
||||
|
||||
# Uploads the markdown test summary as a workflow artifact (PR runs only).
# Uses v4 of the action: v3 has been deprecated and disabled by GitHub.
- name: "PR - Upload Test Summary"
  id: pr_test_summary_upload
  if: github.event_name == 'pull_request_target'
  uses: actions/upload-artifact@v4
  with:
    name: test-summary
    path: test-summary.md
|
||||
|
||||
#### Update PR request ####

# Posts the content of the markdown summary as a pull-request comment.
- name: 'PR - Update comment'
  id: pr_update_comment
  if: ${{ github.event_name == 'pull_request_target' }}
  uses: thollander/actions-comment-pull-request@v2
  with:
    filePath: 'test-summary.md'
|
||||
|
||||
########################
#### MANUAL TESTING ####
########################

#### Run tests ####

# Placeholder for manual-dispatch-only tests; currently it only logs the
# workspace path.
- name: 'Manual - Run Tests: Prep'
  id: manual_run_tests
  if: ${{ github.event_name == 'workflow_dispatch' }}
  working-directory: ${{ github.workspace }}
  run: |
    echo "[STEP] Running PyTest tests at 'GITHUB_WORKSPACE' path: ${{ github.workspace }}"

    # TODO: Use this when collection errors are resolved
    # pytest -s -v -m "${PROVIDER_ID} and ${MODEL_ID}" --junitxml="${{ github.workspace }}/merged-test-results.xml"

    # (Optional) Add more tests here.

    # Merge test results with 'merged-test-results.xml' from above.
    # junit-merge <new-test-results> merged-test-results.xml
|
||||
|
||||
#### Create test summary ####

# Builds a test summary from the merged JUnit report on manual dispatches;
# `always()` keeps the step running even when an earlier step failed.
- name: 'Manual - Test Summary'
  id: manual_test_summary
  if: ${{ always() && github.event_name == 'workflow_dispatch' }}
  uses: test-summary/action@v2
  with:
    paths: ${{ github.workspace }}/merged-test-results.xml
|
Loading…
Add table
Add a link
Reference in a new issue