Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-06-28 02:53:30 +00:00
Introduce GitHub Actions Workflow for Llama Stack Tests (#523)
# What does this PR do?
Initial implementation of a GitHub Actions workflow for automated testing of Llama Stack.

## Key Features
- Automatically runs tests on pull requests and manual dispatch
- Supports model tests that require a GPU
- Reports test results and uploads summaries
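Once merged, a manual run can be dispatched from the CLI; a minimal sketch (the flags mirror the `workflow_dispatch` inputs below, and the values shown are the declared defaults):

```sh
# Dispatch the workflow by file name, passing inputs with -f key=value.
gh workflow run gha_workflow_llama_stack_tests.yml \
  -f runner=llama-stack-gha-runner-gpu \
  -f checkout_reference=main \
  -f model_id=llama_3b \
  -f provider_id=meta_reference
```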
This commit is contained in:
parent fcd6449519
commit 144abd2e71
1 changed file with 355 additions and 0 deletions
355  .github/workflows/gha_workflow_llama_stack_tests.yml (vendored, normal file)
@@ -0,0 +1,355 @@
name: "Run Llama-stack Tests"
|
||||||
|
|
||||||
|
on:
|
||||||
|
#### Temporarily disable PR runs until tests run as intended within mainline.
|
||||||
|
#TODO Add this back.
|
||||||
|
#pull_request_target:
|
||||||
|
# types: ["opened"]
|
||||||
|
# branches:
|
||||||
|
# - 'main'
|
||||||
|
# paths:
|
||||||
|
# - 'llama_stack/**/*.py'
|
||||||
|
# - 'tests/**/*.py'
|
||||||
|
|
||||||
|
  workflow_dispatch:
    inputs:
      runner:
        description: 'GHA Runner Scale Set label to run workflow on.'
        required: true
        default: "llama-stack-gha-runner-gpu"

      checkout_reference:
        description: "The branch, tag, or SHA to checkout"
        required: true
        default: "main"

      debug:
        description: 'Run debugging steps?'
        required: false
        default: "true"

      sleep_time:
        description: '[DEBUG] sleep time for debugging'
        required: true
        default: "0"

      provider_id:
        description: 'ID of your provider'
        required: true
        default: "meta_reference"

      model_id:
        description: 'Shorthand name for target model ID (llama_3b or llama_8b)'
        required: true
        default: "llama_3b"

      model_override_3b:
        description: 'Model to use for the <llama_3b> shorthand'
        required: false
        default: "Llama3.2-3B-Instruct"

      model_override_8b:
        description: 'Model to use for the <llama_8b> shorthand'
        required: false
        default: "Llama3.1-8B-Instruct"

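# NOTE: inputs are only populated on workflow_dispatch runs; for other
# triggers, every "${{ inputs.* || '<default>' }}" expression in the env
# block below falls back to the same defaults declared above.
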
env:
  # ID used for each test's provider config
  PROVIDER_ID: "${{ inputs.provider_id || 'meta_reference' }}"

  # Path to model checkpoints within EFS volume
  MODEL_CHECKPOINT_DIR: "/data/llama"

  # Path to directory to run tests from
  TESTS_PATH: "${{ github.workspace }}/llama_stack/providers/tests"

  # Keep track of a list of model IDs that are valid to use within pytest fixture marks
  AVAILABLE_MODEL_IDs: "llama_3b llama_8b"

  # Shorthand name for model ID, used in pytest fixture marks
  MODEL_ID: "${{ inputs.model_id || 'llama_3b' }}"

  # Override the `llama_3b` / `llama_8b` models, else use the default.
  LLAMA_3B_OVERRIDE: "${{ inputs.model_override_3b || 'Llama3.2-3B-Instruct' }}"
  LLAMA_8B_OVERRIDE: "${{ inputs.model_override_8b || 'Llama3.1-8B-Instruct' }}"

  # Defines which directories in TESTS_PATH to exclude from the test loop
  EXCLUDED_DIRS: "__pycache__"

  # Defines the output XML reports generated after a test is run
  REPORTS_GEN: ""
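  # NOTE: REPORTS_GEN starts empty and is populated by the "Run Tests: Loop"
  # step below, which appends one JUnit XML report path per test file.
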
jobs:
  execute_workflow:
    name: Execute workload on Self-Hosted GPU k8s runner
    permissions:
      pull-requests: write
    defaults:
      run:
        shell: bash
    runs-on: ${{ inputs.runner != '' && inputs.runner || 'llama-stack-gha-runner-gpu' }}
    if: always()
    steps:

      ##############################
      #### INITIAL DEBUG CHECKS ####
      ##############################
      - name: "[DEBUG] Check content of the EFS mount"
        id: debug_efs_volume
        continue-on-error: true
        if: inputs.debug == 'true'
        run: |
          echo "========= Content of the EFS mount ============="
          ls -la ${{ env.MODEL_CHECKPOINT_DIR }}

      - name: "[DEBUG] Get runner container OS information"
        id: debug_os_info
        if: ${{ inputs.debug == 'true' }}
        run: |
          cat /etc/os-release

      - name: "[DEBUG] Print environment variables"
        id: debug_env_vars
        if: ${{ inputs.debug == 'true' }}
        run: |
          echo "PROVIDER_ID = ${PROVIDER_ID}"
          echo "MODEL_CHECKPOINT_DIR = ${MODEL_CHECKPOINT_DIR}"
          echo "AVAILABLE_MODEL_IDs = ${AVAILABLE_MODEL_IDs}"
          echo "MODEL_ID = ${MODEL_ID}"
          echo "LLAMA_3B_OVERRIDE = ${LLAMA_3B_OVERRIDE}"
          echo "LLAMA_8B_OVERRIDE = ${LLAMA_8B_OVERRIDE}"
          echo "EXCLUDED_DIRS = ${EXCLUDED_DIRS}"
          echo "REPORTS_GEN = ${REPORTS_GEN}"

      ############################
      #### MODEL INPUT CHECKS ####
      ############################

      - name: "Check if env.model_id is valid"
        id: check_model_id
        run: |
          # AVAILABLE_MODEL_IDs is a space-separated string, so a plain
          # substring match against " ${MODEL_ID} " is sufficient here.
          if [[ " ${AVAILABLE_MODEL_IDs} " =~ " ${MODEL_ID} " ]]; then
            echo "Model ID '${MODEL_ID}' is valid."
          else
            echo "Model ID '${MODEL_ID}' is invalid. Terminating workflow."
            exit 1
          fi

      #######################
      #### CODE CHECKOUT ####
      #######################
      - name: "Checkout 'meta-llama/llama-stack' repository"
        id: checkout_repo
        uses: actions/checkout@v4
        with:
          # Use the 'checkout_reference' input (branch, tag, or SHA) declared above.
          ref: ${{ inputs.checkout_reference }}

      - name: "[DEBUG] Content of the repository after checkout"
        id: debug_content_after_checkout
        if: ${{ inputs.debug == 'true' }}
        run: |
          ls -la ${GITHUB_WORKSPACE}

      ##########################################################
      ####              OPTIONAL SLEEP DEBUG                ####
      #                                                        #
      # Use to "exec" into the test k8s POD and run tests      #
      # manually to identify what dependencies are being used. #
      #                                                        #
      ##########################################################
      - name: "[DEBUG] sleep"
        id: debug_sleep
        if: ${{ inputs.debug == 'true' && inputs.sleep_time != '' }}
        run: |
          sleep ${{ inputs.sleep_time }}

      ############################
      #### UPDATE SYSTEM PATH ####
      ############################
      - name: "Update path: execute"
        id: path_update_exec
        run: |
          # .local/bin is needed for certain libraries installed below to be recognized
          # when calling their executable to install sub-dependencies
          mkdir -p ${HOME}/.local/bin
          echo "${HOME}/.local/bin" >> "$GITHUB_PATH"

      #####################################
      #### UPDATE CHECKPOINT DIRECTORY ####
      #####################################
      - name: "Update checkpoint directory"
        id: checkpoint_update
        run: |
          echo "Requested model: ${MODEL_ID} (checkpoint root: ${MODEL_CHECKPOINT_DIR})"
          if [ "${MODEL_ID}" = "llama_3b" ] && [ -d "${MODEL_CHECKPOINT_DIR}/${LLAMA_3B_OVERRIDE}" ]; then
            echo "MODEL_CHECKPOINT_DIR=${MODEL_CHECKPOINT_DIR}/${LLAMA_3B_OVERRIDE}" >> "$GITHUB_ENV"
          elif [ "${MODEL_ID}" = "llama_8b" ] && [ -d "${MODEL_CHECKPOINT_DIR}/${LLAMA_8B_OVERRIDE}" ]; then
            echo "MODEL_CHECKPOINT_DIR=${MODEL_CHECKPOINT_DIR}/${LLAMA_8B_OVERRIDE}" >> "$GITHUB_ENV"
          else
            echo "MODEL_ID & LLAMA_*B_OVERRIDE are not a valid pairing. Terminating workflow."
            exit 1
          fi

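      # NOTE: writing MODEL_CHECKPOINT_DIR to $GITHUB_ENV above replaces the
      # value for every subsequent step, so later steps see the full
      # <root>/<model> path rather than the original /data/llama root.
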
- name: "[DEBUG] Checkpoint update check"
|
||||||
|
id: debug_checkpoint_update
|
||||||
|
if: ${{ inputs.debug == 'true' }}
|
||||||
|
run: |
|
||||||
|
echo "MODEL_CHECKPOINT_DIR (after update) = ${MODEL_CHECKPOINT_DIR}"
|
||||||
|
|
||||||
|
      ##################################
      #### DEPENDENCY INSTALLATIONS ####
      ##################################
      - name: "Installing 'apt' required packages"
        id: install_apt
        run: |
          echo "[STEP] Installing 'apt' required packages"
          sudo apt update -y
          sudo apt install -y python3 python3-pip npm wget

      - name: "Installing packages with 'curl'"
        id: install_curl
        run: |
          curl -fsSL https://ollama.com/install.sh | sh

      - name: "Installing packages with 'wget'"
        id: install_wget
        run: |
          wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
          chmod +x Miniconda3-latest-Linux-x86_64.sh
          # '-b' runs the installer in non-interactive batch mode.
          ./Miniconda3-latest-Linux-x86_64.sh -b
          # Add miniconda3 bin to system path
          echo "${HOME}/miniconda3/bin" >> "$GITHUB_PATH"

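      # NOTE: in batch mode the Miniconda installer defaults its prefix to
      # ${HOME}/miniconda3, which is why that bin directory is appended to
      # GITHUB_PATH above so 'conda' resolves in later steps.
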
- name: "Installing packages with 'npm'"
|
||||||
|
id: install_npm_generic
|
||||||
|
run: |
|
||||||
|
sudo npm install -g junit-merge
|
||||||
|
|
||||||
|
- name: "Installing pip dependencies"
|
||||||
|
id: install_pip_generic
|
||||||
|
run: |
|
||||||
|
echo "[STEP] Installing 'llama-stack' models"
|
||||||
|
pip install -U pip setuptools
|
||||||
|
pip install -r requirements.txt
|
||||||
|
pip install -e .
|
||||||
|
pip install -U \
|
||||||
|
torch torchvision \
|
||||||
|
pytest pytest_asyncio \
|
||||||
|
fairscale lm-format-enforcer \
|
||||||
|
zmq chardet pypdf \
|
||||||
|
pandas sentence_transformers together \
|
||||||
|
aiosqlite
|
||||||
|
- name: "Installing packages with conda"
|
||||||
|
id: install_conda_generic
|
||||||
|
run: |
|
||||||
|
conda install -q -c pytorch -c nvidia faiss-gpu=1.9.0
|
||||||
|
|
||||||
|
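      # NOTE: faiss-gpu is typically installed from the pytorch/nvidia conda
      # channels (official GPU builds are not published to PyPI), hence the
      # separate conda step rather than adding it to the pip list above.
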
      #############################################################
      #### TESTING TO BE DONE FOR BOTH PRS AND MANUAL DISPATCH ####
      #############################################################
      - name: "Run Tests: Loop"
        id: run_tests_loop
        working-directory: "${{ github.workspace }}"
        run: |
          pattern=""
          for dir in llama_stack/providers/tests/*; do
            if [ -d "$dir" ]; then
              dir_name=$(basename "$dir")
              if [[ ! " $EXCLUDED_DIRS " =~ " $dir_name " ]]; then
                for file in "$dir"/test_*.py; do
                  # Skip the literal glob pattern when a directory has no test files.
                  [ -e "$file" ] || continue
                  test_name=$(basename "$file")
                  new_file="result-${dir_name}-${test_name}.xml"
                  if torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "${PROVIDER_ID} and ${MODEL_ID}" \
                    --junitxml="${{ github.workspace }}/${new_file}"; then
                    echo "Ran test: ${test_name}"
                  else
                    echo "Did NOT run test: ${test_name}"
                  fi
                  pattern+="${new_file} "
                done
              fi
            fi
          done
          echo "REPORTS_GEN=$pattern" >> "$GITHUB_ENV"

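      # NOTE: torchrun expects a Python script, so the loop above points it at
      # "$(which pytest)"; this launches pytest under torchrun so tests that
      # initialize distributed model parallelism can run, while the -m marks
      # restrict collection to the selected provider/model pair.
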
- name: "Test Summary: Merge"
|
||||||
|
id: test_summary_merge
|
||||||
|
working-directory: "${{ github.workspace }}"
|
||||||
|
run: |
|
||||||
|
echo "Merging the following test result files: ${REPORTS_GEN}"
|
||||||
|
# Defaults to merging them into 'merged-test-results.xml'
|
||||||
|
junit-merge ${{ env.REPORTS_GEN }}
|
||||||
|
|
||||||
|
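      # NOTE: 'merged-test-results.xml' produced here is the file both the PR
      # and manual "Test Summary" steps below feed to test-summary/action.
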
      ############################################
      #### AUTOMATIC TESTING ON PULL REQUESTS ####
      ############################################

      #### Run tests ####

      - name: "PR - Run Tests"
        id: pr_run_tests
        working-directory: "${{ github.workspace }}"
        if: github.event_name == 'pull_request_target'
        run: |
          echo "[STEP] Running PyTest tests at 'GITHUB_WORKSPACE' path: ${GITHUB_WORKSPACE} | path: ${{ github.workspace }}"
          # (Optional) Add more tests here.

          # Merge test results with 'merged-test-results.xml' from above.
          # junit-merge <new-test-results> merged-test-results.xml

      #### Create test summary ####

      - name: "PR - Test Summary"
        id: pr_test_summary_create
        if: github.event_name == 'pull_request_target'
        uses: test-summary/action@v2
        with:
          paths: "${{ github.workspace }}/merged-test-results.xml"
          output: test-summary.md

      - name: "PR - Upload Test Summary"
        id: pr_test_summary_upload
        if: github.event_name == 'pull_request_target'
        uses: actions/upload-artifact@v3
        with:
          name: test-summary
          path: test-summary.md

      #### Update the PR with a comment ####

      - name: "PR - Update comment"
        id: pr_update_comment
        if: github.event_name == 'pull_request_target'
        uses: thollander/actions-comment-pull-request@v2
        with:
          filePath: test-summary.md

      ########################
      #### MANUAL TESTING ####
      ########################

      #### Run tests ####

      - name: "Manual - Run Tests: Prep"
        id: manual_run_tests
        working-directory: "${{ github.workspace }}"
        if: github.event_name == 'workflow_dispatch'
        run: |
          echo "[STEP] Running PyTest tests at 'GITHUB_WORKSPACE' path: ${{ github.workspace }}"

          #TODO Use this when collection errors are resolved
          # pytest -s -v -m "${PROVIDER_ID} and ${MODEL_ID}" --junitxml="${{ github.workspace }}/merged-test-results.xml"

          # (Optional) Add more tests here.

          # Merge test results with 'merged-test-results.xml' from above.
          # junit-merge <new-test-results> merged-test-results.xml

      #### Create test summary ####

      - name: "Manual - Test Summary"
        id: manual_test_summary
        if: always() && github.event_name == 'workflow_dispatch'
        uses: test-summary/action@v2
        with:
          paths: "${{ github.workspace }}/merged-test-results.xml"