Introduce GitHub Actions Workflow for Llama Stack Tests (#523)

# What does this PR do? Initial implementation of GitHub Actions workflow for automated testing of Llama Stack. ## Key Features - Automatically runs tests on pull requests and manual dispatch - Provides support for GPU required model tests - Reports test results and uploads summaries
2025-06-27 18:50:41 +00:00 · 2024-12-04 18:42:55 -05:00 · 2024-12-04 18:42:55 -05:00 · 144abd2e71
commit 144abd2e71
parent fcd6449519
1 changed files with 355 additions and 0 deletions
--- a/.github/workflows/gha_workflow_llama_stack_tests.yml
+++ b/.github/workflows/gha_workflow_llama_stack_tests.yml
@ -0,0 +1,355 @@
+name: "Run Llama-stack Tests"
+
+on:
+  #### Temporarily disable PR runs until tests run as intended within mainline.
+  #TODO Add this back.
+  #pull_request_target:
+  #  types: ["opened"]
+  #  branches:
+  #    - 'main'
+  #  paths:
+  #    - 'llama_stack/**/*.py'
+  #    - 'tests/**/*.py'
+
+  workflow_dispatch:
+    inputs:
+      runner:
+        description: 'GHA Runner Scale Set label to run workflow on.'
+        required: true
+        default: "llama-stack-gha-runner-gpu"
+
+      checkout_reference:
+        description: "The branch, tag, or SHA to checkout"
+        required: true
+        default: "main"
+
+      debug:
+        description: 'Run debugging steps?'
+        required: false
+        default: "true"
+
+      sleep_time:
+        description: '[DEBUG] sleep time for debugging'
+        required: true
+        default: "0"
+
+      provider_id:
+        description: 'ID of your provider'
+        required: true
+        default: "meta_reference"
+
+      model_id:
+        description: 'Shorthand name for target model ID (llama_3b or llama_8b)'
+        required: true
+        default: "llama_3b"
+
+      model_override_3b:
+        description: 'Specify shorthand model for <llama_3b> '
+        required: false
+        default: "Llama3.2-3B-Instruct"
+
+      model_override_8b:
+        description: 'Specify shorthand model for <llama_8b> '
+        required: false
+        default: "Llama3.1-8B-Instruct"
+
+env:
+  # ID used for each test's provider config
+  PROVIDER_ID: "${{ inputs.provider_id || 'meta_reference' }}"
+
+  # Path to model checkpoints within EFS volume
+  MODEL_CHECKPOINT_DIR: "/data/llama"
+
+  # Path to directory to run tests from
+  TESTS_PATH: "${{ github.workspace }}/llama_stack/providers/tests"
+
+  # Keep track of a list of model IDs that are valid to use within pytest fixture marks
+  AVAILABLE_MODEL_IDs: "llama_3b llama_8b"
+
+  # Shorthand name for model ID, used in pytest fixture marks
+  MODEL_ID: "${{ inputs.model_id || 'llama_3b' }}"
+
+  # Override the `llama_3b` / `llama_8b' models, else use the default.
+  LLAMA_3B_OVERRIDE: "${{ inputs.model_override_3b || 'Llama3.2-3B-Instruct' }}"
+  LLAMA_8B_OVERRIDE: "${{ inputs.model_override_8b || 'Llama3.1-8B-Instruct' }}"
+
+  # Defines which directories in TESTS_PATH to exclude from the test loop
+  EXCLUDED_DIRS: "__pycache__"
+
+  # Defines the output xml reports generated after a test is run
+  REPORTS_GEN: ""
+
+jobs:
+  execute_workflow:
+    name: Execute workload on Self-Hosted GPU k8s runner
+    permissions:
+      pull-requests: write
+    defaults:
+      run:
+        shell: bash
+    runs-on: ${{ inputs.runner != '' && inputs.runner || 'llama-stack-gha-runner-gpu' }}
+    if: always()
+    steps:
+
+      ##############################
+      #### INITIAL DEBUG CHECKS ####
+      ##############################
+      - name: "[DEBUG] Check content of the EFS mount"
+        id: debug_efs_volume
+        continue-on-error: true
+        if: inputs.debug == 'true'
+        run: |
+            echo "========= Content of the EFS mount ============="
+            ls -la ${{ env.MODEL_CHECKPOINT_DIR }}
+
+      - name: "[DEBUG] Get runner container OS information"
+        id: debug_os_info
+        if: ${{ inputs.debug == 'true' }}
+        run: |
+            cat /etc/os-release
+
+      - name: "[DEBUG] Print environment variables"
+        id: debug_env_vars
+        if: ${{ inputs.debug == 'true' }}
+        run: |
+            echo "PROVIDER_ID = ${PROVIDER_ID}"
+            echo "MODEL_CHECKPOINT_DIR = ${MODEL_CHECKPOINT_DIR}"
+            echo "AVAILABLE_MODEL_IDs = ${AVAILABLE_MODEL_IDs}"
+            echo "MODEL_ID = ${MODEL_ID}"
+            echo "LLAMA_3B_OVERRIDE = ${LLAMA_3B_OVERRIDE}"
+            echo "LLAMA_8B_OVERRIDE = ${LLAMA_8B_OVERRIDE}"
+            echo "EXCLUDED_DIRS = ${EXCLUDED_DIRS}"
+            echo "REPORTS_GEN = ${REPORTS_GEN}"
+
+      ############################
+      #### MODEL INPUT CHECKS ####
+      ############################
+
+      - name: "Check if env.model_id is valid"
+        id: check_model_id
+        run: |
+          if [[ " ${AVAILABLE_MODEL_IDs[@]} " =~ " ${MODEL_ID} " ]]; then
+            echo "Model ID '${MODEL_ID}' is valid."
+          else
+            echo "Model ID '${MODEL_ID}' is invalid. Terminating workflow."
+            exit 1
+          fi
+
+      #######################
+      #### CODE CHECKOUT ####
+      #######################
+      - name: "Checkout 'meta-llama/llama-stack' repository"
+        id: checkout_repo
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.branch }}
+
+      - name: "[DEBUG] Content of the repository after checkout"
+        id: debug_content_after_checkout
+        if: ${{ inputs.debug == 'true' }}
+        run: |
+            ls -la ${GITHUB_WORKSPACE}
+
+      ##########################################################
+      ####              OPTIONAL SLEEP DEBUG                ####
+      #                                                        #
+      # Use to "exec" into the test k8s POD and run tests      #
+      # manually to identify what dependencies are being used. #
+      #                                                        #
+      ##########################################################
+      - name: "[DEBUG] sleep"
+        id: debug_sleep
+        if: ${{ inputs.debug == 'true' && inputs.sleep_time != '' }}
+        run: |
+            sleep ${{ inputs.sleep_time }}
+
+      ############################
+      #### UPDATE SYSTEM PATH ####
+      ############################
+      - name: "Update path: execute"
+        id: path_update_exec
+        run: |
+          # .local/bin is needed for certain libraries installed below to be recognized
+          # when calling their executable to install sub-dependencies
+          mkdir -p ${HOME}/.local/bin
+          echo "${HOME}/.local/bin" >> "$GITHUB_PATH"
+
+      #####################################
+      #### UPDATE CHECKPOINT DIRECTORY ####
+      #####################################
+      - name: "Update checkpoint directory"
+        id: checkpoint_update
+        run: |
+          echo "Checkpoint directory: ${MODEL_CHECKPOINT_DIR}/$LLAMA_3B_OVERRIDE"
+          if [ "${MODEL_ID}" = "llama_3b" ] && [ -d "${MODEL_CHECKPOINT_DIR}/${LLAMA_3B_OVERRIDE}" ]; then
+            echo "MODEL_CHECKPOINT_DIR=${MODEL_CHECKPOINT_DIR}/${LLAMA_3B_OVERRIDE}" >> "$GITHUB_ENV"
+          elif [ "${MODEL_ID}" = "llama_8b" ] && [ -d "${MODEL_CHECKPOINT_DIR}/${LLAMA_8B_OVERRIDE}" ]; then
+            echo "MODEL_CHECKPOINT_DIR=${MODEL_CHECKPOINT_DIR}/${LLAMA_8B_OVERRIDE}" >> "$GITHUB_ENV"
+          else
+            echo "MODEL_ID & LLAMA_*B_OVERRIDE are not a valid pairing. Terminating workflow."
+            exit 1
+          fi
+
+      - name: "[DEBUG] Checkpoint update check"
+        id: debug_checkpoint_update
+        if: ${{ inputs.debug == 'true' }}
+        run: |
+          echo "MODEL_CHECKPOINT_DIR (after update) = ${MODEL_CHECKPOINT_DIR}"
+
+      ##################################
+      #### DEPENDENCY INSTALLATIONS ####
+      ##################################
+      - name: "Installing 'apt' required packages"
+        id: install_apt
+        run: |
+          echo "[STEP] Installing 'apt' required packages"
+          sudo apt update -y
+          sudo apt install -y python3 python3-pip npm wget
+
+      - name: "Installing packages with 'curl'"
+        id: install_curl
+        run: |
+          curl -fsSL https://ollama.com/install.sh | sh
+
+      - name: "Installing packages with 'wget'"
+        id: install_wget
+        run: |
+          wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
+          chmod +x Miniconda3-latest-Linux-x86_64.sh
+          ./Miniconda3-latest-Linux-x86_64.sh -b install -c pytorch -c nvidia faiss-gpu=1.9.0
+          # Add miniconda3 bin to system path
+          echo "${HOME}/miniconda3/bin" >> "$GITHUB_PATH"
+
+      - name: "Installing packages with 'npm'"
+        id: install_npm_generic
+        run: |
+          sudo npm install -g junit-merge
+
+      - name: "Installing pip dependencies"
+        id: install_pip_generic
+        run: |
+          echo "[STEP] Installing 'llama-stack' models"
+          pip install -U pip setuptools
+          pip install -r requirements.txt
+          pip install -e .
+          pip install -U \
+            torch torchvision \
+            pytest pytest_asyncio \
+            fairscale lm-format-enforcer \
+            zmq chardet pypdf \
+            pandas sentence_transformers together \
+            aiosqlite
+      - name: "Installing packages with conda"
+        id: install_conda_generic
+        run: |
+          conda install -q -c pytorch -c nvidia faiss-gpu=1.9.0
+
+      #############################################################
+      #### TESTING TO BE DONE FOR BOTH PRS AND MANUAL DISPATCH ####
+      #############################################################
+      - name: "Run Tests: Loop"
+        id: run_tests_loop
+        working-directory: "${{ github.workspace }}"
+        run: |
+          pattern=""
+          for dir in llama_stack/providers/tests/*; do
+            if [ -d "$dir" ]; then
+              dir_name=$(basename "$dir")
+              if [[ ! " $EXCLUDED_DIRS " =~ " $dir_name " ]]; then
+                for file in "$dir"/test_*.py; do
+                  test_name=$(basename "$file")
+                  new_file="result-${dir_name}-${test_name}.xml"
+                  if torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "${PROVIDER_ID} and ${MODEL_ID}" \
+                     --junitxml="${{ github.workspace }}/${new_file}"; then
+                    echo "Ran test: ${test_name}"
+                  else
+                    echo "Did NOT run test: ${test_name}"
+                  fi
+                  pattern+="${new_file} "
+                done
+              fi
+            fi
+          done
+          echo "REPORTS_GEN=$pattern" >> "$GITHUB_ENV"
+
+      - name: "Test Summary: Merge"
+        id: test_summary_merge
+        working-directory: "${{ github.workspace }}"
+        run: |
+          echo "Merging the following test result files: ${REPORTS_GEN}"
+          # Defaults to merging them into 'merged-test-results.xml'
+          junit-merge ${{ env.REPORTS_GEN }}
+
+      ############################################
+      #### AUTOMATIC TESTING ON PULL REQUESTS ####
+      ############################################
+
+      #### Run tests ####
+
+      - name: "PR - Run Tests"
+        id: pr_run_tests
+        working-directory: "${{ github.workspace }}"
+        if: github.event_name == 'pull_request_target'
+        run: |
+          echo "[STEP] Running PyTest tests at 'GITHUB_WORKSPACE' path: ${GITHUB_WORKSPACE} | path: ${{ github.workspace }}"
+          # (Optional) Add more tests here.
+
+          # Merge test results with 'merged-test-results.xml' from above.
+          # junit-merge <new-test-results> merged-test-results.xml
+
+      #### Create test summary ####
+
+      - name: "PR - Test Summary"
+        id: pr_test_summary_create
+        if: github.event_name == 'pull_request_target'
+        uses: test-summary/action@v2
+        with:
+          paths: "${{ github.workspace }}/merged-test-results.xml"
+          output: test-summary.md
+
+      - name: "PR - Upload Test Summary"
+        id: pr_test_summary_upload
+        if: github.event_name == 'pull_request_target'
+        uses: actions/upload-artifact@v3
+        with:
+          name: test-summary
+          path: test-summary.md
+
+      #### Update PR request ####
+
+      - name: "PR - Update comment"
+        id: pr_update_comment
+        if: github.event_name == 'pull_request_target'
+        uses: thollander/actions-comment-pull-request@v2
+        with:
+          filePath: test-summary.md
+
+      ########################
+      #### MANUAL TESTING ####
+      ########################
+
+      #### Run tests ####
+
+      - name: "Manual - Run Tests: Prep"
+        id: manual_run_tests
+        working-directory: "${{ github.workspace }}"
+        if: github.event_name == 'workflow_dispatch'
+        run: |
+          echo "[STEP] Running PyTest tests at 'GITHUB_WORKSPACE' path: ${{ github.workspace }}"
+
+          #TODO Use this when collection errors are resolved
+          # pytest -s -v -m "${PROVIDER_ID} and ${MODEL_ID}" --junitxml="${{ github.workspace }}/merged-test-results.xml"
+
+          # (Optional) Add more tests here.
+
+          # Merge test results with 'merged-test-results.xml' from above.
+          # junit-merge <new-test-results> merged-test-results.xml
+
+      #### Create test summary ####
+
+      - name: "Manual - Test Summary"
+        id: manual_test_summary
+        if: always() && github.event_name == 'workflow_dispatch'
+        uses: test-summary/action@v2
+        with:
+          paths: "${{ github.workspace }}/merged-test-results.xml"