name: Integration Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    paths:
      - 'distributions/**'
      - 'llama_stack/**'
      - 'tests/integration/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
      - '.github/workflows/integration-tests.yml' # This workflow

# Cancel in-progress runs for the same ref so only the latest push is tested
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test-matrix:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Listing tests manually since some of them currently fail
        # TODO: generate matrix list from tests/integration when fixed
        test-type: [inference, datasets, inspect, scoring, post_training, providers]
      fail-fast: false # we want to run all tests regardless of failure

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          python-version: "3.10"

      - name: Install Ollama
        run: |
          curl -fsSL https://ollama.com/install.sh | sh

      - name: Pull Ollama image
        run: |
          ollama pull llama3.2:3b-instruct-fp16

      - name: Start Ollama in background
        run: |
          nohup ollama run llama3.2:3b-instruct-fp16 > ollama.log 2>&1 &

      - name: Set Up Environment and Install Dependencies
        run: |
          uv sync --extra dev --extra test
          uv pip install ollama faiss-cpu
          # always test against the latest version of the client
          uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main
          uv pip install -e .
          llama stack build --template ollama --image-type venv

      - name: Wait for Ollama to start
        run: |
          echo "Waiting for Ollama..."
          for i in {1..30}; do
            if curl -s http://localhost:11434 | grep -q "Ollama is running"; then
              echo "Ollama is running!"
              exit 0
            fi
            sleep 1
          done
          echo "Ollama failed to start"
          ollama ps
          cat ollama.log
          exit 1

      - name: Start Llama Stack server in background
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
        run: |
          source .venv/bin/activate
          nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv > server.log 2>&1 &

      - name: Wait for Llama Stack server to be ready
        run: |
          echo "Waiting for Llama Stack server..."
          for i in {1..30}; do
            if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
              echo "Llama Stack server is up!"
              exit 0
            fi
            sleep 1
          done
          echo "Llama Stack server failed to start"
          cat server.log
          exit 1

      - name: Run Integration Tests
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
        run: |
          uv run pytest -v tests/integration/${{ matrix.test-type }} --stack-config=ollama --text-model="meta-llama/Llama-3.2-3B-Instruct" --embedding-model=all-MiniLM-L6-v2
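
# Local reproduction (a sketch, not part of the CI run): the commands below mirror
# one matrix entry of this workflow. It assumes Ollama is already serving
# llama3.2:3b-instruct-fp16 locally and that the venv created by
# `llama stack build --template ollama --image-type venv` is active, with the
# Llama Stack server listening on port 8321 as above. `inference` here stands in
# for any value of the `test-type` matrix dimension.
#
#   export INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct"
#   uv run pytest -v tests/integration/inference \
#     --stack-config=ollama \
#     --text-model="meta-llama/Llama-3.2-3B-Instruct" \
#     --embedding-model=all-MiniLM-L6-v2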