ci(ollama): add GitHub Actions workflow for integration tests (#1546)
# What does this PR do?

Added a GitHub Action to run inference tests for the Ollama provider. This ensures we have coverage for the Ollama integration.

---------

Signed-off-by: Sébastien Han <seb@redhat.com>
Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
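For reviewers who want to exercise the same path locally, here is a minimal sketch of the workflow's steps condensed into a shell session. It assumes Ollama and uv are already installed and the repository is checked out; the model tag and pytest flags mirror the workflow below. Note that the workflow also starts the Llama Stack server in the background before running pytest, which this sketch omits.

```bash
# Pull and serve the same model the workflow uses (assumes Ollama is installed).
ollama pull llama3.2:3b-instruct-fp16
nohup ollama run llama3.2:3b-instruct-fp16 > ollama.log 2>&1 &

# Install dependencies with uv, then run the inference integration tests
# against the Ollama provider (same flags as the workflow's last step).
uv sync --extra dev --extra test
uv pip install ollama faiss-cpu
uv pip install -e .
uv run pytest -v tests/integration/inference \
  --stack-config=ollama \
  --text-model="meta-llama/Llama-3.2-3B-Instruct" \
  --embedding-model=all-MiniLM-L6-v2
```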
commit edfcb02a0e (parent 42788a9d50)
1 changed file with 80 additions and 0 deletions
.github/workflows/integration-tests.yml (new file, vendored): +80 −0
@@ -0,0 +1,80 @@
name: Integration tests

on:
  pull_request:
  push:
    branches: [main]

jobs:
  ollama:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          python-version: "3.10"

      - name: Install Ollama
        run: |
          curl -fsSL https://ollama.com/install.sh | sh

      - name: Pull Ollama image
        run: |
          ollama pull llama3.2:3b-instruct-fp16

      - name: Start Ollama in background
        run: |
          nohup ollama run llama3.2:3b-instruct-fp16 > ollama.log 2>&1 &

      - name: Set Up Environment and Install Dependencies
        run: |
          uv sync --extra dev --extra test
          uv pip install ollama faiss-cpu
          uv pip install -e .

      - name: Wait for Ollama to start
        run: |
          echo "Waiting for Ollama..."
          for i in {1..30}; do
            if curl -s http://localhost:11434 | grep -q "Ollama is running"; then
              echo "Ollama is running!"
              exit 0
            fi
            sleep 1
          done
          echo "Ollama failed to start"
          ollama ps
          cat ollama.log
          exit 1

      - name: Start Llama Stack server in background
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
        run: |
          source .venv/bin/activate
          # TODO: use "llama stack run"
          nohup uv run python -m llama_stack.distribution.server.server --yaml-config ./llama_stack/templates/ollama/run.yaml > server.log 2>&1 &

      - name: Wait for Llama Stack server to be ready
        run: |
          echo "Waiting for Llama Stack server..."
          for i in {1..30}; do
            if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
              echo "Llama Stack server is up!"
              exit 0
            fi
            sleep 1
          done
          echo "Llama Stack server failed to start"
          cat server.log
          exit 1

      - name: Run Inference Integration Tests
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
        run: |
          uv run pytest -v tests/integration/inference --stack-config=ollama --text-model="meta-llama/Llama-3.2-3B-Instruct" --embedding-model=all-MiniLM-L6-v2