chore(ci): misc Ollama improvements (#2052)

# What does this PR do?

* pull the embedding model so that it's not pulled during the distro server startup sequence
* cache the models
* collect logs at the end of the workflow

Signed-off-by: Sébastien Han <seb@redhat.com>
2025-12-03 09:53:45 +00:00 · 2025-04-30 16:05:28 +02:00 · 2025-04-30 16:05:28 +02:00 · 653e8526ec
commit 653e8526ec
parent 78ef6a6099
1 changed files with 19 additions and 2 deletions
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@ -43,10 +43,14 @@ jobs:
          # the ollama installer also starts the ollama service
          curl -fsSL https://ollama.com/install.sh | sh
-      - name: Pull Ollama image
+      # Do NOT cache models - pulling the cache is actually slower than just pulling the model.
      # It takes ~45 seconds to pull the models from the cache and unpack it, but only 30 seconds to
      # pull them directly.
      # Maybe this is because the cache is being pulled at the same time by all the matrix jobs?
      - name: Pull Ollama models (instruct and embed)
        run: |
          # Models are pulled directly on every run instead of being restored from a cache.
          # For reference, OLLAMA_MODELS defaults to ~ollama/.ollama/models.
          # The embedding model is pulled here so that it is not pulled during the
          # distro server startup sequence.
          ollama pull llama3.2:3b-instruct-fp16
          ollama pull all-minilm:latest
      - name: Set Up Environment and Install Dependencies
        run: |
@ -106,3 +110,16 @@ jobs:
            -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
            --text-model="meta-llama/Llama-3.2-3B-Instruct" \
            --embedding-model=all-MiniLM-L6-v2
      # Dump the ollama systemd unit's journal into ollama.log in the working directory,
      # where the "*.log" upload step picks it up.
      # always() makes this run even when an earlier step failed; without it the log
      # file would not be written on failing runs and the upload would have nothing to collect.
      - name: Write ollama logs to file
        if: always()
        run: |
          sudo journalctl -u ollama.service > ollama.log
      # Publish every *.log file in the working directory as a workflow artifact.
      # always() lets the upload run regardless of the outcome of earlier steps.
      - name: Upload all logs to artifacts
        uses: actions/upload-artifact@v4
        if: always()
        with:
          # The name combines the run id, the run attempt, and the two matrix values.
          name: "logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.client-type }}-${{ matrix.test-type }}"
          # Artifacts are kept for one day only.
          retention-days: 1
          path: |
            *.log