From 241344746785633d6d5629915723922919a1466b Mon Sep 17 00:00:00 2001
From: Ignas Baranauskas <ibaranau@redhat.com>
Date: Tue, 6 May 2025 13:56:20 +0100
Subject: [PATCH] ci: add new action to install ollama, cache the model (#2054)

# What does this PR do?
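This PR introduces a reusable composite GitHub Action that installs Ollama
and pulls the requested models. Caching the models to avoid repeated
downloads was the goal, but the action in this patch pulls them directly
and does not add a caching step.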

[//]: # (If resolving an issue, uncomment and update the line below)
Closes: #1949

## Test Plan

1. Trigger a workflow that uses the Ollama setup. Confirm that:
- The model is pulled successfully.
- It is placed in the correct directory, i.e. the current official location
(not ~ollama/.ollama/models as stated in the comment, so this still needs to
be confirmed).
2. Re-run the same workflow to validate that:
- The model is restored from the cache.
- Execution succeeds with the cached model.

[//]: # (## Documentation)
---
 .github/actions/setup-ollama/action.yml | 26 +++++++++++++++++++++++++
 .github/workflows/integration-tests.yml | 15 ++------------
 2 files changed, 28 insertions(+), 13 deletions(-)
 create mode 100644 .github/actions/setup-ollama/action.yml

diff --git a/.github/actions/setup-ollama/action.yml b/.github/actions/setup-ollama/action.yml
new file mode 100644
index 000000000..3dd6c940c
--- /dev/null
+++ b/.github/actions/setup-ollama/action.yml
@@ -0,0 +1,32 @@
+name: Setup Ollama
+description: Install and start Ollama, then pull the requested models
+inputs:
+  models:
+    description: Comma-separated list of models to pull
+    required: false
+    default: "llama3.2:3b-instruct-fp16,all-minilm:latest"
+runs:
+  using: "composite"
+  steps:
+    - name: Install and start Ollama
+      shell: bash
+      run: |
+        # the ollama installer also starts the ollama service
+        curl -fsSL https://ollama.com/install.sh | sh
+
+    # Do NOT cache models - pulling the cache is actually slower than just pulling the model.
+    # It takes ~45 seconds to pull the models from the cache and unpack it, but only 30 seconds to
+    # pull them directly.
+    # Maybe this is because the cache is being pulled at the same time by all the matrix jobs?
+    - name: Pull requested models
+      if: inputs.models != ''
+      shell: bash
+      env:
+        # Hand the input to the script through the environment rather than expanding the
+        # expression inside the script text, so the value is never parsed as shell code.
+        MODELS: ${{ inputs.models }}
+      run: |
+        # Turn the commas into spaces and let word splitting yield one model name per iteration.
+        for model in ${MODELS//,/ }; do
+          ollama pull "$model"
+        done
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index 044569139..f82a7cdd2 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -38,19 +38,8 @@ jobs:
           python-version: "3.10"
           activate-environment: true
 
-      - name: Install and start Ollama
-        run: |
-          # the ollama installer also starts the ollama service
-          curl -fsSL https://ollama.com/install.sh | sh
-
-      # Do NOT cache models - pulling the cache is actually slower than just pulling the model.
-      # It takes ~45 seconds to pull the models from the cache and unpack it, but only 30 seconds to
-      # pull them directly.
-      # Maybe this is because the cache is being pulled at the same time by all the matrix jobs?
-      - name: Pull Ollama models (instruct and embed)
-        run: |
-          ollama pull llama3.2:3b-instruct-fp16
-          ollama pull all-minilm:latest
+      - name: Setup ollama
+        uses: ./.github/actions/setup-ollama  # local action: installs Ollama, pulls default models
 
       - name: Set Up Environment and Install Dependencies
         run: |