ci: use ollama container image with loaded models (#2410)

# What does this PR do? Instead of downloading the models on every CI run, we now use a single Ollama container image that is built with the models already pulled and ready to use. This removes the CI flakiness caused by pulling models. Signed-off-by: Sébastien Han <seb@redhat.com>
2025-06-27 10:46:41 +00:00 · 2025-06-06 12:08:20 +02:00 · 2025-06-06 12:08:20 +02:00 · 0d0b8d2be1
commit 0d0b8d2be1
parent 692709cd45
4 changed files with 29 additions and 194 deletions
--- a/.github/actions/setup-ollama/action.yml
+++ b/.github/actions/setup-ollama/action.yml
@ -1,26 +1,9 @@
 name: Setup Ollama
-description: Start Ollama and cache model
-inputs:
-  models:
-    description: Comma-separated list of models to pull
-    default: "llama3.2:3b-instruct-fp16,all-minilm:latest"
+description: Start Ollama
 runs:
  using: "composite"
  steps:
-    - name: Install and start Ollama
+    - name: Start Ollama
      shell: bash
      run: |
-        # the ollama installer also starts the ollama service
-        curl -fsSL https://ollama.com/install.sh | sh
-
-    # Do NOT cache models - pulling the cache is actually slower than just pulling the model.
-    # It takes ~45 seconds to pull the models from the cache and unpack it, but only 30 seconds to
-    # pull them directly.
-    # Maybe this is because the cache is being pulled at the same time by all the matrix jobs?
-    - name: Pull requested models
-      if: inputs.models != ''
-      shell: bash
-      run: |
-        for model in $(echo "${{ inputs.models }}" | tr ',' ' '); do
-          ollama pull "$model"
-        done
+        docker run -d --name ollama -p 11434:11434 docker.io/leseb/ollama-with-models
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@ -50,7 +50,7 @@ jobs:
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
        run: |
-          LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv &
+          LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv --env OLLAMA_URL="http://0.0.0.0:11434" &

      - name: Wait for Llama Stack server to be ready
        if: matrix.client-type == 'http'
@ -87,6 +87,7 @@ jobs:
      - name: Run Integration Tests
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
+          OLLAMA_URL: "http://0.0.0.0:11434"
        run: |
          if [ "${{ matrix.client-type }}" == "library" ]; then
            stack_config="ollama"
@ -107,7 +108,7 @@ jobs:
      - name: Write ollama logs to file
        if: ${{ always() }}
        run: |
-          sudo journalctl -u ollama.service > ollama.log
+          sudo docker logs ollama > ollama.log 2>&1  # ollama logs to stderr; capture both streams

      - name: Upload all logs to artifacts
        if: ${{ always() }}
--- a/requirements.txt
+++ b/requirements.txt
@ -1,5 +1,5 @@
 # This file was autogenerated by uv via the following command:
-#    uv export --frozen --no-hashes --no-emit-project --output-file=requirements.txt
+#    uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt
 aiohappyeyeballs==2.5.0
    # via aiohttp
 aiohttp==3.11.13
@ -14,10 +14,6 @@ anyio==4.8.0
    #   llama-stack-client
    #   openai
    #   starlette
-appnope==0.1.4 ; sys_platform == 'darwin'
-    # via ipykernel
-asttokens==3.0.0
-    # via stack-data
 async-timeout==5.0.1 ; python_full_version < '3.11'
    # via aiohttp
 attrs==25.1.0
@ -25,41 +21,19 @@ attrs==25.1.0
    #   aiohttp
    #   jsonschema
    #   referencing
-black==25.1.0
 certifi==2025.1.31
    # via
    #   httpcore
    #   httpx
    #   requests
-cffi==1.17.1 ; implementation_name == 'pypy'
-    # via pyzmq
-cfgv==3.4.0
-    # via pre-commit
 charset-normalizer==3.4.1
    # via requests
 click==8.1.8
-    # via
-    #   black
-    #   llama-stack-client
-    #   uvicorn
+    # via llama-stack-client
 colorama==0.4.6 ; sys_platform == 'win32'
    # via
    #   click
-    #   ipython
-    #   pytest
    #   tqdm
-comm==0.2.2
-    # via ipykernel
-coverage==7.6.12
-    # via
-    #   nbval
-    #   pytest-cov
-debugpy==1.8.12
-    # via ipykernel
-decorator==5.1.1
-    # via ipython
-distlib==0.3.9
-    # via virtualenv
 distro==1.9.0
    # via
    #   llama-stack-client
@ -67,20 +41,11 @@ distro==1.9.0
 ecdsa==0.19.1
    # via python-jose
 exceptiongroup==1.2.2 ; python_full_version < '3.11'
-    # via
-    #   anyio
-    #   ipython
-    #   pytest
-executing==2.2.0
-    # via stack-data
+    # via anyio
 fastapi==0.115.8
    # via llama-stack
-fastjsonschema==2.21.1
-    # via nbformat
 filelock==3.17.0
-    # via
-    #   huggingface-hub
-    #   virtualenv
+    # via huggingface-hub
 fire==0.7.0
    # via llama-stack
 frozenlist==1.5.0
@ -93,7 +58,6 @@ h11==0.16.0
    # via
    #   httpcore
    #   llama-stack
-    #   uvicorn
 httpcore==1.0.9
    # via httpx
 httpx==0.28.1
@ -103,119 +67,56 @@ httpx==0.28.1
    #   openai
 huggingface-hub==0.29.0
    # via llama-stack
-identify==2.6.7
-    # via pre-commit
 idna==3.10
    # via
    #   anyio
    #   httpx
    #   requests
    #   yarl
-iniconfig==2.0.0
-    # via pytest
-ipykernel==6.29.5
-    # via nbval
-ipython==8.32.0
-    # via ipykernel
-jedi==0.19.2
-    # via ipython
 jinja2==3.1.6
-    # via
-    #   llama-stack
-    #   pytest-html
+    # via llama-stack
 jiter==0.8.2
    # via openai
 jsonschema==4.23.0
-    # via
-    #   llama-stack
-    #   nbformat
+    # via llama-stack
 jsonschema-specifications==2024.10.1
    # via jsonschema
-jupyter-client==8.6.3
-    # via
-    #   ipykernel
-    #   nbval
-jupyter-core==5.7.2
-    # via
-    #   ipykernel
-    #   jupyter-client
-    #   nbformat
 llama-stack-client==0.2.10
    # via llama-stack
 markdown-it-py==3.0.0
    # via rich
 markupsafe==3.0.2
    # via jinja2
-matplotlib-inline==0.1.7
-    # via
-    #   ipykernel
-    #   ipython
 mdurl==0.1.2
    # via markdown-it-py
 multidict==6.1.0
    # via
    #   aiohttp
    #   yarl
-mypy-extensions==1.0.0
-    # via black
-nbformat==5.10.4
-    # via nbval
-nbval==0.11.0
-nest-asyncio==1.6.0
-    # via ipykernel
-nodeenv==1.9.1
-    # via pre-commit
 numpy==2.2.3
    # via pandas
 openai==1.71.0
    # via llama-stack
 packaging==24.2
-    # via
-    #   black
-    #   huggingface-hub
-    #   ipykernel
-    #   pytest
+    # via huggingface-hub
 pandas==2.2.3
    # via llama-stack-client
-parso==0.8.4
-    # via jedi
-pathspec==0.12.1
-    # via black
-pexpect==4.9.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
-    # via ipython
 pillow==11.1.0
    # via llama-stack
-platformdirs==4.3.6
-    # via
-    #   black
-    #   jupyter-core
-    #   virtualenv
-pluggy==1.5.0
-    # via pytest
-pre-commit==4.1.0
 prompt-toolkit==3.0.50
    # via
-    #   ipython
    #   llama-stack
    #   llama-stack-client
 propcache==0.3.0
    # via
    #   aiohttp
    #   yarl
-psutil==7.0.0
-    # via ipykernel
-ptyprocess==0.7.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
-    # via pexpect
-pure-eval==0.2.3
-    # via stack-data
 pyaml==25.1.0
    # via llama-stack-client
 pyasn1==0.4.8
    # via
    #   python-jose
    #   rsa
-pycparser==2.22 ; implementation_name == 'pypy'
-    # via cffi
 pydantic==2.10.6
    # via
    #   fastapi
@ -225,31 +126,9 @@ pydantic==2.10.6
 pydantic-core==2.27.2
    # via pydantic
 pygments==2.19.1
-    # via
-    #   ipython
-    #   rich
-pytest==8.3.4
-    # via
-    #   nbval
-    #   pytest-asyncio
-    #   pytest-cov
-    #   pytest-html
-    #   pytest-json-report
-    #   pytest-metadata
-    #   pytest-timeout
-pytest-asyncio==0.25.3
-pytest-cov==6.0.0
-pytest-html==4.1.1
-pytest-json-report==1.5.0
-pytest-metadata==3.1.1
-    # via
-    #   pytest-html
-    #   pytest-json-report
-pytest-timeout==2.4.0
+    # via rich
 python-dateutil==2.9.0.post0
-    # via
-    #   jupyter-client
-    #   pandas
+    # via pandas
 python-dotenv==1.0.1
    # via llama-stack
 python-jose==3.4.0
@ -258,17 +137,10 @@ python-multipart==0.0.20
    # via llama-stack
 pytz==2025.1
    # via pandas
-pywin32==308 ; platform_python_implementation != 'PyPy' and sys_platform == 'win32'
-    # via jupyter-core
 pyyaml==6.0.2
    # via
    #   huggingface-hub
-    #   pre-commit
    #   pyaml
-pyzmq==26.2.1
-    # via
-    #   ipykernel
-    #   jupyter-client
 referencing==0.36.2
    # via
    #   jsonschema
@ -290,10 +162,6 @@ rpds-py==0.22.3
    #   referencing
 rsa==4.9
    # via python-jose
-ruamel-yaml==0.18.10
-ruamel-yaml-clib==0.2.12 ; python_full_version < '3.13' and platform_python_implementation == 'CPython'
-    # via ruamel-yaml
-ruff==0.9.6
 setuptools==80.8.0
    # via llama-stack
 six==1.17.0
@ -305,8 +173,6 @@ sniffio==1.3.1
    #   anyio
    #   llama-stack-client
    #   openai
-stack-data==0.6.3
-    # via ipython
 starlette==0.45.3
    # via
    #   fastapi
@ -318,38 +184,16 @@ termcolor==2.5.0
    #   llama-stack-client
 tiktoken==0.9.0
    # via llama-stack
-tomli==2.2.1 ; python_full_version <= '3.11'
-    # via
-    #   black
-    #   coverage
-    #   pytest
-tornado==6.4.2
-    # via
-    #   ipykernel
-    #   jupyter-client
 tqdm==4.67.1
    # via
    #   huggingface-hub
    #   llama-stack-client
    #   openai
-traitlets==5.14.3
-    # via
-    #   comm
-    #   ipykernel
-    #   ipython
-    #   jupyter-client
-    #   jupyter-core
-    #   matplotlib-inline
-    #   nbformat
-types-requests==2.32.0.20241016
-types-setuptools==75.8.0.20250210
 typing-extensions==4.12.2
    # via
    #   anyio
-    #   black
    #   fastapi
    #   huggingface-hub
-    #   ipython
    #   llama-stack-client
    #   multidict
    #   openai
@ -357,16 +201,10 @@ typing-extensions==4.12.2
    #   pydantic-core
    #   referencing
    #   rich
-    #   uvicorn
 tzdata==2025.1
    # via pandas
 urllib3==2.3.0
-    # via
-    #   requests
-    #   types-requests
-uvicorn==0.34.0
-virtualenv==20.29.2
-    # via pre-commit
+    # via requests
 wcwidth==0.2.13
    # via prompt-toolkit
 yarl==1.18.3
--- a/tests/Containerfile
+++ b/tests/Containerfile
@ -0,0 +1,13 @@
+# Containerfile used to build our all in one ollama image to run tests in CI
+# podman build --platform linux/amd64 -f Containerfile -t ollama-with-models .
+#
+FROM --platform=linux/amd64 ollama/ollama:latest
+
+# Start ollama, poll (up to 60s) until the server answers, then pull models in a single layer
+RUN ollama serve & \
+    for i in $(seq 1 60); do ollama list >/dev/null 2>&1 && break; sleep 1; done && \
+    ollama pull llama3.2:3b-instruct-fp16 && \
+    ollama pull all-minilm:latest
+
+# Set the entrypoint to start ollama serve
+ENTRYPOINT ["ollama", "serve"]