ci: use ollama container image with loaded models (#2410)

# What does this PR do? Instead of downloading the models on every run, CI now uses a single Ollama container image that has the models already pulled and baked in, ready to use. This removes the CI flakiness caused by model pulling. Signed-off-by: Sébastien Han <seb@redhat.com>
2025-12-03 09:53:45 +00:00 · 2025-06-06 12:08:20 +02:00 · 2025-06-06 12:08:20 +02:00 · 0d0b8d2be1
commit 0d0b8d2be1
parent 692709cd45
4 changed files with 29 additions and 194 deletions
--- a/.github/actions/setup-ollama/action.yml
+++ b/.github/actions/setup-ollama/action.yml
@ -1,26 +1,9 @@
 name: Setup Ollama
-description: Start Ollama and cache model
+description: Start Ollama
-inputs:
-  models:
-    description: Comma-separated list of models to pull
-    default: "llama3.2:3b-instruct-fp16,all-minilm:latest"
 runs:
  using: "composite"
  steps:
-    - name: Install and start Ollama
+    - name: Start Ollama
      shell: bash
      run: |
-        # the ollama installer also starts the ollama service
+        docker run -d --name ollama -p 11434:11434 docker.io/leseb/ollama-with-models
-        curl -fsSL https://ollama.com/install.sh | sh
-    # Do NOT cache models - pulling the cache is actually slower than just pulling the model.
-    # It takes ~45 seconds to pull the models from the cache and unpack it, but only 30 seconds to
-    # pull them directly.
-    # Maybe this is because the cache is being pulled at the same time by all the matrix jobs?
-    - name: Pull requested models
-      if: inputs.models != ''
-      shell: bash
-      run: |
-        for model in $(echo "${{ inputs.models }}" | tr ',' ' '); do
-          ollama pull "$model"
-        done
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@ -50,7 +50,7 @@ jobs:
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
        run: |
-          LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv &
+          LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv --env OLLAMA_URL="http://0.0.0.0:11434" &
      - name: Wait for Llama Stack server to be ready
        if: matrix.client-type == 'http'
@ -87,6 +87,7 @@ jobs:
      - name: Run Integration Tests
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
+          OLLAMA_URL: "http://0.0.0.0:11434"
        run: |
          if [ "${{ matrix.client-type }}" == "library" ]; then
            stack_config="ollama"
@ -107,7 +108,7 @@ jobs:
      - name: Write ollama logs to file
        if: ${{ always() }}
        run: |
-          sudo journalctl -u ollama.service > ollama.log
+          sudo docker logs ollama > ollama.log
      - name: Upload all logs to artifacts
        if: ${{ always() }}
--- a/requirements.txt
+++ b/requirements.txt
@ -1,5 +1,5 @@
 # This file was autogenerated by uv via the following command:
-#    uv export --frozen --no-hashes --no-emit-project --output-file=requirements.txt
+#    uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt
 aiohappyeyeballs==2.5.0
    # via aiohttp
 aiohttp==3.11.13
@ -14,10 +14,6 @ anyio==4.8.0
    #   llama-stack-client
    #   openai
    #   starlette
-appnope==0.1.4 ; sys_platform == 'darwin'
-    # via ipykernel
-asttokens==3.0.0
-    # via stack-data
 async-timeout==5.0.1 ; python_full_version < '3.11'
    # via aiohttp
 attrs==25.1.0
@ -25,41 +21,19 @@ attrs==25.1.0
    #   aiohttp
    #   jsonschema
    #   referencing
-black==25.1.0
 certifi==2025.1.31
    # via
    #   httpcore
    #   httpx
    #   requests
-cffi==1.17.1 ; implementation_name == 'pypy'
-    # via pyzmq
-cfgv==3.4.0
-    # via pre-commit
 charset-normalizer==3.4.1
    # via requests
 click==8.1.8
-    # via
+    # via llama-stack-client
-    #   black
-    #   llama-stack-client
-    #   uvicorn
 colorama==0.4.6 ; sys_platform == 'win32'
    # via
    #   click
-    #   ipython
-    #   pytest
    #   tqdm
-comm==0.2.2
-    # via ipykernel
-coverage==7.6.12
-    # via
-    #   nbval
-    #   pytest-cov
-debugpy==1.8.12
-    # via ipykernel
-decorator==5.1.1
-    # via ipython
-distlib==0.3.9
-    # via virtualenv
 distro==1.9.0
    # via
    #   llama-stack-client
@ -67,20 +41,11 @@ distro==1.9.0
 ecdsa==0.19.1
    # via python-jose
 exceptiongroup==1.2.2 ; python_full_version < '3.11'
-    # via
+    # via anyio
-    #   anyio
-    #   ipython
-    #   pytest
-executing==2.2.0
-    # via stack-data
 fastapi==0.115.8
    # via llama-stack
-fastjsonschema==2.21.1
-    # via nbformat
 filelock==3.17.0
-    # via
+    # via huggingface-hub
-    #   huggingface-hub
-    #   virtualenv
 fire==0.7.0
    # via llama-stack
 frozenlist==1.5.0
@ -93,7 +58,6 @@ h11==0.16.0
    # via
    #   httpcore
    #   llama-stack
-    #   uvicorn
 httpcore==1.0.9
    # via httpx
 httpx==0.28.1
@ -103,119 +67,56 @@ httpx==0.28.1
    #   openai
 huggingface-hub==0.29.0
    # via llama-stack
-identify==2.6.7
-    # via pre-commit
 idna==3.10
    # via
    #   anyio
    #   httpx
    #   requests
    #   yarl
-iniconfig==2.0.0
-    # via pytest
-ipykernel==6.29.5
-    # via nbval
-ipython==8.32.0
-    # via ipykernel
-jedi==0.19.2
-    # via ipython
 jinja2==3.1.6
-    # via
+    # via llama-stack
-    #   llama-stack
-    #   pytest-html
 jiter==0.8.2
    # via openai
 jsonschema==4.23.0
-    # via
+    # via llama-stack
-    #   llama-stack
-    #   nbformat
 jsonschema-specifications==2024.10.1
    # via jsonschema
-jupyter-client==8.6.3
-    # via
-    #   ipykernel
-    #   nbval
-jupyter-core==5.7.2
-    # via
-    #   ipykernel
-    #   jupyter-client
-    #   nbformat
 llama-stack-client==0.2.10
    # via llama-stack
 markdown-it-py==3.0.0
    # via rich
 markupsafe==3.0.2
    # via jinja2
-matplotlib-inline==0.1.7
-    # via
-    #   ipykernel
-    #   ipython
 mdurl==0.1.2
    # via markdown-it-py
 multidict==6.1.0
    # via
    #   aiohttp
    #   yarl
-mypy-extensions==1.0.0
-    # via black
-nbformat==5.10.4
-    # via nbval
-nbval==0.11.0
-nest-asyncio==1.6.0
-    # via ipykernel
-nodeenv==1.9.1
-    # via pre-commit
 numpy==2.2.3
    # via pandas
 openai==1.71.0
    # via llama-stack
 packaging==24.2
-    # via
+    # via huggingface-hub
-    #   black
-    #   huggingface-hub
-    #   ipykernel
-    #   pytest
 pandas==2.2.3
    # via llama-stack-client
-parso==0.8.4
-    # via jedi
-pathspec==0.12.1
-    # via black
-pexpect==4.9.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
-    # via ipython
 pillow==11.1.0
    # via llama-stack
-platformdirs==4.3.6
-    # via
-    #   black
-    #   jupyter-core
-    #   virtualenv
-pluggy==1.5.0
-    # via pytest
-pre-commit==4.1.0
 prompt-toolkit==3.0.50
    # via
-    #   ipython
    #   llama-stack
    #   llama-stack-client
 propcache==0.3.0
    # via
    #   aiohttp
    #   yarl
-psutil==7.0.0
-    # via ipykernel
-ptyprocess==0.7.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
-    # via pexpect
-pure-eval==0.2.3
-    # via stack-data
 pyaml==25.1.0
    # via llama-stack-client
 pyasn1==0.4.8
    # via
    #   python-jose
    #   rsa
-pycparser==2.22 ; implementation_name == 'pypy'
-    # via cffi
 pydantic==2.10.6
    # via
    #   fastapi
@ -225,31 +126,9 @@ pydantic==2.10.6
 pydantic-core==2.27.2
    # via pydantic
 pygments==2.19.1
-    # via
+    # via rich
-    #   ipython
-    #   rich
-pytest==8.3.4
-    # via
-    #   nbval
-    #   pytest-asyncio
-    #   pytest-cov
-    #   pytest-html
-    #   pytest-json-report
-    #   pytest-metadata
-    #   pytest-timeout
-pytest-asyncio==0.25.3
-pytest-cov==6.0.0
-pytest-html==4.1.1
-pytest-json-report==1.5.0
-pytest-metadata==3.1.1
-    # via
-    #   pytest-html
-    #   pytest-json-report
-pytest-timeout==2.4.0
 python-dateutil==2.9.0.post0
-    # via
+    # via pandas
-    #   jupyter-client
-    #   pandas
 python-dotenv==1.0.1
    # via llama-stack
 python-jose==3.4.0
@ -258,17 +137,10 @@ python-multipart==0.0.20
    # via llama-stack
 pytz==2025.1
    # via pandas
-pywin32==308 ; platform_python_implementation != 'PyPy' and sys_platform == 'win32'
-    # via jupyter-core
 pyyaml==6.0.2
    # via
    #   huggingface-hub
-    #   pre-commit
    #   pyaml
-pyzmq==26.2.1
-    # via
-    #   ipykernel
-    #   jupyter-client
 referencing==0.36.2
    # via
    #   jsonschema
@ -290,10 +162,6 @@ rpds-py==0.22.3
    #   referencing
 rsa==4.9
    # via python-jose
-ruamel-yaml==0.18.10
-ruamel-yaml-clib==0.2.12 ; python_full_version < '3.13' and platform_python_implementation == 'CPython'
-    # via ruamel-yaml
-ruff==0.9.6
 setuptools==80.8.0
    # via llama-stack
 six==1.17.0
@ -305,8 +173,6 @@ sniffio==1.3.1
    #   anyio
    #   llama-stack-client
    #   openai
-stack-data==0.6.3
-    # via ipython
 starlette==0.45.3
    # via
    #   fastapi
@ -318,38 +184,16 @@ termcolor==2.5.0
    #   llama-stack-client
 tiktoken==0.9.0
    # via llama-stack
-tomli==2.2.1 ; python_full_version <= '3.11'
-    # via
-    #   black
-    #   coverage
-    #   pytest
-tornado==6.4.2
-    # via
-    #   ipykernel
-    #   jupyter-client
 tqdm==4.67.1
    # via
    #   huggingface-hub
    #   llama-stack-client
    #   openai
-traitlets==5.14.3
-    # via
-    #   comm
-    #   ipykernel
-    #   ipython
-    #   jupyter-client
-    #   jupyter-core
-    #   matplotlib-inline
-    #   nbformat
-types-requests==2.32.0.20241016
-types-setuptools==75.8.0.20250210
 typing-extensions==4.12.2
    # via
    #   anyio
-    #   black
    #   fastapi
    #   huggingface-hub
-    #   ipython
    #   llama-stack-client
    #   multidict
    #   openai
@ -357,16 +201,10 @@ typing-extensions==4.12.2
    #   pydantic-core
    #   referencing
    #   rich
-    #   uvicorn
 tzdata==2025.1
    # via pandas
 urllib3==2.3.0
-    # via
+    # via requests
-    #   requests
-    #   types-requests
-uvicorn==0.34.0
-virtualenv==20.29.2
-    # via pre-commit
 wcwidth==0.2.13
    # via prompt-toolkit
 yarl==1.18.3
--- a/tests/Containerfile
+++ b/tests/Containerfile
@ -0,0 +1,13 @@
 # Containerfile used to build our all in one ollama image to run tests in CI
 # podman build --platform linux/amd64 -f Containerfile -t ollama-with-models .
 #
 FROM --platform=linux/amd64 ollama/ollama:latest
 # Start ollama and pull models in a single layer
 RUN ollama serve & \
    sleep 5 && \
    ollama pull llama3.2:3b-instruct-fp16 && \
    ollama pull all-minilm:latest
 # Set the entrypoint to start ollama serve
 ENTRYPOINT ["ollama", "serve"]