diff --git a/.github/actions/setup-ollama/action.yml b/.github/actions/setup-ollama/action.yml
index 3dd6c940c..1f6e9818b 100644
--- a/.github/actions/setup-ollama/action.yml
+++ b/.github/actions/setup-ollama/action.yml
@@ -1,26 +1,9 @@
 name: Setup Ollama
-description: Start Ollama and cache model
-inputs:
-  models:
-    description: Comma-separated list of models to pull
-    default: "llama3.2:3b-instruct-fp16,all-minilm:latest"
+description: Start Ollama
 runs:
   using: "composite"
   steps:
-    - name: Install and start Ollama
+    - name: Start Ollama
       shell: bash
       run: |
-        # the ollama installer also starts the ollama service
-        curl -fsSL https://ollama.com/install.sh | sh
-
-    # Do NOT cache models - pulling the cache is actually slower than just pulling the model.
-    # It takes ~45 seconds to pull the models from the cache and unpack it, but only 30 seconds to
-    # pull them directly.
-    # Maybe this is because the cache is being pulled at the same time by all the matrix jobs?
-    - name: Pull requested models
-      if: inputs.models != ''
-      shell: bash
-      run: |
-        for model in $(echo "${{ inputs.models }}" | tr ',' ' '); do
-          ollama pull "$model"
-        done
+        docker run -d --name ollama -p 11434:11434 docker.io/leseb/ollama-with-models
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index 13b35643e..7aa8b5807 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -50,7 +50,7 @@ jobs:
         env:
           INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
         run: |
-          LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv &
+          LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv --env OLLAMA_URL="http://0.0.0.0:11434" &

       - name: Wait for Llama Stack server to be ready
         if: matrix.client-type == 'http'
@@ -87,6 +87,7 @@ jobs:
       - name: Run Integration Tests
         env:
           INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
+          OLLAMA_URL: "http://0.0.0.0:11434"
         run: |
           if [ "${{ matrix.client-type }}" == "library" ]; then
             stack_config="ollama"
@@ -107,7 +108,7 @@
       - name: Write ollama logs to file
         if: ${{ always() }}
         run: |
-          sudo journalctl -u ollama.service > ollama.log
+          sudo docker logs ollama > ollama.log

       - name: Upload all logs to artifacts
         if: ${{ always() }}
diff --git a/requirements.txt b/requirements.txt
index 04c5ae74f..cfd63b456 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 # This file was autogenerated by uv via the following command:
-#    uv export --frozen --no-hashes --no-emit-project --output-file=requirements.txt
+#    uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt
 aiohappyeyeballs==2.5.0
     # via aiohttp
 aiohttp==3.11.13
@@ -14,10 +14,6 @@
 anyio==4.8.0
     # via
     #   httpx
     #   llama-stack-client
     #   openai
     #   starlette
-appnope==0.1.4 ; sys_platform == 'darwin'
-    # via ipykernel
-asttokens==3.0.0
-    # via stack-data
 async-timeout==5.0.1 ; python_full_version < '3.11'
     # via aiohttp
 attrs==25.1.0
@@ -25,41 +21,19 @@
     # via
     #   aiohttp
     #   jsonschema
     #   referencing
-black==25.1.0
 certifi==2025.1.31
     # via
     #   httpcore
     #   httpx
     #   requests
-cffi==1.17.1 ; implementation_name == 'pypy'
-    # via pyzmq
-cfgv==3.4.0
-    # via pre-commit
 charset-normalizer==3.4.1
     # via requests
 click==8.1.8
-    # via
-    #   black
-    #   llama-stack-client
-    #   uvicorn
+    # via llama-stack-client
 colorama==0.4.6 ; sys_platform == 'win32'
     # via
     #   click
-    #   ipython
-    #   pytest
     #   tqdm
-comm==0.2.2
-    # via ipykernel
-coverage==7.6.12
-    # via
-    #   nbval
-    #   pytest-cov
-debugpy==1.8.12
-    # via ipykernel
-decorator==5.1.1
-    # via ipython
-distlib==0.3.9
-    # via virtualenv
 distro==1.9.0
     # via
     #   llama-stack-client
@@ -67,20 +41,11 @@
 ecdsa==0.19.1
     # via python-jose
 exceptiongroup==1.2.2 ; python_full_version < '3.11'
-    # via
-    #   anyio
-    #   ipython
-    #   pytest
-executing==2.2.0
-    # via stack-data
+    # via anyio
 fastapi==0.115.8
     # via llama-stack
-fastjsonschema==2.21.1
-    # via nbformat
 filelock==3.17.0
-    # via
-    #   huggingface-hub
-    #   virtualenv
+    # via huggingface-hub
 fire==0.7.0
     # via llama-stack
 frozenlist==1.5.0
     # via
     #   aiohttp
     #   aiosignal
@@ -93,7 +58,6 @@
 h11==0.16.0
     # via
     #   httpcore
     #   llama-stack
-    #   uvicorn
 httpcore==1.0.9
     # via httpx
@@ -103,119 +67,56 @@
 httpx==0.28.1
     # via
     #   llama-stack
     #   llama-stack-client
     #   openai
 huggingface-hub==0.29.0
     # via llama-stack
-identify==2.6.7
-    # via pre-commit
 idna==3.10
     # via
     #   anyio
     #   httpx
     #   requests
     #   yarl
-iniconfig==2.0.0
-    # via pytest
-ipykernel==6.29.5
-    # via nbval
-ipython==8.32.0
-    # via ipykernel
-jedi==0.19.2
-    # via ipython
 jinja2==3.1.6
-    # via
-    #   llama-stack
-    #   pytest-html
+    # via llama-stack
 jiter==0.8.2
     # via openai
 jsonschema==4.23.0
-    # via
-    #   llama-stack
-    #   nbformat
+    # via llama-stack
 jsonschema-specifications==2024.10.1
     # via jsonschema
-jupyter-client==8.6.3
-    # via
-    #   ipykernel
-    #   nbval
-jupyter-core==5.7.2
-    # via
-    #   ipykernel
-    #   jupyter-client
-    #   nbformat
 llama-stack-client==0.2.10
     # via llama-stack
 markdown-it-py==3.0.0
     # via rich
 markupsafe==3.0.2
     # via jinja2
-matplotlib-inline==0.1.7
-    # via
-    #   ipykernel
-    #   ipython
 mdurl==0.1.2
     # via markdown-it-py
 multidict==6.1.0
     # via
     #   aiohttp
     #   yarl
-mypy-extensions==1.0.0
-    # via black
-nbformat==5.10.4
-    # via nbval
-nbval==0.11.0
-nest-asyncio==1.6.0
-    # via ipykernel
-nodeenv==1.9.1
-    # via pre-commit
 numpy==2.2.3
     # via pandas
 openai==1.71.0
     # via llama-stack
 packaging==24.2
-    # via
-    #   black
-    #   huggingface-hub
-    #   ipykernel
-    #   pytest
+    # via huggingface-hub
 pandas==2.2.3
     # via llama-stack-client
-parso==0.8.4
-    # via jedi
-pathspec==0.12.1
-    # via black
-pexpect==4.9.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
-    # via ipython
 pillow==11.1.0
     # via llama-stack
-platformdirs==4.3.6
-    # via
-    #   black
-    #   jupyter-core
-    #   virtualenv
-pluggy==1.5.0
-    # via pytest
-pre-commit==4.1.0
 prompt-toolkit==3.0.50
     # via
-    #   ipython
     #   llama-stack
     #   llama-stack-client
 propcache==0.3.0
     # via
     #   aiohttp
     #   yarl
-psutil==7.0.0
-    # via ipykernel
-ptyprocess==0.7.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
-    # via pexpect
-pure-eval==0.2.3
-    # via stack-data
 pyaml==25.1.0
     # via llama-stack-client
 pyasn1==0.4.8
     # via
     #   python-jose
     #   rsa
-pycparser==2.22 ; implementation_name == 'pypy'
-    # via cffi
 pydantic==2.10.6
     # via
     #   fastapi
@@ -225,31 +126,9 @@
 pydantic-core==2.27.2
     # via pydantic
 pygments==2.19.1
-    # via
-    #   ipython
-    #   rich
-pytest==8.3.4
-    # via
-    #   nbval
-    #   pytest-asyncio
-    #   pytest-cov
-    #   pytest-html
-    #   pytest-json-report
-    #   pytest-metadata
-    #   pytest-timeout
-pytest-asyncio==0.25.3
-pytest-cov==6.0.0
-pytest-html==4.1.1
-pytest-json-report==1.5.0
-pytest-metadata==3.1.1
-    # via
-    #   pytest-html
-    #   pytest-json-report
-pytest-timeout==2.4.0
+    # via rich
 python-dateutil==2.9.0.post0
-    # via
-    #   jupyter-client
-    #   pandas
+    # via pandas
 python-dotenv==1.0.1
     # via llama-stack
 python-jose==3.4.0
@@ -258,17 +137,10 @@
 python-multipart==0.0.20
     # via llama-stack
 pytz==2025.1
     # via pandas
-pywin32==308 ; platform_python_implementation != 'PyPy' and sys_platform == 'win32'
-    # via jupyter-core
 pyyaml==6.0.2
     # via
     #   huggingface-hub
-    #   pre-commit
     #   pyaml
-pyzmq==26.2.1
-    # via
-    #   ipykernel
-    #   jupyter-client
 referencing==0.36.2
     # via
     #   jsonschema
     #   jsonschema-specifications
@@ -290,10 +162,6 @@
 rpds-py==0.22.3
     # via
     #   jsonschema
     #   referencing
 rsa==4.9
     # via python-jose
-ruamel-yaml==0.18.10
-ruamel-yaml-clib==0.2.12 ; python_full_version < '3.13' and platform_python_implementation == 'CPython'
-    # via ruamel-yaml
-ruff==0.9.6
 setuptools==80.8.0
     # via llama-stack
 six==1.17.0
     # via
     #   ecdsa
     #   python-dateutil
@@ -305,8 +173,6 @@
 sniffio==1.3.1
     # via
     #   anyio
     #   llama-stack-client
     #   openai
-stack-data==0.6.3
-    # via ipython
 starlette==0.45.3
     # via
     #   fastapi
     #   llama-stack
@@ -318,38 +184,16 @@
 termcolor==2.5.0
     # via
     #   fire
     #   llama-stack
     #   llama-stack-client
 tiktoken==0.9.0
     # via llama-stack
-tomli==2.2.1 ; python_full_version <= '3.11'
-    # via
-    #   black
-    #   coverage
-    #   pytest
-tornado==6.4.2
-    # via
-    #   ipykernel
-    #   jupyter-client
 tqdm==4.67.1
     # via
     #   huggingface-hub
     #   llama-stack-client
     #   openai
-traitlets==5.14.3
-    # via
-    #   comm
-    #   ipykernel
-    #   ipython
-    #   jupyter-client
-    #   jupyter-core
-    #   matplotlib-inline
-    #   nbformat
-types-requests==2.32.0.20241016
-types-setuptools==75.8.0.20250210
 typing-extensions==4.12.2
     # via
     #   anyio
-    #   black
     #   fastapi
     #   huggingface-hub
-    #   ipython
     #   llama-stack-client
     #   multidict
     #   openai
@@ -357,16 +201,10 @@
     #   pydantic-core
     #   referencing
     #   rich
-    #   uvicorn
 tzdata==2025.1
     # via pandas
 urllib3==2.3.0
-    # via
-    #   requests
-    #   types-requests
-uvicorn==0.34.0
-virtualenv==20.29.2
-    # via pre-commit
+    # via requests
 wcwidth==0.2.13
     # via prompt-toolkit
 yarl==1.18.3
diff --git a/tests/Containerfile b/tests/Containerfile
new file mode 100644
index 000000000..3080d053a
--- /dev/null
+++ b/tests/Containerfile
@@ -0,0 +1,13 @@
+# Containerfile used to build our all in one ollama image to run tests in CI
+# podman build --platform linux/amd64 -f Containerfile -t ollama-with-models .
+#
+FROM --platform=linux/amd64 ollama/ollama:latest
+
+# Start ollama and pull models in a single layer
+RUN ollama serve & \
+    sleep 5 && \
+    ollama pull llama3.2:3b-instruct-fp16 && \
+    ollama pull all-minilm:latest
+
+# Set the entrypoint to start ollama serve
+ENTRYPOINT ["ollama", "serve"]