diff --git a/.github/actions/setup-ollama/action.yml b/.github/actions/setup-ollama/action.yml
index 3dd6c940c..1f6e9818b 100644
--- a/.github/actions/setup-ollama/action.yml
+++ b/.github/actions/setup-ollama/action.yml
@@ -1,26 +1,9 @@
 name: Setup Ollama
-description: Start Ollama and cache model
-inputs:
-  models:
-    description: Comma-separated list of models to pull
-    default: "llama3.2:3b-instruct-fp16,all-minilm:latest"
+description: Start Ollama
 runs:
   using: "composite"
   steps:
-    - name: Install and start Ollama
+    - name: Start Ollama
       shell: bash
       run: |
-        # the ollama installer also starts the ollama service
-        curl -fsSL https://ollama.com/install.sh | sh
-
-    # Do NOT cache models - pulling the cache is actually slower than just pulling the model.
-    # It takes ~45 seconds to pull the models from the cache and unpack it, but only 30 seconds to
-    # pull them directly.
-    # Maybe this is because the cache is being pulled at the same time by all the matrix jobs?
-    - name: Pull requested models
-      if: inputs.models != ''
-      shell: bash
-      run: |
-        for model in $(echo "${{ inputs.models }}" | tr ',' ' '); do
-          ollama pull "$model"
-        done
+        docker run -d --name ollama -p 11434:11434 docker.io/leseb/ollama-with-models
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index 13b35643e..7aa8b5807 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -50,7 +50,7 @@ jobs:
         env:
           INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
         run: |
-          LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv &
+          LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv --env OLLAMA_URL="http://0.0.0.0:11434" &

       - name: Wait for Llama Stack server to be ready
         if: matrix.client-type == 'http'
@@ -87,6 +87,7 @@ jobs:
       - name: Run Integration Tests
         env:
           INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
+          OLLAMA_URL: "http://0.0.0.0:11434"
         run: |
           if [ "${{ matrix.client-type }}" == "library" ]; then
             stack_config="ollama"
@@ -107,7 +108,7 @@
       - name: Write ollama logs to file
         if: ${{ always() }}
         run: |
-          sudo journalctl -u ollama.service > ollama.log
+          sudo docker logs ollama > ollama.log

       - name: Upload all logs to artifacts
         if: ${{ always() }}
diff --git a/requirements.txt b/requirements.txt
index 04c5ae74f..cfd63b456 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 # This file was autogenerated by uv via the following command:
-#    uv export --frozen --no-hashes --no-emit-project --output-file=requirements.txt
+#    uv export --frozen --no-hashes --no-emit-project --no-default-groups --output-file=requirements.txt
 aiohappyeyeballs==2.5.0
     # via aiohttp
 aiohttp==3.11.13
@@ -14,10 +14,6 @@
 anyio==4.8.0
     # via
     #   httpx
     #   llama-stack-client
     #   openai
     #   starlette
-appnope==0.1.4 ; sys_platform == 'darwin'
-    # via ipykernel
-asttokens==3.0.0
-    # via stack-data
 async-timeout==5.0.1 ; python_full_version < '3.11'
     # via aiohttp
 attrs==25.1.0
@@ -25,41 +21,19 @@
     # via
     #   aiohttp
     #   jsonschema
     #   referencing
-black==25.1.0
 certifi==2025.1.31
     # via
     #   httpcore
     #   httpx
     #   requests
-cffi==1.17.1 ; implementation_name == 'pypy'
-    # via pyzmq
-cfgv==3.4.0
-    # via pre-commit
 charset-normalizer==3.4.1
     # via requests
 click==8.1.8
-    # via
-    #   black
-    #   llama-stack-client
-    #   uvicorn
+    # via llama-stack-client
 colorama==0.4.6 ; sys_platform == 'win32'
     # via
     #   click
-    #   ipython
-    #   pytest
     #   tqdm
-comm==0.2.2
-    # via ipykernel
-coverage==7.6.12
-    # via
-    #   nbval
-    #   pytest-cov
-debugpy==1.8.12
-    # via ipykernel
-decorator==5.1.1
-    # via ipython
-distlib==0.3.9
-    # via virtualenv
 distro==1.9.0
     # via
     #   llama-stack-client
@@ -67,20 +41,11 @@
 ecdsa==0.19.1
     # via python-jose
 exceptiongroup==1.2.2 ; python_full_version < '3.11'
-    # via
-    #   anyio
-    #   ipython
-    #   pytest
-executing==2.2.0
-    # via stack-data
+    # via anyio
 fastapi==0.115.8
     # via llama-stack
-fastjsonschema==2.21.1
-    # via nbformat
 filelock==3.17.0
-    # via
-    #   huggingface-hub
-    #   virtualenv
+    # via huggingface-hub
 fire==0.7.0
     # via llama-stack
 frozenlist==1.5.0
     # via
     #   aiohttp
     #   aiosignal
@@ -93,7 +58,6 @@
 h11==0.16.0
     # via
     #   httpcore
     #   llama-stack
-    #   uvicorn
 httpcore==1.0.9
     # via httpx
@@ -103,119 +67,56 @@
 httpx==0.28.1
     # via
     #   llama-stack
     #   llama-stack-client
     #   openai
 huggingface-hub==0.29.0
     # via llama-stack
-identify==2.6.7
-    # via pre-commit
 idna==3.10
     # via
     #   anyio
     #   httpx
     #   requests
     #   yarl
-iniconfig==2.0.0
-    # via pytest
-ipykernel==6.29.5
-    # via nbval
-ipython==8.32.0
-    # via ipykernel
-jedi==0.19.2
-    # via ipython
 jinja2==3.1.6
-    # via
-    #   llama-stack
-    #   pytest-html
+    # via llama-stack
 jiter==0.8.2
     # via openai
 jsonschema==4.23.0
-    # via
-    #   llama-stack
-    #   nbformat
+    # via llama-stack
 jsonschema-specifications==2024.10.1
     # via jsonschema
-jupyter-client==8.6.3
-    # via
-    #   ipykernel
-    #   nbval
-jupyter-core==5.7.2
-    # via
-    #   ipykernel
-    #   jupyter-client
-    #   nbformat
 llama-stack-client==0.2.10
     # via llama-stack
 markdown-it-py==3.0.0
     # via rich
 markupsafe==3.0.2
     # via jinja2
-matplotlib-inline==0.1.7
-    # via
-    #   ipykernel
-    #   ipython
 mdurl==0.1.2
     # via markdown-it-py
 multidict==6.1.0
     # via
     #   aiohttp
     #   yarl
-mypy-extensions==1.0.0
-    # via black
-nbformat==5.10.4
-    # via nbval
-nbval==0.11.0
-nest-asyncio==1.6.0
-    # via ipykernel
-nodeenv==1.9.1
-    # via pre-commit
 numpy==2.2.3
     # via pandas
 openai==1.71.0
     # via llama-stack
 packaging==24.2
-    # via
-    #   black
-    #   huggingface-hub
-    #   ipykernel
-    #   pytest
+    # via huggingface-hub
 pandas==2.2.3
     # via llama-stack-client
-parso==0.8.4
-    # via jedi
-pathspec==0.12.1
-    # via black
-pexpect==4.9.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
-    # via ipython
 pillow==11.1.0
     # via llama-stack
-platformdirs==4.3.6
-    # via
-    #   black
-    #   jupyter-core
-    #   virtualenv
-pluggy==1.5.0
-    # via pytest
-pre-commit==4.1.0
 prompt-toolkit==3.0.50
     # via
-    #   ipython
     #   llama-stack
     #   llama-stack-client
 propcache==0.3.0
     # via
     #   aiohttp
     #   yarl
-psutil==7.0.0
-    # via ipykernel
-ptyprocess==0.7.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
-    # via pexpect
-pure-eval==0.2.3
-    # via stack-data
 pyaml==25.1.0
     # via llama-stack-client
 pyasn1==0.4.8
     # via
     #   python-jose
     #   rsa
-pycparser==2.22 ; implementation_name == 'pypy'
-    # via cffi
 pydantic==2.10.6
     # via
     #   fastapi
@@ -225,31 +126,9 @@
 pydantic-core==2.27.2
     # via pydantic
 pygments==2.19.1
-    # via
-    #   ipython
-    #   rich
-pytest==8.3.4
-    # via
-    #   nbval
-    #   pytest-asyncio
-    #   pytest-cov
-    #   pytest-html
-    #   pytest-json-report
-    #   pytest-metadata
-    #   pytest-timeout
-pytest-asyncio==0.25.3
-pytest-cov==6.0.0
-pytest-html==4.1.1
-pytest-json-report==1.5.0
-pytest-metadata==3.1.1
-    # via
-    #   pytest-html
-    #   pytest-json-report
-pytest-timeout==2.4.0
+    # via rich
 python-dateutil==2.9.0.post0
-    # via
-    #   jupyter-client
-    #   pandas
+    # via pandas
 python-dotenv==1.0.1
     # via llama-stack
 python-jose==3.4.0
@@ -258,17 +137,10 @@
 python-multipart==0.0.20
     # via llama-stack
 pytz==2025.1
     # via pandas
-pywin32==308 ; platform_python_implementation != 'PyPy' and sys_platform == 'win32'
-    # via jupyter-core
 pyyaml==6.0.2
     # via
     #   huggingface-hub
-    #   pre-commit
     #   pyaml
-pyzmq==26.2.1
-    # via
-    #   ipykernel
-    #   jupyter-client
 referencing==0.36.2
     # via
     #   jsonschema
     #   jsonschema-specifications
@@ -290,10 +162,6 @@
 rpds-py==0.22.3
     # via
     #   jsonschema
     #   referencing
 rsa==4.9
     # via python-jose
-ruamel-yaml==0.18.10
-ruamel-yaml-clib==0.2.12 ; python_full_version < '3.13' and platform_python_implementation == 'CPython'
-    # via ruamel-yaml
-ruff==0.9.6
 setuptools==80.8.0
     # via llama-stack
 six==1.17.0
     # via
     #   ecdsa
     #   python-dateutil
@@ -305,8 +173,6 @@
 sniffio==1.3.1
     # via
     #   anyio
     #   llama-stack-client
     #   openai
-stack-data==0.6.3
-    # via ipython
 starlette==0.45.3
     # via
     #   fastapi
     #   llama-stack
@@ -318,38 +184,16 @@
 termcolor==2.5.0
     # via
     #   fire
     #   llama-stack
     #   llama-stack-client
 tiktoken==0.9.0
     # via llama-stack
-tomli==2.2.1 ; python_full_version <= '3.11'
-    # via
-    #   black
-    #   coverage
-    #   pytest
-tornado==6.4.2
-    # via
-    #   ipykernel
-    #   jupyter-client
 tqdm==4.67.1
     # via
     #   huggingface-hub
     #   llama-stack-client
     #   openai
-traitlets==5.14.3
-    # via
-    #   comm
-    #   ipykernel
-    #   ipython
-    #   jupyter-client
-    #   jupyter-core
-    #   matplotlib-inline
-    #   nbformat
-types-requests==2.32.0.20241016
-types-setuptools==75.8.0.20250210
 typing-extensions==4.12.2
     # via
     #   anyio
-    #   black
     #   fastapi
     #   huggingface-hub
-    #   ipython
     #   llama-stack-client
     #   multidict
     #   openai
@@ -357,16 +201,10 @@
     #   pydantic-core
     #   referencing
     #   rich
-    #   uvicorn
 tzdata==2025.1
     # via pandas
 urllib3==2.3.0
-    # via
-    #   requests
-    #   types-requests
-uvicorn==0.34.0
-virtualenv==20.29.2
-    # via pre-commit
+    # via requests
 wcwidth==0.2.13
     # via prompt-toolkit
 yarl==1.18.3
diff --git a/tests/Containerfile b/tests/Containerfile
new file mode 100644
index 000000000..3080d053a
--- /dev/null
+++ b/tests/Containerfile
@@ -0,0 +1,13 @@
+# Containerfile used to build our all in one ollama image to run tests in CI
+# podman build --platform linux/amd64 -f Containerfile -t ollama-with-models .
+#
+FROM --platform=linux/amd64 ollama/ollama:latest
+
+# Start ollama and pull models in a single layer
+RUN ollama serve & \
+    sleep 5 && \
+    ollama pull llama3.2:3b-instruct-fp16 && \
+    ollama pull all-minilm:latest
+
+# Set the entrypoint to start ollama serve
+ENTRYPOINT ["ollama", "serve"]