Previously, the integration tests started the server but never actually used it: `--stack-config=ollama` selects the ollama template with the inline "llama stack as library" client, not the HTTP client. This PR makes sure we test both ways. We also add agents tests to the mix.

## Test Plan

GitHub

---------

Signed-off-by: Sébastien Han <seb@redhat.com>
Co-authored-by: Sébastien Han <seb@redhat.com>
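For context, the only functional difference between the two matrix legs in the workflow below is what `--stack-config` resolves to. Here is a minimal sketch of that dispatch; the `LlamaStackAsLibraryClient` import path and its `initialize()` call are assumptions based on the llama-stack packages this workflow installs, not code from this PR:

```python
# Sketch only: how --stack-config maps to a client in each matrix leg.
from llama_stack_client import LlamaStackClient  # published HTTP client


def make_client(stack_config: str):
    if stack_config.startswith("http://") or stack_config.startswith("https://"):
        # 'http' leg: talk to the server the workflow starts on port 8321.
        return LlamaStackClient(base_url=stack_config)
    # 'library' leg: run the whole stack in-process from a template name,
    # so the HTTP server is never contacted. (Import path is an assumption.)
    from llama_stack.distribution.library_client import LlamaStackAsLibraryClient

    client = LlamaStackAsLibraryClient(stack_config)
    client.initialize()
    return client


# client = make_client("ollama")                 # library leg: in-process
# client = make_client("http://localhost:8321")  # http leg: real server
```

This is why the old setup never exercised the server: with `--stack-config=ollama` the dispatch always took the in-process branch.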
name: Integration Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    paths:
      - 'distributions/**'
      - 'llama_stack/**'
      - 'tests/integration/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
      - '.github/workflows/integration-tests.yml' # This workflow

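# Cancel any in-flight run for the same workflow and ref when a new one starts.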
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test-matrix:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Listing tests manually since some of them currently fail
        # TODO: generate matrix list from tests/integration when fixed
        test-type: [agents, inference, datasets, inspect, scoring, post_training, providers]
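        # 'library' runs the stack in-process via the inline "llama stack as
        # library" client; 'http' runs against a real server over HTTP.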
        client-type: [library, http]
      fail-fast: false # we want to run all tests regardless of failure

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          python-version: "3.10"

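      # Ollama provides the local inference backend that the ollama template expects.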
      - name: Install Ollama
        run: |
          curl -fsSL https://ollama.com/install.sh | sh

      - name: Pull Ollama model
        run: |
          ollama pull llama3.2:3b-instruct-fp16

      - name: Start Ollama in background
        run: |
          nohup ollama run llama3.2:3b-instruct-fp16 > ollama.log 2>&1 &

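      # One venv gets everything: test deps, the latest client from git,
      # the stack itself (editable), and the ollama distribution build.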
      - name: Set Up Environment and Install Dependencies
        run: |
          uv sync --extra dev --extra test
          uv pip install ollama faiss-cpu
          # always test against the latest version of the client
          # TODO: this is not necessarily a good idea. we need to test against
          # both published and latest versions to catch backwards-compatibility issues.
          uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main
          uv pip install -e .
          llama stack build --template ollama --image-type venv

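      # Ollama's root endpoint serves the plain-text banner "Ollama is running"
      # once the API is listening; poll it for up to 30 seconds.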
      - name: Wait for Ollama to start
        run: |
          echo "Waiting for Ollama..."
          for i in {1..30}; do
            if curl -s http://localhost:11434 | grep -q "Ollama is running"; then
              echo "Ollama is running!"
              exit 0
            fi
            sleep 1
          done
          echo "Ollama failed to start"
          ollama ps
          cat ollama.log
          exit 1

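      # Only the 'http' matrix leg starts a server; the 'library' leg runs the
      # stack in-process, so no server is needed there.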
      - name: Start Llama Stack server in background
        if: matrix.client-type == 'http'
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
        run: |
          source .venv/bin/activate
          nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv > server.log 2>&1 &

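      # 8321 is the default Llama Stack server port; /v1/health reports readiness.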
      - name: Wait for Llama Stack server to be ready
        if: matrix.client-type == 'http'
        run: |
          echo "Waiting for Llama Stack server..."
          for i in {1..30}; do
            if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
              echo "Llama Stack server is up!"
              exit 0
            fi
            sleep 1
          done
          echo "Llama Stack server failed to start"
          cat server.log
          exit 1

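      # A template name ('ollama') makes pytest use the inline library client;
      # a URL makes it use the HTTP client against the server started above.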
      - name: Run Integration Tests
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
        run: |
          if [ "${{ matrix.client-type }}" == "library" ]; then
            stack_config="ollama"
          else
            stack_config="http://localhost:8321"
          fi
          uv run pytest -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \
            -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
            --text-model="meta-llama/Llama-3.2-3B-Instruct" \
            --embedding-model=all-MiniLM-L6-v2