llama-stack-mirror/.github/workflows/integration-vector-io-tests.yml
Hardik Shah eb01a3f1c5
ci: vector_io provider integration tests (#2537)
Runs integration tests for `vector_io` across the provider matrix. 
This new workflow adds CI testing across - `inline::faiss`,
`remote::chroma`.
2025-06-26 17:04:32 -07:00

142 lines
5 KiB
YAML

name: Vector IO Integration Tests
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
paths:
- 'llama_stack/**'
- 'tests/integration/vector_io/**'
- 'uv.lock'
- 'pyproject.toml'
- 'requirements.txt'
- '.github/workflows/integration-vector-io-tests.yml' # This workflow
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
test-matrix:
runs-on: ubuntu-latest
strategy:
matrix:
vector-io-provider: ["inline::faiss", "inline::sqlite-vec", "remote::chromadb", "remote::pgvector"]
python-version: ["3.12", "3.13"]
fail-fast: false # we want to run all tests regardless of failure
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Install dependencies
uses: ./.github/actions/setup-runner
with:
python-version: ${{ matrix.python-version }}
- name: Setup Chroma
if: matrix.vector-io-provider == 'remote::chromadb'
run: |
docker run --rm -d --pull always \
--name chromadb \
-p 8000:8000 \
-v ~/chroma:/chroma/chroma \
-e IS_PERSISTENT=TRUE \
-e ANONYMIZED_TELEMETRY=FALSE \
chromadb/chroma:latest
- name: Start PGVector DB
if: matrix.vector-io-provider == 'remote::pgvector'
run: |
docker run -d \
--name pgvector \
-e POSTGRES_USER=llamastack \
-e POSTGRES_PASSWORD=llamastack \
-e POSTGRES_DB=llamastack \
-p 5432:5432 \
pgvector/pgvector:pg17
- name: Wait for PGVector to be ready
if: matrix.vector-io-provider == 'remote::pgvector'
run: |
echo "Waiting for Postgres to be ready..."
for i in {1..30}; do
if docker exec pgvector pg_isready -U llamastack > /dev/null 2>&1; then
echo "Postgres is ready!"
break
fi
echo "Not ready yet... ($i)"
sleep 1
done
- name: Enable pgvector extension
if: matrix.vector-io-provider == 'remote::pgvector'
run: |
PGPASSWORD=llamastack psql -h localhost -U llamastack -d llamastack \
-c "CREATE EXTENSION IF NOT EXISTS vector;"
- name: Wait for ChromaDB to be ready
if: matrix.vector-io-provider == 'remote::chromadb'
run: |
echo "Waiting for ChromaDB to be ready..."
for i in {1..30}; do
if curl -s http://localhost:8000/api/v2/heartbeat | grep -q "nanosecond heartbeat"; then
echo "ChromaDB is ready!"
exit 0
fi
sleep 2
done
echo "ChromaDB failed to start"
docker logs chromadb
exit 1
- name: Build Llama Stack
run: |
uv run llama stack build --template starter --image-type venv
- name: Check Storage and Memory Available Before Tests
if: ${{ always() }}
run: |
free -h
df -h
- name: Run Vector IO Integration Tests
env:
ENABLE_CHROMADB: ${{ matrix.vector-io-provider == 'remote::chromadb' && 'true' || '' }}
CHROMADB_URL: ${{ matrix.vector-io-provider == 'remote::chromadb' && 'http://localhost:8000' || '' }}
ENABLE_PGVECTOR: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'true' || '' }}
PGVECTOR_HOST: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'localhost' || '' }}
PGVECTOR_PORT: ${{ matrix.vector-io-provider == 'remote::pgvector' && '5432' || '' }}
PGVECTOR_DB: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
PGVECTOR_USER: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
PGVECTOR_PASSWORD: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
run: |
uv run pytest -sv --stack-config="inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \
tests/integration/vector_io \
--embedding-model all-MiniLM-L6-v2
- name: Check Storage and Memory Available After Tests
if: ${{ always() }}
run: |
free -h
df -h
- name: Create sanitized provider name
if: ${{ always() }}
run: |
echo "SANITIZED_PROVIDER=$(echo "${{ matrix.vector-io-provider }}" | tr ':' '_')" >> $GITHUB_ENV
- name: Write ChromaDB logs to file
if: ${{ always() && matrix.vector-io-provider == 'remote::chromadb' }}
run: |
docker logs chromadb > chromadb.log
- name: Upload all logs to artifacts
if: ${{ always() }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: vector-io-logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ env.SANITIZED_PROVIDER }}-${{ matrix.python-version }}
path: |
*.log
retention-days: 1