mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
ci: vector_io provider integration tests (#2537)
Runs integration tests for `vector_io` across the provider matrix. This new workflow adds CI testing for `inline::faiss`, `inline::sqlite-vec`, `remote::chromadb`, and `remote::pgvector` on Python 3.12 and 3.13.
This commit is contained in:
parent
68d8f2186f
commit
eb01a3f1c5
3 changed files with 263 additions and 1 deletions
142
.github/workflows/integration-vector-io-tests.yml
vendored
Normal file
142
.github/workflows/integration-vector-io-tests.yml
vendored
Normal file
|
@ -0,0 +1,142 @@
|
||||||
|
# Runs the vector_io integration tests once per (provider, Python version)
# pair in the matrix below. Remote providers (ChromaDB, PGVector) are started
# as local Docker containers and must pass a readiness probe before the tests
# run; inline providers need no extra services.
name: Vector IO Integration Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    paths:
      - 'llama_stack/**'
      - 'tests/integration/vector_io/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
      - '.github/workflows/integration-vector-io-tests.yml' # This workflow

# A newer run for the same workflow and ref cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test-matrix:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        vector-io-provider: ["inline::faiss", "inline::sqlite-vec", "remote::chromadb", "remote::pgvector"]
        python-version: ["3.12", "3.13"]
      fail-fast: false # we want to run all tests regardless of failure

    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      # Repository-local action; receives the Python version of this matrix entry.
      - name: Install dependencies
        uses: ./.github/actions/setup-runner
        with:
          python-version: ${{ matrix.python-version }}

      - name: Setup Chroma
        if: matrix.vector-io-provider == 'remote::chromadb'
        run: |
          docker run --rm -d --pull always \
            --name chromadb \
            -p 8000:8000 \
            -v ~/chroma:/chroma/chroma \
            -e IS_PERSISTENT=TRUE \
            -e ANONYMIZED_TELEMETRY=FALSE \
            chromadb/chroma:latest

      - name: Start PGVector DB
        if: matrix.vector-io-provider == 'remote::pgvector'
        run: |
          docker run -d \
            --name pgvector \
            -e POSTGRES_USER=llamastack \
            -e POSTGRES_PASSWORD=llamastack \
            -e POSTGRES_DB=llamastack \
            -p 5432:5432 \
            pgvector/pgvector:pg17

      # Probe up to 30 times, one second apart. If Postgres never reports
      # ready, print the container logs and fail the step instead of letting
      # later steps run against a database that is not up.
      - name: Wait for PGVector to be ready
        if: matrix.vector-io-provider == 'remote::pgvector'
        run: |
          echo "Waiting for Postgres to be ready..."
          for i in {1..30}; do
            if docker exec pgvector pg_isready -U llamastack > /dev/null 2>&1; then
              echo "Postgres is ready!"
              exit 0
            fi
            echo "Not ready yet... ($i)"
            sleep 1
          done
          echo "Postgres failed to start"
          docker logs pgvector
          exit 1

      - name: Enable pgvector extension
        if: matrix.vector-io-provider == 'remote::pgvector'
        run: |
          PGPASSWORD=llamastack psql -h localhost -U llamastack -d llamastack \
            -c "CREATE EXTENSION IF NOT EXISTS vector;"

      # Probe the heartbeat endpoint up to 30 times, two seconds apart; on
      # timeout print the container logs and fail the step.
      - name: Wait for ChromaDB to be ready
        if: matrix.vector-io-provider == 'remote::chromadb'
        run: |
          echo "Waiting for ChromaDB to be ready..."
          for i in {1..30}; do
            if curl -s http://localhost:8000/api/v2/heartbeat | grep -q "nanosecond heartbeat"; then
              echo "ChromaDB is ready!"
              exit 0
            fi
            sleep 2
          done
          echo "ChromaDB failed to start"
          docker logs chromadb
          exit 1

      - name: Build Llama Stack
        run: |
          uv run llama stack build --template starter --image-type venv

      - name: Check Storage and Memory Available Before Tests
        if: ${{ always() }}
        run: |
          free -h
          df -h

      # Provider-specific variables are set only for the matching matrix
      # entry; for every other provider they expand to an empty string.
      - name: Run Vector IO Integration Tests
        env:
          ENABLE_CHROMADB: ${{ matrix.vector-io-provider == 'remote::chromadb' && 'true' || '' }}
          CHROMADB_URL: ${{ matrix.vector-io-provider == 'remote::chromadb' && 'http://localhost:8000' || '' }}
          ENABLE_PGVECTOR: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'true' || '' }}
          PGVECTOR_HOST: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'localhost' || '' }}
          PGVECTOR_PORT: ${{ matrix.vector-io-provider == 'remote::pgvector' && '5432' || '' }}
          PGVECTOR_DB: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
          PGVECTOR_USER: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
          PGVECTOR_PASSWORD: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
        run: |
          uv run pytest -sv --stack-config="inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \
            tests/integration/vector_io \
            --embedding-model all-MiniLM-L6-v2

      - name: Check Storage and Memory Available After Tests
        if: ${{ always() }}
        run: |
          free -h
          df -h

      # Replace ':' with '_' in the provider name; the result is used in the
      # artifact name of the upload step below.
      - name: Create sanitized provider name
        if: ${{ always() }}
        run: |
          echo "SANITIZED_PROVIDER=$(echo "${{ matrix.vector-io-provider }}" | tr ':' '_')" >> "$GITHUB_ENV"

      # Redirect both streams so output the container wrote to stderr is
      # included in the uploaded log file.
      - name: Write ChromaDB logs to file
        if: ${{ always() && matrix.vector-io-provider == 'remote::chromadb' }}
        run: |
          docker logs chromadb > chromadb.log 2>&1

      - name: Upload all logs to artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
          name: vector-io-logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ env.SANITIZED_PROVIDER }}-${{ matrix.python-version }}
          path: |
            *.log
          retention-days: 1
|
|
@ -15,7 +15,21 @@ from pydantic import BaseModel, TypeAdapter
|
||||||
|
|
||||||
from llama_stack.apis.inference import InterleavedContent
|
from llama_stack.apis.inference import InterleavedContent
|
||||||
from llama_stack.apis.vector_dbs import VectorDB
|
from llama_stack.apis.vector_dbs import VectorDB
|
||||||
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
from llama_stack.apis.vector_io import (
|
||||||
|
Chunk,
|
||||||
|
QueryChunksResponse,
|
||||||
|
SearchRankingOptions,
|
||||||
|
VectorIO,
|
||||||
|
VectorStoreChunkingStrategy,
|
||||||
|
VectorStoreDeleteResponse,
|
||||||
|
VectorStoreFileContentsResponse,
|
||||||
|
VectorStoreFileObject,
|
||||||
|
VectorStoreFileStatus,
|
||||||
|
VectorStoreListFilesResponse,
|
||||||
|
VectorStoreListResponse,
|
||||||
|
VectorStoreObject,
|
||||||
|
VectorStoreSearchResponsePage,
|
||||||
|
)
|
||||||
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
|
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
|
||||||
from llama_stack.providers.utils.memory.vector_store import (
|
from llama_stack.providers.utils.memory.vector_store import (
|
||||||
EmbeddingIndex,
|
EmbeddingIndex,
|
||||||
|
@ -222,3 +236,108 @@ class PGVectorVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
|
||||||
index = PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn)
|
index = PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn)
|
||||||
self.cache[vector_db_id] = VectorDBWithIndex(vector_db, index, self.inference_api)
|
self.cache[vector_db_id] = VectorDBWithIndex(vector_db, index, self.inference_api)
|
||||||
return self.cache[vector_db_id]
|
return self.cache[vector_db_id]
|
||||||
|
|
||||||
|
async def openai_create_vector_store(
    self,
    name: str,
    file_ids: list[str] | None = None,
    expires_after: dict[str, Any] | None = None,
    chunking_strategy: dict[str, Any] | None = None,
    metadata: dict[str, Any] | None = None,
    embedding_model: str | None = None,
    embedding_dimension: int | None = 384,
    provider_id: str | None = None,
    provider_vector_db_id: str | None = None,
) -> VectorStoreObject:
    """Create a vector store. Unsupported here: always raises NotImplementedError."""
    raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")


async def openai_list_vector_stores(
    self,
    limit: int | None = 20,
    order: str | None = "desc",
    after: str | None = None,
    before: str | None = None,
) -> VectorStoreListResponse:
    """List vector stores. Unsupported here: always raises NotImplementedError."""
    raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")


async def openai_retrieve_vector_store(
    self,
    vector_store_id: str,
) -> VectorStoreObject:
    """Retrieve a vector store. Unsupported here: always raises NotImplementedError."""
    raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")


async def openai_update_vector_store(
    self,
    vector_store_id: str,
    name: str | None = None,
    expires_after: dict[str, Any] | None = None,
    metadata: dict[str, Any] | None = None,
) -> VectorStoreObject:
    """Update a vector store. Unsupported here: always raises NotImplementedError."""
    raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")


async def openai_delete_vector_store(
    self,
    vector_store_id: str,
) -> VectorStoreDeleteResponse:
    """Delete a vector store. Unsupported here: always raises NotImplementedError."""
    raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")


async def openai_search_vector_store(
    self,
    vector_store_id: str,
    query: str | list[str],
    filters: dict[str, Any] | None = None,
    max_num_results: int | None = 10,
    ranking_options: SearchRankingOptions | None = None,
    rewrite_query: bool | None = False,
    search_mode: str | None = "vector",
) -> VectorStoreSearchResponsePage:
    """Search a vector store. Unsupported here: always raises NotImplementedError."""
    raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")


async def openai_attach_file_to_vector_store(
    self,
    vector_store_id: str,
    file_id: str,
    attributes: dict[str, Any] | None = None,
    chunking_strategy: VectorStoreChunkingStrategy | None = None,
) -> VectorStoreFileObject:
    """Attach a file to a vector store. Unsupported here: always raises NotImplementedError."""
    raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")


async def openai_list_files_in_vector_store(
    self,
    vector_store_id: str,
    limit: int | None = 20,
    order: str | None = "desc",
    after: str | None = None,
    before: str | None = None,
    filter: VectorStoreFileStatus | None = None,
) -> VectorStoreListFilesResponse:
    """List files in a vector store. Unsupported here: always raises NotImplementedError."""
    raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")


async def openai_retrieve_vector_store_file(
    self,
    vector_store_id: str,
    file_id: str,
) -> VectorStoreFileObject:
    """Retrieve a vector store file. Unsupported here: always raises NotImplementedError."""
    raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")


async def openai_retrieve_vector_store_file_contents(
    self,
    vector_store_id: str,
    file_id: str,
) -> VectorStoreFileContentsResponse:
    """Retrieve a file's contents. Unsupported here: always raises NotImplementedError."""
    raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")


async def openai_update_vector_store_file(
    self,
    vector_store_id: str,
    file_id: str,
    attributes: dict[str, Any] | None = None,
) -> VectorStoreFileObject:
    """Update a vector store file. Unsupported here: always raises NotImplementedError."""
    raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")


async def openai_delete_vector_store_file(
    self,
    vector_store_id: str,
    file_id: str,
) -> VectorStoreFileObject:
    """Delete a vector store file. Unsupported here: always raises NotImplementedError."""
    raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
|
||||||
|
|
|
@ -29,6 +29,7 @@ mapfile -t py_dirs < <(
|
||||||
-type f \
|
-type f \
|
||||||
-name "*.py" ! -name "__init__.py" \
|
-name "*.py" ! -name "__init__.py" \
|
||||||
! -path "*/.venv/*" \
|
! -path "*/.venv/*" \
|
||||||
|
! -path "*/node_modules/*" \
|
||||||
-exec dirname {} \; | sort -u
|
-exec dirname {} \; | sort -u
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue