From dd6e6f9ac2128e197ac860e0d9a0a395ed7f8433 Mon Sep 17 00:00:00 2001
From: Francisco Javier Arceo
Date: Tue, 15 Jul 2025 00:02:23 -0400
Subject: [PATCH] chore: Adding OpenAI Vector Stores Files API compatibility for PGVector

Signed-off-by: Francisco Javier Arceo
---
Reviewer notes (not part of the commit message; text between the three-dash
marker above and the first "diff --git" header is skipped when the patch is
applied):

* The openai_vector_store_files and openai_vector_store_files_contents tables
  are created with CREATE TABLE IF NOT EXISTS inside
  _save_openai_vector_store_file only. The load, update and delete methods
  query those tables without creating them first.
* Both _load_* methods catch every Exception, log it, and return an empty
  dict / list, so callers see the same result for a failed query as for a
  missing row. The save, update and delete methods log and re-raise.
* _update_openai_vector_store_file issues a plain UPDATE; it changes nothing
  when no row matches (store_id, file_id).
* None of the new methods calls commit() on self.conn. Presumably the
  connection is opened in autocommit mode elsewhere in pgvector.py; confirm
  before merging.
* The new methods are declared async but contain no await; the cursor calls
  run synchronously.

 .../remote/vector_io/pgvector/pgvector.py     | 99 ++++++++++++++++++-
 .../vector_io/test_openai_vector_stores.py    |  2 +-
 2 files changed, 95 insertions(+), 6 deletions(-)

diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
index 1bf3eedf8..cd610a3ef 100644
--- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
+++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
@@ -273,16 +273,105 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
     async def _save_openai_vector_store_file(
         self, store_id: str, file_id: str, file_info: dict[str, Any], file_contents: list[dict[str, Any]]
     ) -> None:
-        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+        """Save vector store file metadata to Postgres database."""
+        if self.conn is None:
+            raise RuntimeError("PostgreSQL connection is not initialized")
+        try:
+            with self.conn.cursor() as cur:
+                cur.execute(
+                    """
+                    CREATE TABLE IF NOT EXISTS openai_vector_store_files (
+                        store_id TEXT,
+                        file_id TEXT,
+                        metadata JSONB,
+                        PRIMARY KEY (store_id, file_id)
+                    )
+                    """
+                )
+                cur.execute(
+                    """
+                    CREATE TABLE IF NOT EXISTS openai_vector_store_files_contents (
+                        store_id TEXT,
+                        file_id TEXT,
+                        contents JSONB,
+                        PRIMARY KEY (store_id, file_id)
+                    )
+                    """
+                )
+                cur.execute(
+                    "INSERT INTO openai_vector_store_files (store_id, file_id, metadata) VALUES (%s, %s, %s)"
+                    " ON CONFLICT (store_id, file_id) DO UPDATE SET metadata = EXCLUDED.metadata",
+                    (store_id, file_id, Json(file_info)),
+                )
+                cur.execute(
+                    "INSERT INTO openai_vector_store_files_contents (store_id, file_id, contents) VALUES (%s, %s, %s)"
+                    " ON CONFLICT (store_id, file_id) DO UPDATE SET contents = EXCLUDED.contents",
+                    (store_id, file_id, Json(file_contents)),
+                )
+        except Exception as e:
+            log.error(f"Error saving openai vector store file {file_id} for store {store_id}: {e}")
+            raise
 
     async def _load_openai_vector_store_file(self, store_id: str, file_id: str) -> dict[str, Any]:
-        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+        """Load vector store file metadata from Postgres database."""
+        if self.conn is None:
+            raise RuntimeError("PostgreSQL connection is not initialized")
+        try:
+            with self.conn.cursor() as cur:
+                cur.execute(
+                    "SELECT metadata FROM openai_vector_store_files WHERE store_id = %s AND file_id = %s",
+                    (store_id, file_id),
+                )
+                row = cur.fetchone()
+                return row[0] if row and row[0] is not None else {}
+        except Exception as e:
+            log.error(f"Error loading openai vector store file {file_id} for store {store_id}: {e}")
+            return {}
 
     async def _load_openai_vector_store_file_contents(self, store_id: str, file_id: str) -> list[dict[str, Any]]:
-        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+        """Load vector store file contents from Postgres database."""
+        if self.conn is None:
+            raise RuntimeError("PostgreSQL connection is not initialized")
+        try:
+            with self.conn.cursor() as cur:
+                cur.execute(
+                    "SELECT contents FROM openai_vector_store_files_contents WHERE store_id = %s AND file_id = %s",
+                    (store_id, file_id),
+                )
+                row = cur.fetchone()
+                return row[0] if row and row[0] is not None else []
+        except Exception as e:
+            log.error(f"Error loading openai vector store file contents for {file_id} in store {store_id}: {e}")
+            return []
 
     async def _update_openai_vector_store_file(self, store_id: str, file_id: str, file_info: dict[str, Any]) -> None:
-        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+        """Update vector store file metadata in Postgres database."""
+        if self.conn is None:
+            raise RuntimeError("PostgreSQL connection is not initialized")
+        try:
+            with self.conn.cursor() as cur:
+                cur.execute(
+                    "UPDATE openai_vector_store_files SET metadata = %s WHERE store_id = %s AND file_id = %s",
+                    (Json(file_info), store_id, file_id),
+                )
+        except Exception as e:
+            log.error(f"Error updating openai vector store file {file_id} for store {store_id}: {e}")
+            raise
 
     async def _delete_openai_vector_store_file_from_storage(self, store_id: str, file_id: str) -> None:
-        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+        """Delete vector store file metadata from Postgres database."""
+        if self.conn is None:
+            raise RuntimeError("PostgreSQL connection is not initialized")
+        try:
+            with self.conn.cursor() as cur:
+                cur.execute(
+                    "DELETE FROM openai_vector_store_files WHERE store_id = %s AND file_id = %s",
+                    (store_id, file_id),
+                )
+                cur.execute(
+                    "DELETE FROM openai_vector_store_files_contents WHERE store_id = %s AND file_id = %s",
+                    (store_id, file_id),
+                )
+        except Exception as e:
+            log.error(f"Error deleting openai vector store file {file_id} for store {store_id}: {e}")
+            raise
diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py
index cc2860e26..a29fee4af 100644
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@@ -31,7 +31,7 @@ def skip_if_provider_doesnt_support_openai_vector_stores(client_with_models):
 def skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models):
     vector_io_providers = [p for p in client_with_models.providers.list() if p.api == "vector_io"]
     for p in vector_io_providers:
-        if p.provider_type in ["inline::faiss", "inline::sqlite-vec", "inline::milvus"]:
+        if p.provider_type in ["inline::faiss", "inline::sqlite-vec", "inline::milvus", "remote::pgvector"]:
             return
 
     pytest.skip("OpenAI vector stores are not supported by any provider")