mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-16 01:53:10 +00:00
feat: Add OpenAI compat /v1/vector_store APIs (#2423)
Some checks failed
Integration Tests / test-matrix (http, 3.10, inference) (push) Failing after 28s
Integration Tests / test-matrix (http, 3.11, agents) (push) Failing after 26s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 32s
Integration Tests / test-matrix (http, 3.11, scoring) (push) Failing after 24s
Integration Tests / test-matrix (http, 3.10, inspect) (push) Failing after 32s
Integration Tests / test-matrix (http, 3.11, datasets) (push) Failing after 30s
Integration Tests / test-matrix (http, 3.11, tool_runtime) (push) Failing after 28s
Integration Tests / test-matrix (http, 3.12, inference) (push) Failing after 28s
Integration Tests / test-matrix (http, 3.12, providers) (push) Failing after 26s
Integration Tests / test-matrix (http, 3.12, tool_runtime) (push) Failing after 24s
Integration Tests / test-matrix (http, 3.10, scoring) (push) Failing after 32s
Integration Tests / test-matrix (http, 3.11, inference) (push) Failing after 39s
Integration Tests / test-matrix (http, 3.12, scoring) (push) Failing after 35s
Integration Tests / test-matrix (http, 3.10, datasets) (push) Failing after 47s
Integration Tests / test-matrix (http, 3.12, post_training) (push) Failing after 22s
Integration Tests / test-matrix (http, 3.11, inspect) (push) Failing after 42s
Integration Tests / test-matrix (http, 3.11, post_training) (push) Failing after 31s
Integration Tests / test-matrix (http, 3.11, providers) (push) Failing after 44s
Integration Tests / test-matrix (http, 3.12, datasets) (push) Failing after 40s
Integration Tests / test-matrix (http, 3.12, inspect) (push) Failing after 42s
Integration Tests / test-matrix (http, 3.10, providers) (push) Failing after 44s
Integration Tests / test-matrix (http, 3.10, agents) (push) Failing after 33s
Integration Tests / test-matrix (http, 3.12, agents) (push) Failing after 37s
Integration Tests / test-matrix (library, 3.10, inspect) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.10, inference) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.10, agents) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.10, providers) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.10, scoring) (push) Failing after 11s
Integration Tests / test-matrix (http, 3.10, post_training) (push) Failing after 41s
Integration Tests / test-matrix (library, 3.10, datasets) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.10, post_training) (push) Failing after 13s
Integration Tests / test-matrix (http, 3.10, tool_runtime) (push) Failing after 46s
Integration Tests / test-matrix (library, 3.10, tool_runtime) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.11, agents) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.11, inference) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.11, post_training) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.11, datasets) (push) Failing after 14s
Integration Tests / test-matrix (library, 3.11, inspect) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.11, providers) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.11, tool_runtime) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.11, scoring) (push) Failing after 14s
Integration Tests / test-matrix (library, 3.12, agents) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.12, inference) (push) Failing after 7s
Integration Tests / test-matrix (library, 3.12, datasets) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.12, inspect) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.12, scoring) (push) Failing after 5s
Integration Tests / test-matrix (library, 3.12, post_training) (push) Failing after 16s
Test External Providers / test-external-providers (venv) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.12, providers) (push) Failing after 15s
Integration Tests / test-matrix (library, 3.12, tool_runtime) (push) Failing after 13s
Update ReadTheDocs / update-readthedocs (push) Failing after 8s
Unit Tests / unit-tests (3.13) (push) Failing after 11s
Unit Tests / unit-tests (3.12) (push) Failing after 1m31s
Unit Tests / unit-tests (3.11) (push) Failing after 1m33s
Unit Tests / unit-tests (3.10) (push) Failing after 1m35s
Pre-commit / pre-commit (push) Failing after 3h13m41s
Some checks failed
Integration Tests / test-matrix (http, 3.10, inference) (push) Failing after 28s
Integration Tests / test-matrix (http, 3.11, agents) (push) Failing after 26s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 32s
Integration Tests / test-matrix (http, 3.11, scoring) (push) Failing after 24s
Integration Tests / test-matrix (http, 3.10, inspect) (push) Failing after 32s
Integration Tests / test-matrix (http, 3.11, datasets) (push) Failing after 30s
Integration Tests / test-matrix (http, 3.11, tool_runtime) (push) Failing after 28s
Integration Tests / test-matrix (http, 3.12, inference) (push) Failing after 28s
Integration Tests / test-matrix (http, 3.12, providers) (push) Failing after 26s
Integration Tests / test-matrix (http, 3.12, tool_runtime) (push) Failing after 24s
Integration Tests / test-matrix (http, 3.10, scoring) (push) Failing after 32s
Integration Tests / test-matrix (http, 3.11, inference) (push) Failing after 39s
Integration Tests / test-matrix (http, 3.12, scoring) (push) Failing after 35s
Integration Tests / test-matrix (http, 3.10, datasets) (push) Failing after 47s
Integration Tests / test-matrix (http, 3.12, post_training) (push) Failing after 22s
Integration Tests / test-matrix (http, 3.11, inspect) (push) Failing after 42s
Integration Tests / test-matrix (http, 3.11, post_training) (push) Failing after 31s
Integration Tests / test-matrix (http, 3.11, providers) (push) Failing after 44s
Integration Tests / test-matrix (http, 3.12, datasets) (push) Failing after 40s
Integration Tests / test-matrix (http, 3.12, inspect) (push) Failing after 42s
Integration Tests / test-matrix (http, 3.10, providers) (push) Failing after 44s
Integration Tests / test-matrix (http, 3.10, agents) (push) Failing after 33s
Integration Tests / test-matrix (http, 3.12, agents) (push) Failing after 37s
Integration Tests / test-matrix (library, 3.10, inspect) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.10, inference) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.10, agents) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.10, providers) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.10, scoring) (push) Failing after 11s
Integration Tests / test-matrix (http, 3.10, post_training) (push) Failing after 41s
Integration Tests / test-matrix (library, 3.10, datasets) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.10, post_training) (push) Failing after 13s
Integration Tests / test-matrix (http, 3.10, tool_runtime) (push) Failing after 46s
Integration Tests / test-matrix (library, 3.10, tool_runtime) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.11, agents) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.11, inference) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.11, post_training) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.11, datasets) (push) Failing after 14s
Integration Tests / test-matrix (library, 3.11, inspect) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.11, providers) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.11, tool_runtime) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.11, scoring) (push) Failing after 14s
Integration Tests / test-matrix (library, 3.12, agents) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.12, inference) (push) Failing after 7s
Integration Tests / test-matrix (library, 3.12, datasets) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.12, inspect) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.12, scoring) (push) Failing after 5s
Integration Tests / test-matrix (library, 3.12, post_training) (push) Failing after 16s
Test External Providers / test-external-providers (venv) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.12, providers) (push) Failing after 15s
Integration Tests / test-matrix (library, 3.12, tool_runtime) (push) Failing after 13s
Update ReadTheDocs / update-readthedocs (push) Failing after 8s
Unit Tests / unit-tests (3.13) (push) Failing after 11s
Unit Tests / unit-tests (3.12) (push) Failing after 1m31s
Unit Tests / unit-tests (3.11) (push) Failing after 1m33s
Unit Tests / unit-tests (3.10) (push) Failing after 1m35s
Pre-commit / pre-commit (push) Failing after 3h13m41s
Adding OpenAI compat `/v1/vector-store` apis. This PR implements the `faiss` provider with followup PRs coming up for other providers. Added routes to create, update, delete, list vector stores. Also added route to search a vector store Inserting into vector stores is missing and will be a follow up diff. ### Test Plan - Added new integration test for testing the faiss provider ``` pytest -sv --stack-config http://localhost:8321 tests/integration/vector_io/test_openai_vector_stores.py --embedding-model all-MiniLM-L6-v2 ```
This commit is contained in:
parent
ee57e58f29
commit
5ac43268e8
10 changed files with 2930 additions and 16 deletions
|
@ -6,7 +6,7 @@
|
|||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from typing import Any
|
||||
from typing import Any, Literal
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import chromadb
|
||||
|
@ -14,7 +14,15 @@ from numpy.typing import NDArray
|
|||
|
||||
from llama_stack.apis.inference import InterleavedContent
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
||||
from llama_stack.apis.vector_io import (
|
||||
Chunk,
|
||||
QueryChunksResponse,
|
||||
VectorIO,
|
||||
VectorStoreDeleteResponse,
|
||||
VectorStoreListResponse,
|
||||
VectorStoreObject,
|
||||
VectorStoreSearchResponse,
|
||||
)
|
||||
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
|
||||
from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
|
||||
from llama_stack.providers.utils.memory.vector_store import (
|
||||
|
@ -178,3 +186,59 @@ class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
|
|||
index = VectorDBWithIndex(vector_db, ChromaIndex(self.client, collection), self.inference_api)
|
||||
self.cache[vector_db_id] = index
|
||||
return index
|
||||
|
||||
async def openai_create_vector_store(
|
||||
self,
|
||||
name: str | None = None,
|
||||
file_ids: list[str] | None = None,
|
||||
expires_after: dict[str, Any] | None = None,
|
||||
chunking_strategy: dict[str, Any] | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
embedding_model: str | None = None,
|
||||
embedding_dimension: int | None = 384,
|
||||
provider_id: str | None = None,
|
||||
provider_vector_db_id: str | None = None,
|
||||
) -> VectorStoreObject:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")
|
||||
|
||||
async def openai_list_vector_stores(
|
||||
self,
|
||||
limit: int = 20,
|
||||
order: str = "desc",
|
||||
after: str | None = None,
|
||||
before: str | None = None,
|
||||
) -> VectorStoreListResponse:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")
|
||||
|
||||
async def openai_retrieve_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
) -> VectorStoreObject:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")
|
||||
|
||||
async def openai_update_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
name: str | None = None,
|
||||
expires_after: dict[str, Any] | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
) -> VectorStoreObject:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")
|
||||
|
||||
async def openai_delete_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
) -> VectorStoreDeleteResponse:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")
|
||||
|
||||
async def openai_search_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
query: str | list[str],
|
||||
filters: dict[str, Any] | None = None,
|
||||
max_num_results: int = 10,
|
||||
ranking_options: dict[str, Any] | None = None,
|
||||
rewrite_query: bool = False,
|
||||
search_mode: Literal["keyword", "vector", "hybrid"] = "vector",
|
||||
) -> VectorStoreSearchResponse:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")
|
||||
|
|
|
@ -9,14 +9,22 @@ import hashlib
|
|||
import logging
|
||||
import os
|
||||
import uuid
|
||||
from typing import Any
|
||||
from typing import Any, Literal
|
||||
|
||||
from numpy.typing import NDArray
|
||||
from pymilvus import MilvusClient
|
||||
|
||||
from llama_stack.apis.inference import InterleavedContent
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
||||
from llama_stack.apis.vector_io import (
|
||||
Chunk,
|
||||
QueryChunksResponse,
|
||||
VectorIO,
|
||||
VectorStoreDeleteResponse,
|
||||
VectorStoreListResponse,
|
||||
VectorStoreObject,
|
||||
VectorStoreSearchResponse,
|
||||
)
|
||||
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
|
||||
from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
|
||||
from llama_stack.providers.utils.memory.vector_store import (
|
||||
|
@ -177,6 +185,62 @@ class MilvusVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
|
|||
|
||||
return await index.query_chunks(query, params)
|
||||
|
||||
async def openai_create_vector_store(
|
||||
self,
|
||||
name: str | None = None,
|
||||
file_ids: list[str] | None = None,
|
||||
expires_after: dict[str, Any] | None = None,
|
||||
chunking_strategy: dict[str, Any] | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
embedding_model: str | None = None,
|
||||
embedding_dimension: int | None = 384,
|
||||
provider_id: str | None = None,
|
||||
provider_vector_db_id: str | None = None,
|
||||
) -> VectorStoreObject:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
|
||||
|
||||
async def openai_list_vector_stores(
|
||||
self,
|
||||
limit: int = 20,
|
||||
order: str = "desc",
|
||||
after: str | None = None,
|
||||
before: str | None = None,
|
||||
) -> VectorStoreListResponse:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
|
||||
|
||||
async def openai_retrieve_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
) -> VectorStoreObject:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
|
||||
|
||||
async def openai_update_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
name: str | None = None,
|
||||
expires_after: dict[str, Any] | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
) -> VectorStoreObject:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
|
||||
|
||||
async def openai_delete_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
) -> VectorStoreDeleteResponse:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
|
||||
|
||||
async def openai_search_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
query: str | list[str],
|
||||
filters: dict[str, Any] | None = None,
|
||||
max_num_results: int = 10,
|
||||
ranking_options: dict[str, Any] | None = None,
|
||||
rewrite_query: bool = False,
|
||||
search_mode: Literal["keyword", "vector", "hybrid"] = "vector",
|
||||
) -> VectorStoreSearchResponse:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
|
||||
|
||||
|
||||
def generate_chunk_id(document_id: str, chunk_text: str) -> str:
|
||||
"""Generate a unique chunk ID using a hash of document ID and chunk text."""
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
import logging
|
||||
import uuid
|
||||
from typing import Any
|
||||
from typing import Any, Literal
|
||||
|
||||
from numpy.typing import NDArray
|
||||
from qdrant_client import AsyncQdrantClient, models
|
||||
|
@ -14,7 +14,15 @@ from qdrant_client.models import PointStruct
|
|||
|
||||
from llama_stack.apis.inference import InterleavedContent
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
||||
from llama_stack.apis.vector_io import (
|
||||
Chunk,
|
||||
QueryChunksResponse,
|
||||
VectorIO,
|
||||
VectorStoreDeleteResponse,
|
||||
VectorStoreListResponse,
|
||||
VectorStoreObject,
|
||||
VectorStoreSearchResponse,
|
||||
)
|
||||
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
|
||||
from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
|
||||
from llama_stack.providers.utils.memory.vector_store import (
|
||||
|
@ -178,3 +186,59 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
|
|||
raise ValueError(f"Vector DB {vector_db_id} not found")
|
||||
|
||||
return await index.query_chunks(query, params)
|
||||
|
||||
async def openai_create_vector_store(
|
||||
self,
|
||||
name: str | None = None,
|
||||
file_ids: list[str] | None = None,
|
||||
expires_after: dict[str, Any] | None = None,
|
||||
chunking_strategy: dict[str, Any] | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
embedding_model: str | None = None,
|
||||
embedding_dimension: int | None = 384,
|
||||
provider_id: str | None = None,
|
||||
provider_vector_db_id: str | None = None,
|
||||
) -> VectorStoreObject:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
|
||||
|
||||
async def openai_list_vector_stores(
|
||||
self,
|
||||
limit: int = 20,
|
||||
order: str = "desc",
|
||||
after: str | None = None,
|
||||
before: str | None = None,
|
||||
) -> VectorStoreListResponse:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
|
||||
|
||||
async def openai_retrieve_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
) -> VectorStoreObject:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
|
||||
|
||||
async def openai_update_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
name: str | None = None,
|
||||
expires_after: dict[str, Any] | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
) -> VectorStoreObject:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
|
||||
|
||||
async def openai_delete_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
) -> VectorStoreDeleteResponse:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
|
||||
|
||||
async def openai_search_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
query: str | list[str],
|
||||
filters: dict[str, Any] | None = None,
|
||||
max_num_results: int = 10,
|
||||
ranking_options: dict[str, Any] | None = None,
|
||||
rewrite_query: bool = False,
|
||||
search_mode: Literal["keyword", "vector", "hybrid"] = "vector",
|
||||
) -> VectorStoreSearchResponse:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue