From 6a7fe6312ed092119a7a87b762a47a751b058c2a Mon Sep 17 00:00:00 2001
From: Daniele Martinoli
Date: Wed, 26 Feb 2025 10:00:37 +0100
Subject: [PATCH] qdrant inline provider

Signed-off-by: Daniele Martinoli
---
 distributions/dependencies.json             |  1 +
 docs/source/providers/vector_io/qdrant.md   |  4 ++--
 .../inline/vector_io/qdrant/__init__.py     | 19 +++++++++++++++++++
 .../inline/vector_io/qdrant/config.py       | 26 ++++++++++++++++++++++++++
 llama_stack/providers/registry/vector_io.py | 18 ++++++++++++++++++
 .../remote/vector_io/qdrant/config.py       |  1 -
 .../remote/vector_io/qdrant/qdrant.py       |  9 +++++---
 7 files changed, 72 insertions(+), 6 deletions(-)
 create mode 100644 llama_stack/providers/inline/vector_io/qdrant/__init__.py
 create mode 100644 llama_stack/providers/inline/vector_io/qdrant/config.py

diff --git a/distributions/dependencies.json b/distributions/dependencies.json
index c3f039247..21736b228 100644
--- a/distributions/dependencies.json
+++ b/distributions/dependencies.json
@@ -444,6 +444,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "qdrant-client",
     "redis",
     "requests",
     "scikit-learn",
diff --git a/docs/source/providers/vector_io/qdrant.md b/docs/source/providers/vector_io/qdrant.md
index a0de0be98..fb2275391 100644
--- a/docs/source/providers/vector_io/qdrant.md
+++ b/docs/source/providers/vector_io/qdrant.md
@@ -3,7 +3,7 @@ orphan: true
 ---
 # Qdrant
 
-[Qdrant](https://qdrant.tech/documentation/) is a remote vector database provider for Llama Stack. It
+[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It
 allows you to store and query vectors directly in memory.
 That means you'll get fast and efficient vector retrieval.
 
@@ -17,7 +17,7 @@
 To use Qdrant in your Llama Stack project, follow these steps:
 
 1. Install the necessary dependencies.
-2. Configure your Llama Stack project to use Faiss.
+2. Configure your Llama Stack project to use Qdrant.
 3. Start storing and querying vectors.
 
 ## Installation
diff --git a/llama_stack/providers/inline/vector_io/qdrant/__init__.py b/llama_stack/providers/inline/vector_io/qdrant/__init__.py
new file mode 100644
index 000000000..7e80ae16b
--- /dev/null
+++ b/llama_stack/providers/inline/vector_io/qdrant/__init__.py
@@ -0,0 +1,19 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Dict
+
+from llama_stack.providers.datatypes import Api, ProviderSpec
+
+from .config import QdrantVectorIOConfig
+
+
+async def get_provider_impl(config: QdrantVectorIOConfig, deps: Dict[Api, ProviderSpec]):
+    from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter
+
+    impl = QdrantVectorIOAdapter(config, deps[Api.inference])
+    await impl.initialize()
+    return impl
diff --git a/llama_stack/providers/inline/vector_io/qdrant/config.py b/llama_stack/providers/inline/vector_io/qdrant/config.py
new file mode 100644
index 000000000..ff9d6ce58
--- /dev/null
+++ b/llama_stack/providers/inline/vector_io/qdrant/config.py
@@ -0,0 +1,26 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
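+
+# Configuration for the inline (embedded) Qdrant provider: a single
+# filesystem path. Passing only ``path`` to qdrant-client selects its
+# local mode, so no separate Qdrant server is needed.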
+
+from typing import Any, Dict
+
+from pydantic import BaseModel
+
+from llama_stack.schema_utils import json_schema_type
+
+
+@json_schema_type
+class QdrantVectorIOConfig(BaseModel):
+    path: str
+
+    @classmethod
+    def sample_run_config(cls, __distro_dir__: str) -> Dict[str, Any]:
+        return {
+            "path": "${env.QDRANT_PATH:~/.llama/" + __distro_dir__ + "}/qdrant.db",
+        }
diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py
index fbc495d83..1f68f60fd 100644
--- a/llama_stack/providers/registry/vector_io.py
+++ b/llama_stack/providers/registry/vector_io.py
@@ -92,6 +92,24 @@ def available_providers() -> List[ProviderSpec]:
             ),
             api_dependencies=[Api.inference],
         ),
+        remote_provider_spec(
+            api=Api.vector_io,
+            adapter=AdapterSpec(
+                adapter_type="sample",
+                pip_packages=[],
+                module="llama_stack.providers.remote.vector_io.sample",
+                config_class="llama_stack.providers.remote.vector_io.sample.SampleVectorIOConfig",
+            ),
+            api_dependencies=[],
+        ),
+        InlineProviderSpec(
+            api=Api.vector_io,
+            provider_type="inline::qdrant",
+            pip_packages=["qdrant-client"],
+            module="llama_stack.providers.inline.vector_io.qdrant",
+            config_class="llama_stack.providers.inline.vector_io.qdrant.QdrantVectorIOConfig",
+            api_dependencies=[Api.inference],
+        ),
         remote_provider_spec(
             Api.vector_io,
             AdapterSpec(
diff --git a/llama_stack/providers/remote/vector_io/qdrant/config.py b/llama_stack/providers/remote/vector_io/qdrant/config.py
index ce68aa492..6d7eebe23 100644
--- a/llama_stack/providers/remote/vector_io/qdrant/config.py
+++ b/llama_stack/providers/remote/vector_io/qdrant/config.py
@@ -23,7 +23,6 @@ class QdrantVectorIOConfig(BaseModel):
     prefix: Optional[str] = None
     timeout: Optional[int] = None
     host: Optional[str] = None
-    path: Optional[str] = None
 
     @classmethod
     def sample_run_config(cls, **kwargs: Any) -> Dict[str, Any]:
diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
index 586b8ca95..f8e6fea1a 100644
--- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
+++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
@@ -6,7 +6,7 @@
 
 import logging
 import uuid
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
 
 from numpy.typing import NDArray
 from qdrant_client import AsyncQdrantClient, models
@@ -16,12 +16,13 @@ from llama_stack.apis.inference import InterleavedContent
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
 from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
+from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
 from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorDBWithIndex,
 )
 
-from .config import QdrantVectorIOConfig
+from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig
 
 log = logging.getLogger(__name__)
 CHUNK_ID_KEY = "_chunk_id"
@@ -99,7 +100,9 @@ class QdrantIndex(EmbeddingIndex):
 
 
 class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
-    def __init__(self, config: QdrantVectorIOConfig, inference_api: Api.inference) -> None:
+    def __init__(
+        self, config: Union[RemoteQdrantVectorIOConfig, InlineQdrantVectorIOConfig], inference_api: Api.inference
+    ) -> None:
         self.config = config
         self.client = AsyncQdrantClient(**self.config.model_dump(exclude_none=True))
         self.cache = {}
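
Usage sketch (illustrative, not part of the patch): the adapter expands
whichever config it receives into qdrant-client keyword arguments, so the
inline config's single `path` field is what selects qdrant-client's local,
on-disk mode. That is presumably also why the patch drops `path` from the
remote config, where it would conflict with a server URL. The path below is
a made-up example.

    import asyncio

    from qdrant_client import AsyncQdrantClient

    from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig


    async def main() -> None:
        # Inline config as defined in this patch: just a filesystem path.
        config = QdrantVectorIOConfig(path="/tmp/qdrant-example.db")

        # Mirrors what QdrantVectorIOAdapter.__init__ does: every set config
        # field becomes a client keyword argument. For the inline config that
        # is only `path`, which runs qdrant-client in local (embedded) mode,
        # so no Qdrant server is required.
        client = AsyncQdrantClient(**config.model_dump(exclude_none=True))
        print(await client.get_collections())


    asyncio.run(main())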