qdrant inline provider

Signed-off-by: Daniele Martinoli <dmartino@redhat.com>
Author: Daniele Martinoli
Date: 2025-02-26 10:00:37 +01:00
Parent: bfc79217a8
Commit: 6a7fe6312e
7 changed files with 67 additions and 6 deletions

@@ -444,6 +444,7 @@
 "psycopg2-binary",
 "pymongo",
 "pypdf",
+"qdrant-client",
 "redis",
 "requests",
 "scikit-learn",

@@ -3,7 +3,7 @@ orphan: true
 ---
 # Qdrant
-[Qdrant](https://qdrant.tech/documentation/) is a remote vector database provider for Llama Stack. It
+[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It
 allows you to store and query vectors directly in memory.
 That means you'll get fast and efficient vector retrieval.
@@ -17,7 +17,7 @@ That means you'll get fast and efficient vector retrieval.
 To use Qdrant in your Llama Stack project, follow these steps:
 1. Install the necessary dependencies.
-2. Configure your Llama Stack project to use Faiss.
+2. Configure your Llama Stack project to use Qdrant.
 3. Start storing and querying vectors.
 ## Installation
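For orientation, the steps above gloss over what "storing and querying vectors" looks like against a local Qdrant instance. Below is a minimal sketch of the embedded mode the new inline provider builds on; the collection name, vector size, and payload are made-up illustration, not Llama Stack API:

import asyncio

from qdrant_client import AsyncQdrantClient, models


async def main() -> None:
    # Local embedded mode: vectors are persisted under `path`, no server required.
    client = AsyncQdrantClient(path="/tmp/qdrant-demo")
    await client.create_collection(
        collection_name="demo",  # hypothetical collection
        vectors_config=models.VectorParams(size=4, distance=models.Distance.COSINE),
    )
    await client.upsert(
        collection_name="demo",
        points=[models.PointStruct(id=1, vector=[0.1, 0.2, 0.3, 0.4], payload={"doc": "hello"})],
    )
    hits = await client.search(collection_name="demo", query_vector=[0.1, 0.2, 0.3, 0.4], limit=1)
    print(hits)


asyncio.run(main())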

@@ -0,0 +1,19 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Dict
+
+from llama_stack.providers.datatypes import Api, ProviderSpec
+
+from .config import QdrantVectorIOConfig
+
+
+async def get_provider_impl(config: QdrantVectorIOConfig, deps: Dict[Api, ProviderSpec]):
+    from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter
+
+    impl = QdrantVectorIOAdapter(config, deps[Api.inference])
+    await impl.initialize()
+    return impl
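This factory is normally invoked by the stack's provider resolver. A hedged sketch of equivalent manual wiring follows; the `inference_impl` argument stands in for an already-built inference provider (hypothetical here), since real `deps` are assembled by Llama Stack itself:

from llama_stack.providers.datatypes import Api
from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig, get_provider_impl


async def build_qdrant_vector_io(inference_impl):
    # Hypothetical wiring; the stack resolver normally builds `deps` for us.
    config = QdrantVectorIOConfig(path="/tmp/llama/qdrant.db")
    deps = {Api.inference: inference_impl}
    return await get_provider_impl(config, deps)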

@@ -0,0 +1,21 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+from pydantic import BaseModel
+
+from llama_stack.schema_utils import json_schema_type
+
+
+@json_schema_type
+class QdrantVectorIOConfig(BaseModel):
+    path: str
+
+    @classmethod
+    def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
+        return {
+            "path": "${env.QDRANT_PATH:~/.llama/" + __distro_dir__ + "}/" + "qdrant.db",
+        }
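To make the template concrete: `sample_run_config` emits a `${env.VAR:default}` string that the stack substitutes at run time. The helper below is a rough stand-in for that substitution, illustrative only and not Llama Stack's actual resolver:

import os
import re


def expand_env_template(value: str) -> str:
    # Minimal stand-in for ${env.VAR:default} substitution.
    return re.sub(
        r"\$\{env\.(\w+):([^}]*)\}",
        lambda m: os.environ.get(m.group(1), m.group(2)),
        value,
    )


print(expand_env_template("${env.QDRANT_PATH:~/.llama/my-distro}/qdrant.db"))
# -> ~/.llama/my-distro/qdrant.db, or $QDRANT_PATH/qdrant.db when the env var is set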

@@ -92,6 +92,24 @@ def available_providers() -> List[ProviderSpec]:
             ),
             api_dependencies=[Api.inference],
         ),
+        remote_provider_spec(
+            api=Api.vector_io,
+            adapter=AdapterSpec(
+                adapter_type="sample",
+                pip_packages=[],
+                module="llama_stack.providers.remote.vector_io.sample",
+                config_class="llama_stack.providers.remote.vector_io.sample.SampleVectorIOConfig",
+            ),
+            api_dependencies=[],
+        ),
+        InlineProviderSpec(
+            api=Api.vector_io,
+            provider_type="inline::qdrant",
+            pip_packages=["qdrant-client"],
+            module="llama_stack.providers.inline.vector_io.qdrant",
+            config_class="llama_stack.providers.inline.vector_io.qdrant.QdrantVectorIOConfig",
+            api_dependencies=[Api.inference],
+        ),
         remote_provider_spec(
             Api.vector_io,
             AdapterSpec(
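With the `inline::qdrant` entry registered above, a distribution can select the provider in its run configuration. A sketch of the relevant fragment, written as the equivalent Python dict; the `provider_id` and path are made up, and the key names follow the usual provider/config shape:

vector_io_provider = {
    "provider_id": "qdrant",            # arbitrary instance name
    "provider_type": "inline::qdrant",  # must match the registry entry above
    "config": {
        "path": "~/.llama/my-distro/qdrant.db",
    },
}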

@@ -23,7 +23,6 @@ class QdrantVectorIOConfig(BaseModel):
     prefix: Optional[str] = None
     timeout: Optional[int] = None
     host: Optional[str] = None
-    path: Optional[str] = None
 
     @classmethod
     def sample_run_config(cls, **kwargs: Any) -> Dict[str, Any]:

@@ -6,7 +6,7 @@
 import logging
 import uuid
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
 
 from numpy.typing import NDArray
 from qdrant_client import AsyncQdrantClient, models
@@ -16,12 +16,13 @@ from llama_stack.apis.inference import InterleavedContent
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
 from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
+from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
 from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorDBWithIndex,
 )
-from .config import QdrantVectorIOConfig
+from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig
 
 log = logging.getLogger(__name__)
 
 CHUNK_ID_KEY = "_chunk_id"
@@ -99,7 +100,9 @@ class QdrantIndex(EmbeddingIndex):
 class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
-    def __init__(self, config: QdrantVectorIOConfig, inference_api: Api.inference) -> None:
+    def __init__(
+        self, config: Union[RemoteQdrantVectorIOConfig, InlineQdrantVectorIOConfig], inference_api: Api.inference
+    ) -> None:
         self.config = config
         self.client = AsyncQdrantClient(**self.config.model_dump(exclude_none=True))
         self.cache = {}
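Because `__init__` simply splats `model_dump(exclude_none=True)` into `AsyncQdrantClient`, the same adapter now serves both configs: the inline config contributes only `path` (embedded, on-disk Qdrant), while the remote config contributes connection fields such as `host`, `prefix`, and `timeout`. A hedged sketch of the two resulting client constructions; the concrete values are illustrative:

from qdrant_client import AsyncQdrantClient

# Inline config: only `path` survives exclude_none -> embedded local storage.
local_client = AsyncQdrantClient(path="/tmp/llama/qdrant.db")

# Remote config: connection fields survive -> talks to a running Qdrant server.
remote_client = AsyncQdrantClient(host="localhost", port=6333, timeout=30)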