qdrant inline provider

Signed-off-by: Daniele Martinoli <dmartino@redhat.com>
2025-12-31 01:30:00 +00:00 · 2025-02-26 10:00:37 +01:00 · 2025-02-26 10:00:37 +01:00 · 6a7fe6312e
commit 6a7fe6312e
parent bfc79217a8
7 changed files with 67 additions and 6 deletions
--- a/distributions/dependencies.json
+++ b/distributions/dependencies.json
@ -444,6 +444,7 @@
    "psycopg2-binary",
    "pymongo",
    "pypdf",
+    "qdrant-client",
    "redis",
    "requests",
    "scikit-learn",
--- a/docs/source/providers/vector_io/qdrant.md
+++ b/docs/source/providers/vector_io/qdrant.md
@ -3,7 +3,7 @@ orphan: true
 ---
 # Qdrant

-[Qdrant](https://qdrant.tech/documentation/) is a remote vector database provider for Llama Stack. It
+[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It
 allows you to store and query vectors directly in memory.
 That means you'll get fast and efficient vector retrieval.

@ -17,7 +17,7 @@ That means you'll get fast and efficient vector retrieval.
 To use Qdrant in your Llama Stack project, follow these steps:

 1. Install the necessary dependencies.
-2. Configure your Llama Stack project to use Faiss.
+2. Configure your Llama Stack project to use Qdrant.
 3. Start storing and querying vectors.

 ## Installation
--- a/llama_stack/providers/inline/vector_io/qdrant/__init__.py
+++ b/llama_stack/providers/inline/vector_io/qdrant/__init__.py
@ -0,0 +1,19 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Dict
+
+from llama_stack.providers.datatypes import Api, ProviderSpec
+
+from .config import QdrantVectorIOConfig
+
+
+async def get_provider_impl(config: QdrantVectorIOConfig, deps: Dict[Api, ProviderSpec]):
+    """Build the Qdrant adapter from `config` and the inference dependency, initialize it, and return it."""
+    from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter
+    adapter = QdrantVectorIOAdapter(config, deps[Api.inference])
+    await adapter.initialize()
+    return adapter
--- a/llama_stack/providers/inline/vector_io/qdrant/config.py
+++ b/llama_stack/providers/inline/vector_io/qdrant/config.py
@ -0,0 +1,21 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+from pydantic import BaseModel
+
+from llama_stack.schema_utils import json_schema_type
+
+
+@json_schema_type
+class QdrantVectorIOConfig(BaseModel):
+    # Settings for the inline Qdrant provider; `path` is the only field and has no default.
+    path: str
+
+    @classmethod
+    def sample_run_config(cls, __distro_dir__: str) -> dict[str, str]:
+        # Annotated dict[str, str]: the builtin `any` is a function, not a type, and every value here is a string.
+        return {"path": "${env.QDRANT_PATH:~/.llama/" + __distro_dir__ + "}/" + "qdrant.db"}
--- a/llama_stack/providers/registry/vector_io.py
+++ b/llama_stack/providers/registry/vector_io.py
@ -92,6 +92,24 @@ def available_providers() -> List[ProviderSpec]:
            ),
            api_dependencies=[Api.inference],
        ),
+        remote_provider_spec(
+            api=Api.vector_io,
+            adapter=AdapterSpec(
+                adapter_type="sample",
+                pip_packages=[],
+                module="llama_stack.providers.remote.vector_io.sample",
+                config_class="llama_stack.providers.remote.vector_io.sample.SampleVectorIOConfig",
+            ),
+            api_dependencies=[],
+        ),
+        InlineProviderSpec(
+            api=Api.vector_io,
+            provider_type="inline::qdrant",
+            pip_packages=["qdrant-client"],
+            module="llama_stack.providers.inline.vector_io.qdrant",
+            config_class="llama_stack.providers.inline.vector_io.qdrant.QdrantVectorIOConfig",
+            api_dependencies=[Api.inference],
+        ),
        remote_provider_spec(
            Api.vector_io,
            AdapterSpec(
--- a/llama_stack/providers/remote/vector_io/qdrant/config.py
+++ b/llama_stack/providers/remote/vector_io/qdrant/config.py
@ -23,7 +23,6 @@ class QdrantVectorIOConfig(BaseModel):
    prefix: Optional[str] = None
    timeout: Optional[int] = None
    host: Optional[str] = None
-    path: Optional[str] = None

    @classmethod
    def sample_run_config(cls, **kwargs: Any) -> Dict[str, Any]:
--- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
+++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
@ -6,7 +6,7 @@

 import logging
 import uuid
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union

 from numpy.typing import NDArray
 from qdrant_client import AsyncQdrantClient, models
@ -16,12 +16,13 @@ from llama_stack.apis.inference import InterleavedContent
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
 from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
+from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
 from llama_stack.providers.utils.memory.vector_store import (
    EmbeddingIndex,
    VectorDBWithIndex,
 )

-from .config import QdrantVectorIOConfig
+from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig

 log = logging.getLogger(__name__)
 CHUNK_ID_KEY = "_chunk_id"
@ -99,7 +100,9 @@ class QdrantIndex(EmbeddingIndex):


 class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
-    def __init__(self, config: QdrantVectorIOConfig, inference_api: Api.inference) -> None:
+    def __init__(
+        self, config: Union[RemoteQdrantVectorIOConfig, InlineQdrantVectorIOConfig], inference_api: Api.inference
+    ) -> None:
        self.config = config
        self.client = AsyncQdrantClient(**self.config.model_dump(exclude_none=True))
        self.cache = {}