Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-07 11:08:20 +00:00)

commit 6a7fe6312e (parent bfc79217a8)

    qdrant inline provider

    Signed-off-by: Daniele Martinoli <dmartino@redhat.com>

7 changed files with 67 additions and 6 deletions
@@ -444,6 +444,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "qdrant-client",
     "redis",
     "requests",
     "scikit-learn",
@@ -3,7 +3,7 @@ orphan: true
 ---
 # Qdrant

-[Qdrant](https://qdrant.tech/documentation/) is a remote vector database provider for Llama Stack. It
+[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It
 allows you to store and query vectors directly in memory.
 That means you'll get fast and efficient vector retrieval.

@@ -17,7 +17,7 @@ That means you'll get fast and efficient vector retrieval.
 To use Qdrant in your Llama Stack project, follow these steps:

 1. Install the necessary dependencies.
-2. Configure your Llama Stack project to use Faiss.
+2. Configure your Llama Stack project to use Qdrant.
 3. Start storing and querying vectors.

 ## Installation
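For a sense of what step 3 looks like once the provider is wired up, here is a
hedged sketch against the VectorIO API (the vector-db id, chunk contents, and
the `vector_io` handle are made-up example values; the method names follow the
VectorIO protocol that the adapter below implements):

    from llama_stack.apis.vector_io import Chunk

    # Assume `vector_io` is the resolved VectorIO provider implementation and
    # "my-docs" is a vector db that was registered beforehand.
    await vector_io.insert_chunks(
        vector_db_id="my-docs",
        chunks=[Chunk(content="Qdrant is a vector database.", metadata={"document_id": "doc-1"})],
    )
    response = await vector_io.query_chunks(vector_db_id="my-docs", query="what is qdrant?")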
llama_stack/providers/inline/vector_io/qdrant/__init__.py (new file, 19 lines)
@@ -0,0 +1,19 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Dict
+
+from llama_stack.providers.datatypes import Api, ProviderSpec
+
+from .config import QdrantVectorIOConfig
+
+
+async def get_provider_impl(config: QdrantVectorIOConfig, deps: Dict[Api, ProviderSpec]):
+    from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter
+
+    impl = QdrantVectorIOAdapter(config, deps[Api.inference])
+    await impl.initialize()
+    return impl
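A minimal sketch of how this factory might be exercised directly (the
inference implementation and the direct call are illustrative assumptions; in
a real deployment the Llama Stack resolver builds the deps dict and invokes
the factory itself):

    from llama_stack.providers.datatypes import Api
    from llama_stack.providers.inline.vector_io.qdrant import (
        QdrantVectorIOConfig,
        get_provider_impl,
    )

    async def build_qdrant_io(inference_impl):
        # `inference_impl` is whatever inference provider the stack resolved.
        config = QdrantVectorIOConfig(path="/tmp/qdrant.db")  # example path
        deps = {Api.inference: inference_impl}
        return await get_provider_impl(config, deps)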
llama_stack/providers/inline/vector_io/qdrant/config.py (new file, 21 lines)
@@ -0,0 +1,21 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+
+from pydantic import BaseModel
+
+from llama_stack.schema_utils import json_schema_type
+
+
+@json_schema_type
+class QdrantVectorIOConfig(BaseModel):
+    path: str
+
+    @classmethod
+    def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
+        return {
+            "path": "${env.QDRANT_PATH:~/.llama/" + __distro_dir__ + "}/" + "qdrant.db",
+        }
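For illustration, calling the sample config with a made-up distro directory
(the ${env.VAR:default} placeholder is substituted later by Llama Stack's
run-config machinery, not by this method):

    QdrantVectorIOConfig.sample_run_config(__distro_dir__="distributions/ollama")
    # -> {"path": "${env.QDRANT_PATH:~/.llama/distributions/ollama}/qdrant.db"}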
@@ -92,6 +92,24 @@ def available_providers() -> List[ProviderSpec]:
             ),
             api_dependencies=[Api.inference],
         ),
+        remote_provider_spec(
+            api=Api.vector_io,
+            adapter=AdapterSpec(
+                adapter_type="sample",
+                pip_packages=[],
+                module="llama_stack.providers.remote.vector_io.sample",
+                config_class="llama_stack.providers.remote.vector_io.sample.SampleVectorIOConfig",
+            ),
+            api_dependencies=[],
+        ),
+        InlineProviderSpec(
+            api=Api.vector_io,
+            provider_type="inline::qdrant",
+            pip_packages=["qdrant-client"],
+            module="llama_stack.providers.inline.vector_io.qdrant",
+            config_class="llama_stack.providers.inline.vector_io.qdrant.QdrantVectorIOConfig",
+            api_dependencies=[Api.inference],
+        ),
         remote_provider_spec(
             Api.vector_io,
             AdapterSpec(
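The module and config_class strings in the spec drive dynamic loading. A rough
sketch of the resolution step (a simplified assumption; the real Llama Stack
resolver also instantiates config_class from the run config and validates
api_dependencies):

    import importlib

    async def resolve_inline_provider(spec, config, deps):
        # Import the package named by the spec, e.g.
        # llama_stack.providers.inline.vector_io.qdrant, and call its factory.
        module = importlib.import_module(spec.module)
        return await module.get_provider_impl(config, deps)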
@@ -23,7 +23,6 @@ class QdrantVectorIOConfig(BaseModel):
     prefix: Optional[str] = None
     timeout: Optional[int] = None
     host: Optional[str] = None
-    path: Optional[str] = None

     @classmethod
     def sample_run_config(cls, **kwargs: Any) -> Dict[str, Any]:
@@ -6,7 +6,7 @@
 import logging
 import uuid
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union

 from numpy.typing import NDArray
 from qdrant_client import AsyncQdrantClient, models
@@ -16,12 +16,13 @@ from llama_stack.apis.inference import InterleavedContent
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
 from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
+from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
 from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorDBWithIndex,
 )

-from .config import QdrantVectorIOConfig
+from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig

 log = logging.getLogger(__name__)
 CHUNK_ID_KEY = "_chunk_id"
@@ -99,7 +100,9 @@ class QdrantIndex(EmbeddingIndex):


 class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
-    def __init__(self, config: QdrantVectorIOConfig, inference_api: Api.inference) -> None:
+    def __init__(
+        self, config: Union[RemoteQdrantVectorIOConfig, InlineQdrantVectorIOConfig], inference_api: Api.inference
+    ) -> None:
         self.config = config
         self.client = AsyncQdrantClient(**self.config.model_dump(exclude_none=True))
         self.cache = {}
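One adapter can serve both configs because AsyncQdrantClient dispatches on the
keyword arguments it receives: the inline config contributes only `path`,
which selects qdrant-client's embedded local storage, while the remote config
contributes connection fields such as `url` or `host` (which is why `path` is
removed from the remote config above). A small sketch with example values:

    from qdrant_client import AsyncQdrantClient

    # Inline/embedded mode: local storage in-process, no server needed.
    inline_client = AsyncQdrantClient(path="/home/user/.llama/qdrant.db")

    # Remote mode: the same constructor connects to a running Qdrant server.
    remote_client = AsyncQdrantClient(url="http://localhost:6333")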