refactor: move chardet and pypdf dependencies to vector IO provider registries

Moves `chardet` and `pypdf` out of the project's main `dependencies` list and into
the provider-specific `pip_packages` of all 12 vector IO providers that support
file processing (Api.files).

Updated providers:
- inline::meta-reference (deprecated in favor of inline::faiss), inline::faiss
- inline::sqlite-vec, inline::sqlite_vec (deprecated in favor of inline::sqlite-vec)
- remote::chromadb, inline::chromadb, remote::pgvector, remote::weaviate
- inline::qdrant, remote::qdrant, remote::milvus, inline::milvus
This commit is contained in:
skamenan7 2025-09-29 12:27:57 -04:00
parent b96ffa4592
commit ddfd2ef018
3 changed files with 12 additions and 18 deletions

View file

@ -18,7 +18,7 @@ def available_providers() -> list[ProviderSpec]:
InlineProviderSpec(
api=Api.vector_io,
provider_type="inline::meta-reference",
pip_packages=["faiss-cpu"],
pip_packages=["faiss-cpu", "chardet", "pypdf"],
module="llama_stack.providers.inline.vector_io.faiss",
config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
deprecation_warning="Please use the `inline::faiss` provider instead.",
@ -29,7 +29,7 @@ def available_providers() -> list[ProviderSpec]:
InlineProviderSpec(
api=Api.vector_io,
provider_type="inline::faiss",
pip_packages=["faiss-cpu"],
pip_packages=["faiss-cpu", "chardet", "pypdf"],
module="llama_stack.providers.inline.vector_io.faiss",
config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
api_dependencies=[Api.inference],
@ -82,7 +82,7 @@ more details about Faiss in general.
InlineProviderSpec(
api=Api.vector_io,
provider_type="inline::sqlite-vec",
pip_packages=["sqlite-vec"],
pip_packages=["sqlite-vec", "chardet", "pypdf"],
module="llama_stack.providers.inline.vector_io.sqlite_vec",
config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
api_dependencies=[Api.inference],
@ -289,7 +289,7 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f
InlineProviderSpec(
api=Api.vector_io,
provider_type="inline::sqlite_vec",
pip_packages=["sqlite-vec"],
pip_packages=["sqlite-vec", "chardet", "pypdf"],
module="llama_stack.providers.inline.vector_io.sqlite_vec",
config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
deprecation_warning="Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead.",
@ -303,7 +303,7 @@ Please refer to the sqlite-vec provider documentation.
api=Api.vector_io,
adapter_type="chromadb",
provider_type="remote::chromadb",
pip_packages=["chromadb-client"],
pip_packages=["chromadb-client", "chardet", "pypdf"],
module="llama_stack.providers.remote.vector_io.chroma",
config_class="llama_stack.providers.remote.vector_io.chroma.ChromaVectorIOConfig",
api_dependencies=[Api.inference],
@ -345,7 +345,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
InlineProviderSpec(
api=Api.vector_io,
provider_type="inline::chromadb",
pip_packages=["chromadb"],
pip_packages=["chromadb", "chardet", "pypdf"],
module="llama_stack.providers.inline.vector_io.chroma",
config_class="llama_stack.providers.inline.vector_io.chroma.ChromaVectorIOConfig",
api_dependencies=[Api.inference],
@ -389,7 +389,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
api=Api.vector_io,
adapter_type="pgvector",
provider_type="remote::pgvector",
pip_packages=["psycopg2-binary"],
pip_packages=["psycopg2-binary", "chardet", "pypdf"],
module="llama_stack.providers.remote.vector_io.pgvector",
config_class="llama_stack.providers.remote.vector_io.pgvector.PGVectorVectorIOConfig",
api_dependencies=[Api.inference],
@ -500,7 +500,7 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de
api=Api.vector_io,
adapter_type="weaviate",
provider_type="remote::weaviate",
pip_packages=["weaviate-client"],
pip_packages=["weaviate-client", "chardet", "pypdf"],
module="llama_stack.providers.remote.vector_io.weaviate",
config_class="llama_stack.providers.remote.vector_io.weaviate.WeaviateVectorIOConfig",
provider_data_validator="llama_stack.providers.remote.vector_io.weaviate.WeaviateRequestProviderData",
@ -541,7 +541,7 @@ See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more
InlineProviderSpec(
api=Api.vector_io,
provider_type="inline::qdrant",
pip_packages=["qdrant-client"],
pip_packages=["qdrant-client", "chardet", "pypdf"],
module="llama_stack.providers.inline.vector_io.qdrant",
config_class="llama_stack.providers.inline.vector_io.qdrant.QdrantVectorIOConfig",
api_dependencies=[Api.inference],
@ -594,7 +594,7 @@ See the [Qdrant documentation](https://qdrant.tech/documentation/) for more deta
api=Api.vector_io,
adapter_type="qdrant",
provider_type="remote::qdrant",
pip_packages=["qdrant-client"],
pip_packages=["qdrant-client", "chardet", "pypdf"],
module="llama_stack.providers.remote.vector_io.qdrant",
config_class="llama_stack.providers.remote.vector_io.qdrant.QdrantVectorIOConfig",
api_dependencies=[Api.inference],
@ -607,7 +607,7 @@ Please refer to the inline provider documentation.
api=Api.vector_io,
adapter_type="milvus",
provider_type="remote::milvus",
pip_packages=["pymilvus>=2.4.10"],
pip_packages=["pymilvus>=2.4.10", "chardet", "pypdf"],
module="llama_stack.providers.remote.vector_io.milvus",
config_class="llama_stack.providers.remote.vector_io.milvus.MilvusVectorIOConfig",
api_dependencies=[Api.inference],
@ -813,7 +813,7 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi
InlineProviderSpec(
api=Api.vector_io,
provider_type="inline::milvus",
pip_packages=["pymilvus[milvus-lite]>=2.4.10"],
pip_packages=["pymilvus[milvus-lite]>=2.4.10", "chardet", "pypdf"],
module="llama_stack.providers.inline.vector_io.milvus",
config_class="llama_stack.providers.inline.vector_io.milvus.MilvusVectorIOConfig",
api_dependencies=[Api.inference],

View file

@ -49,8 +49,6 @@ dependencies = [
"opentelemetry-exporter-otlp-proto-http>=1.30.0", # server
"aiosqlite>=0.21.0", # server - for metadata store
"asyncpg", # for metadata store
"chardet", # for character encoding detection in file processing
"pypdf", # for PDF file processing in vector stores
]
[project.optional-dependencies]

4
uv.lock generated
View file

@ -1768,7 +1768,6 @@ dependencies = [
{ name = "aiohttp" },
{ name = "aiosqlite" },
{ name = "asyncpg" },
{ name = "chardet" },
{ name = "fastapi" },
{ name = "fire" },
{ name = "h11" },
@ -1783,7 +1782,6 @@ dependencies = [
{ name = "pillow" },
{ name = "prompt-toolkit" },
{ name = "pydantic" },
{ name = "pypdf" },
{ name = "python-dotenv" },
{ name = "python-jose", extra = ["cryptography"] },
{ name = "python-multipart" },
@ -1893,7 +1891,6 @@ requires-dist = [
{ name = "aiohttp" },
{ name = "aiosqlite", specifier = ">=0.21.0" },
{ name = "asyncpg" },
{ name = "chardet" },
{ name = "fastapi", specifier = ">=0.115.0,<1.0" },
{ name = "fire" },
{ name = "h11", specifier = ">=0.16.0" },
@ -1910,7 +1907,6 @@ requires-dist = [
{ name = "pillow" },
{ name = "prompt-toolkit" },
{ name = "pydantic", specifier = ">=2.11.9" },
{ name = "pypdf" },
{ name = "python-dotenv" },
{ name = "python-jose", extras = ["cryptography"] },
{ name = "python-multipart", specifier = ">=0.0.20" },