From 40d70864e710abd465be23fbc4a10252bde8b70e Mon Sep 17 00:00:00 2001 From: Dinesh Yeduguru Date: Fri, 13 Dec 2024 09:32:42 -0800 Subject: [PATCH] do not mention sentence transformer provider in docs --- distributions/dependencies.json | 40 +++++++++++++++++++ .../meta-reference-quantized-gpu.md | 2 +- .../meta-reference-quantized-gpu/build.yaml | 1 - .../meta_reference.py | 5 +-- 4 files changed, 42 insertions(+), 6 deletions(-) diff --git a/distributions/dependencies.json b/distributions/dependencies.json index 08595e479..7a974b917 100644 --- a/distributions/dependencies.json +++ b/distributions/dependencies.json @@ -260,6 +260,46 @@ "sentence-transformers --no-deps", "torch --index-url https://download.pytorch.org/whl/cpu" ], + "meta-reference-quantized-gpu": [ + "accelerate", + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "fairscale", + "faiss-cpu", + "fastapi", + "fbgemm-gpu", + "fire", + "httpx", + "lm-format-enforcer", + "matplotlib", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pypdf", + "redis", + "scikit-learn", + "scipy", + "sentence-transformers", + "sentencepiece", + "torch", + "torchao==0.5.0", + "torchvision", + "tqdm", + "transformers", + "uvicorn", + "zmq", + "sentence-transformers --no-deps", + "torch --index-url https://download.pytorch.org/whl/cpu" + ], "cerebras": [ "aiosqlite", "blobfile", diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md index 89e62657c..837be744a 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md @@ -17,7 +17,7 @@ The `llamastack/distribution-meta-reference-quantized-gpu` distribution consists | agents | `inline::meta-reference` | | datasetio | `remote::huggingface`, `inline::localfs` | | eval | `inline::meta-reference` | -| inference | `inline::meta-reference-quantized`, `remote::sentence-transformers` | +| inference | `inline::meta-reference-quantized` | | memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | diff --git a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml index d81744900..9d866de18 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml +++ b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml @@ -6,7 +6,6 @@ distribution_spec: providers: inference: - inline::meta-reference-quantized - - remote::sentence-transformers memory: - inline::faiss - remote::chromadb diff --git a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py index f1a1f90cd..c460860c5 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py @@ -21,10 +21,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin def get_distribution_template() -> DistributionTemplate: providers = { - "inference": [ - "inline::meta-reference-quantized", - "remote::sentence-transformers", - ], + "inference": ["inline::meta-reference-quantized"], "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"], "safety": ["inline::llama-guard"], "agents": ["inline::meta-reference"],