From d1f3b032c9ee9e88dabe7b7125b3e3ffb7aa36a7 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Thu, 16 Jan 2025 16:07:53 -0800
Subject: [PATCH] cerebras template update for memory (#792)

# What does this PR do?

- We no longer ship `inline::meta-reference` as a memory provider, so update the Cerebras template to use `inline::faiss`, `remote::chromadb`, and `remote::pgvector` instead.

## Test Plan

```
python llama_stack/scripts/distro_codegen.py
```

## Sources

Please link relevant resources if necessary.

## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md), Pull Request section?
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.
---
 distributions/dependencies.json | 1 +
 .../distributions/self_hosted_distro/cerebras.md | 2 +-
 llama_stack/templates/bedrock/build.yaml | 1 -
 llama_stack/templates/bedrock/run.yaml | 1 -
 llama_stack/templates/cerebras/build.yaml | 5 +++--
 llama_stack/templates/cerebras/cerebras.py | 2 +-
 llama_stack/templates/cerebras/run.yaml | 11 ++++++++---
 llama_stack/templates/fireworks/build.yaml | 1 -
 llama_stack/templates/fireworks/run-with-safety.yaml | 1 -
 llama_stack/templates/fireworks/run.yaml | 1 -
 llama_stack/templates/hf-endpoint/build.yaml | 1 -
 .../templates/hf-endpoint/run-with-safety.yaml | 1 -
 llama_stack/templates/hf-endpoint/run.yaml | 1 -
 llama_stack/templates/hf-serverless/build.yaml | 1 -
 .../templates/hf-serverless/run-with-safety.yaml | 1 -
 llama_stack/templates/hf-serverless/run.yaml | 1 -
 llama_stack/templates/meta-reference-gpu/build.yaml | 1 -
 .../templates/meta-reference-gpu/run-with-safety.yaml | 1 -
 llama_stack/templates/meta-reference-gpu/run.yaml | 1 -
 .../templates/meta-reference-quantized-gpu/build.yaml | 1 -
 .../templates/meta-reference-quantized-gpu/run.yaml | 1 -
 llama_stack/templates/nvidia/build.yaml | 1 -
 llama_stack/templates/nvidia/run.yaml | 1 -
 llama_stack/templates/ollama/build.yaml | 1 -
 llama_stack/templates/ollama/run-with-safety.yaml | 1 -
 llama_stack/templates/ollama/run.yaml | 1 -
 llama_stack/templates/remote-vllm/build.yaml | 1 -
 .../templates/remote-vllm/run-with-safety.yaml | 1 -
 llama_stack/templates/remote-vllm/run.yaml | 1 -
 llama_stack/templates/tgi/build.yaml | 1 -
 llama_stack/templates/tgi/run-with-safety.yaml | 1 -
 llama_stack/templates/tgi/run.yaml | 1 -
 llama_stack/templates/together/build.yaml | 1 -
 llama_stack/templates/together/run-with-safety.yaml | 1 -
 llama_stack/templates/together/run.yaml | 1 -
 llama_stack/templates/vllm-gpu/build.yaml | 1 -
 llama_stack/templates/vllm-gpu/run.yaml | 1 -
 37 files changed, 14 insertions(+), 39 deletions(-)

diff --git a/distributions/dependencies.json b/distributions/dependencies.json
index ab3a367f1..d6d60ef7c 100644
--- a/distributions/dependencies.json
+++ b/distributions/dependencies.json
@@ -346,6 +346,7 @@
     "blobfile",
     "cerebras_cloud_sdk",
     "chardet",
+    "chromadb-client",
     "datasets",
     "faiss-cpu",
     "fastapi",
diff --git a/docs/source/distributions/self_hosted_distro/cerebras.md b/docs/source/distributions/self_hosted_distro/cerebras.md
index 302d121dd..22e4125bd 100644
--- a/docs/source/distributions/self_hosted_distro/cerebras.md
+++ b/docs/source/distributions/self_hosted_distro/cerebras.md
@@ -8,7 +8,7 @@ The `llamastack/distribution-cerebras` distribution consists of the following pr
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
 | inference | `remote::cerebras` |
-| memory | `inline::meta-reference` |
+| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml
index a68a8f6fc..794e54306 100644
--- a/llama_stack/templates/bedrock/build.yaml
+++ b/llama_stack/templates/bedrock/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: bedrock
 distribution_spec:
   description: Use AWS Bedrock for running LLM inference and safety
   providers:
diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml
index 1d0721773..3a6922ae7 100644
--- a/llama_stack/templates/bedrock/run.yaml
+++ b/llama_stack/templates/bedrock/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: bedrock
-conda_env: bedrock
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/cerebras/build.yaml b/llama_stack/templates/cerebras/build.yaml
index 0fe568d09..9f187d3c7 100644
--- a/llama_stack/templates/cerebras/build.yaml
+++ b/llama_stack/templates/cerebras/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: cerebras
 distribution_spec:
   description: Use Cerebras for running LLM inference
   providers:
@@ -8,7 +7,9 @@ distribution_spec:
     safety:
     - inline::llama-guard
     memory:
-    - inline::meta-reference
+    - inline::faiss
+    - remote::chromadb
+    - remote::pgvector
     agents:
     - inline::meta-reference
     eval:
diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py
index 6571170dd..17fc26632 100644
--- a/llama_stack/templates/cerebras/cerebras.py
+++ b/llama_stack/templates/cerebras/cerebras.py
@@ -27,7 +27,7 @@ def get_distribution_template() -> DistributionTemplate:
     providers = {
         "inference": ["remote::cerebras"],
         "safety": ["inline::llama-guard"],
-        "memory": ["inline::meta-reference"],
+        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "agents": ["inline::meta-reference"],
         "eval": ["inline::meta-reference"],
         "datasetio": ["remote::huggingface", "inline::localfs"],
diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml
index 42146ad4b..e0beab9cc 100644
--- a/llama_stack/templates/cerebras/run.yaml
+++ b/llama_stack/templates/cerebras/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: cerebras
-conda_env: cerebras
 apis:
 - agents
 - datasetio
@@ -26,13 +25,19 @@ providers:
     provider_type: inline::llama-guard
     config: {}
   memory:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
+  - provider_id: faiss
+    provider_type: inline::faiss
     config:
       kvstore:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/faiss_store.db
+  - provider_id: chromadb
+    provider_type: remote::chromadb
+    config: {}
+  - provider_id: pgvector
+    provider_type: remote::pgvector
+    config: {}
   agents:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml
index e76cc86f1..504c913bd 100644
--- a/llama_stack/templates/fireworks/build.yaml
+++ b/llama_stack/templates/fireworks/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: fireworks
 distribution_spec:
   description: Use Fireworks.AI for running LLM inference
   providers:
diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml
index a279ab820..8fefbd98a 100644
--- a/llama_stack/templates/fireworks/run-with-safety.yaml
+++ b/llama_stack/templates/fireworks/run-with-safety.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: fireworks
-conda_env: fireworks
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml
index 79fafe66c..53128f456 100644
--- a/llama_stack/templates/fireworks/run.yaml
+++ b/llama_stack/templates/fireworks/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: fireworks
-conda_env: fireworks
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml
index c18689855..43486030e 100644
--- a/llama_stack/templates/hf-endpoint/build.yaml
+++ b/llama_stack/templates/hf-endpoint/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: hf-endpoint
 distribution_spec:
   description: Use (an external) Hugging Face Inference Endpoint for running LLM inference
   providers:
diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml
index a9d895d23..6a52ca861 100644
--- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml
+++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: hf-endpoint
-conda_env: hf-endpoint
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml
index e9b58c962..c019c587a 100644
--- a/llama_stack/templates/hf-endpoint/run.yaml
+++ b/llama_stack/templates/hf-endpoint/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: hf-endpoint
-conda_env: hf-endpoint
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml
index a6b551e4a..e1328bd58 100644
--- a/llama_stack/templates/hf-serverless/build.yaml
+++ b/llama_stack/templates/hf-serverless/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: hf-serverless
 distribution_spec:
   description: Use (an external) Hugging Face Inference Endpoint for running LLM inference
   providers:
diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml
index 415cec648..0a64de358 100644
--- a/llama_stack/templates/hf-serverless/run-with-safety.yaml
+++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: hf-serverless
-conda_env: hf-serverless
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml
index ef9dedeed..f91e45fb6 100644
--- a/llama_stack/templates/hf-serverless/run.yaml
+++ b/llama_stack/templates/hf-serverless/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: hf-serverless
-conda_env: hf-serverless
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml
index ba8413fa6..9ad7b26bf 100644
--- a/llama_stack/templates/meta-reference-gpu/build.yaml
+++ b/llama_stack/templates/meta-reference-gpu/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: meta-reference-gpu
 distribution_spec:
   description: Use Meta Reference for running LLM inference
   providers:
diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
index 4946fdab7..591afa2be 100644
--- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: meta-reference-gpu
-conda_env: meta-reference-gpu
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml
index 52345f3c1..cc22a514b 100644
--- a/llama_stack/templates/meta-reference-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: meta-reference-gpu
-conda_env: meta-reference-gpu
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
index 41ab44e38..e6b64ea1e 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
+++ b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: meta-reference-quantized-gpu
 distribution_spec:
   description: Use Meta Reference with fp8, int4 quantization for running LLM inference
   providers:
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
index 02a5bacaa..ff0affafb 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: meta-reference-quantized-gpu
-conda_env: meta-reference-quantized-gpu
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/nvidia/build.yaml b/llama_stack/templates/nvidia/build.yaml
index 813502ada..56124552b 100644
--- a/llama_stack/templates/nvidia/build.yaml
+++ b/llama_stack/templates/nvidia/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: nvidia
 distribution_spec:
   description: Use NVIDIA NIM for running LLM inference
   providers:
diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml
index d07eb25eb..1887a55d0 100644
--- a/llama_stack/templates/nvidia/run.yaml
+++ b/llama_stack/templates/nvidia/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: nvidia
-conda_env: nvidia
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml
index cbd9101cf..5f2e010ee 100644
--- a/llama_stack/templates/ollama/build.yaml
+++ b/llama_stack/templates/ollama/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: ollama
 distribution_spec:
   description: Use (an external) Ollama server for running LLM inference
   providers:
diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml
index 0792beddd..a808590c3 100644
--- a/llama_stack/templates/ollama/run-with-safety.yaml
+++ b/llama_stack/templates/ollama/run-with-safety.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: ollama
-conda_env: ollama
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml
index 176465299..aa7b54a87 100644
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: ollama
-conda_env: ollama
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/remote-vllm/build.yaml b/llama_stack/templates/remote-vllm/build.yaml
index 246e53db0..2659c8190 100644
--- a/llama_stack/templates/remote-vllm/build.yaml
+++ b/llama_stack/templates/remote-vllm/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: remote-vllm
 distribution_spec:
   description: Use (an external) vLLM server for running LLM inference
   providers:
diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml
index 1babd04ac..4bf73bbda 100644
--- a/llama_stack/templates/remote-vllm/run-with-safety.yaml
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: remote-vllm
-conda_env: remote-vllm
 apis:
 - agents
 - inference
diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml
index a3a571423..1743793a8 100644
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: remote-vllm
-conda_env: remote-vllm
 apis:
 - agents
 - inference
diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml
index 399d4a616..3bcacffb0 100644
--- a/llama_stack/templates/tgi/build.yaml
+++ b/llama_stack/templates/tgi/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: tgi
 distribution_spec:
   description: Use (an external) TGI server for running LLM inference
   providers:
diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml
index 4134101f6..070daedc1 100644
--- a/llama_stack/templates/tgi/run-with-safety.yaml
+++ b/llama_stack/templates/tgi/run-with-safety.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: tgi
-conda_env: tgi
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml
index b0b78e33b..9cfba37aa 100644
--- a/llama_stack/templates/tgi/run.yaml
+++ b/llama_stack/templates/tgi/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: tgi
-conda_env: tgi
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml
index 96f9f758e..ad970f405 100644
--- a/llama_stack/templates/together/build.yaml
+++ b/llama_stack/templates/together/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: together
 distribution_spec:
   description: Use Together.AI for running LLM inference
   providers:
diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml
index c415b0ec0..4e162aab3 100644
--- a/llama_stack/templates/together/run-with-safety.yaml
+++ b/llama_stack/templates/together/run-with-safety.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: together
-conda_env: together
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml
index ed65ded57..3c4844447 100644
--- a/llama_stack/templates/together/run.yaml
+++ b/llama_stack/templates/together/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: together
-conda_env: together
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/vllm-gpu/build.yaml b/llama_stack/templates/vllm-gpu/build.yaml
index 959f91d3e..e068fa97e 100644
--- a/llama_stack/templates/vllm-gpu/build.yaml
+++ b/llama_stack/templates/vllm-gpu/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: vllm-gpu
 distribution_spec:
   description: Use a built-in vLLM engine for running LLM inference
   providers:
diff --git a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml
index 48ec57cfb..1cb44b052 100644
--- a/llama_stack/templates/vllm-gpu/run.yaml
+++ b/llama_stack/templates/vllm-gpu/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: vllm-gpu
-conda_env: vllm-gpu
 apis:
 - agents
 - datasetio
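
Note: the regenerated cerebras run.yaml registers `remote::chromadb` and `remote::pgvector` with empty `config: {}` blocks, so a deployment still has to point them at running services. Below is a minimal sketch of what that might look like; the field names and environment variables are illustrative assumptions, not taken from this patch, so check the providers' actual config schemas before relying on them:

```yaml
memory:
- provider_id: chromadb
  provider_type: remote::chromadb
  config:
    # Assumed field: URL of a running Chroma server, e.g. http://localhost:8000
    url: ${env.CHROMADB_URL}
- provider_id: pgvector
  provider_type: remote::pgvector
  config:
    # Assumed fields: connection settings for a Postgres instance with the
    # pgvector extension installed
    host: ${env.PGVECTOR_HOST:localhost}
    port: 5432
    db: llamastack
    user: llamastack
    password: ${env.PGVECTOR_PASSWORD}
```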