mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
cerebras template update for memory (#792)
# What does this PR do?

- We no longer have `meta-reference` as a memory provider, so update the cerebras template accordingly.

## Test Plan

```
python llama_stack/scripts/distro_codegen.py
```

## Sources

Please link relevant resources if necessary.

## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md), Pull Request section?
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.
This commit is contained in:
parent
48b12b9777
commit
d1f3b032c9
37 changed files with 14 additions and 39 deletions
|
@ -346,6 +346,7 @@
|
|||
"blobfile",
|
||||
"cerebras_cloud_sdk",
|
||||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
|
|
|
@ -8,7 +8,7 @@ The `llamastack/distribution-cerebras` distribution consists of the following pr
|
|||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||
| eval | `inline::meta-reference` |
|
||||
| inference | `remote::cerebras` |
|
||||
-| memory | `inline::meta-reference` |
|
||||
+| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
| safety | `inline::llama-guard` |
|
||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||
| telemetry | `inline::meta-reference` |
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
version: '2'
|
||||
name: bedrock
|
||||
distribution_spec:
|
||||
description: Use AWS Bedrock for running LLM inference and safety
|
||||
providers:
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: bedrock
|
||||
conda_env: bedrock
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
version: '2'
|
||||
name: cerebras
|
||||
distribution_spec:
|
||||
description: Use Cerebras for running LLM inference
|
||||
providers:
|
||||
|
@ -8,7 +7,9 @@ distribution_spec:
|
|||
safety:
|
||||
- inline::llama-guard
|
||||
memory:
|
||||
- inline::meta-reference
|
||||
- inline::faiss
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
agents:
|
||||
- inline::meta-reference
|
||||
eval:
|
||||
|
|
|
@ -27,7 +27,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
providers = {
|
||||
"inference": ["remote::cerebras"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
-"memory": ["inline::meta-reference"],
|
||||
+"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"eval": ["inline::meta-reference"],
|
||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: cerebras
|
||||
conda_env: cerebras
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
|
@ -26,13 +25,19 @@ providers:
|
|||
provider_type: inline::llama-guard
|
||||
config: {}
|
||||
memory:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
namespace: null
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/faiss_store.db
|
||||
- provider_id: chromadb
|
||||
provider_type: remote::chromadb
|
||||
config: {}
|
||||
- provider_id: pgvector
|
||||
provider_type: remote::pgvector
|
||||
config: {}
|
||||
agents:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
version: '2'
|
||||
name: fireworks
|
||||
distribution_spec:
|
||||
description: Use Fireworks.AI for running LLM inference
|
||||
providers:
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: fireworks
|
||||
conda_env: fireworks
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: fireworks
|
||||
conda_env: fireworks
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
version: '2'
|
||||
name: hf-endpoint
|
||||
distribution_spec:
|
||||
description: Use (an external) Hugging Face Inference Endpoint for running LLM inference
|
||||
providers:
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: hf-endpoint
|
||||
conda_env: hf-endpoint
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: hf-endpoint
|
||||
conda_env: hf-endpoint
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
version: '2'
|
||||
name: hf-serverless
|
||||
distribution_spec:
|
||||
description: Use (an external) Hugging Face Inference Endpoint for running LLM inference
|
||||
providers:
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: hf-serverless
|
||||
conda_env: hf-serverless
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: hf-serverless
|
||||
conda_env: hf-serverless
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
version: '2'
|
||||
name: meta-reference-gpu
|
||||
distribution_spec:
|
||||
description: Use Meta Reference for running LLM inference
|
||||
providers:
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: meta-reference-gpu
|
||||
conda_env: meta-reference-gpu
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: meta-reference-gpu
|
||||
conda_env: meta-reference-gpu
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
version: '2'
|
||||
name: meta-reference-quantized-gpu
|
||||
distribution_spec:
|
||||
description: Use Meta Reference with fp8, int4 quantization for running LLM inference
|
||||
providers:
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: meta-reference-quantized-gpu
|
||||
conda_env: meta-reference-quantized-gpu
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
version: '2'
|
||||
name: nvidia
|
||||
distribution_spec:
|
||||
description: Use NVIDIA NIM for running LLM inference
|
||||
providers:
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: nvidia
|
||||
conda_env: nvidia
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
version: '2'
|
||||
name: ollama
|
||||
distribution_spec:
|
||||
description: Use (an external) Ollama server for running LLM inference
|
||||
providers:
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: ollama
|
||||
conda_env: ollama
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: ollama
|
||||
conda_env: ollama
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
version: '2'
|
||||
name: remote-vllm
|
||||
distribution_spec:
|
||||
description: Use (an external) vLLM server for running LLM inference
|
||||
providers:
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: remote-vllm
|
||||
conda_env: remote-vllm
|
||||
apis:
|
||||
- agents
|
||||
- inference
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: remote-vllm
|
||||
conda_env: remote-vllm
|
||||
apis:
|
||||
- agents
|
||||
- inference
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
version: '2'
|
||||
name: tgi
|
||||
distribution_spec:
|
||||
description: Use (an external) TGI server for running LLM inference
|
||||
providers:
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: tgi
|
||||
conda_env: tgi
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: tgi
|
||||
conda_env: tgi
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
version: '2'
|
||||
name: together
|
||||
distribution_spec:
|
||||
description: Use Together.AI for running LLM inference
|
||||
providers:
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: together
|
||||
conda_env: together
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: together
|
||||
conda_env: together
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
version: '2'
|
||||
name: vllm-gpu
|
||||
distribution_spec:
|
||||
description: Use a built-in vLLM engine for running LLM inference
|
||||
providers:
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: vllm-gpu
|
||||
conda_env: vllm-gpu
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue