forked from phoenix-oss/llama-stack-mirror
cerebras template update for memory (#792)
# What does this PR do?

- We no longer have meta-reference as a memory provider; update the cerebras template accordingly.

## Test Plan

```
python llama_stack/scripts/distro_codegen.py
```

## Sources

Please link relevant resources if necessary.

## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md), Pull Request section?
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.
This commit is contained in:
parent
48b12b9777
commit
d1f3b032c9
37 changed files with 14 additions and 39 deletions
|
@ -346,6 +346,7 @@
|
||||||
"blobfile",
|
"blobfile",
|
||||||
"cerebras_cloud_sdk",
|
"cerebras_cloud_sdk",
|
||||||
"chardet",
|
"chardet",
|
||||||
|
"chromadb-client",
|
||||||
"datasets",
|
"datasets",
|
||||||
"faiss-cpu",
|
"faiss-cpu",
|
||||||
"fastapi",
|
"fastapi",
|
||||||
|
|
|
@ -8,7 +8,7 @@ The `llamastack/distribution-cerebras` distribution consists of the following pr
|
||||||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||||
| eval | `inline::meta-reference` |
|
| eval | `inline::meta-reference` |
|
||||||
| inference | `remote::cerebras` |
|
| inference | `remote::cerebras` |
|
||||||
| memory | `inline::meta-reference` |
|
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||||
| safety | `inline::llama-guard` |
|
| safety | `inline::llama-guard` |
|
||||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||||
| telemetry | `inline::meta-reference` |
|
| telemetry | `inline::meta-reference` |
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
name: bedrock
|
|
||||||
distribution_spec:
|
distribution_spec:
|
||||||
description: Use AWS Bedrock for running LLM inference and safety
|
description: Use AWS Bedrock for running LLM inference and safety
|
||||||
providers:
|
providers:
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: bedrock
|
image_name: bedrock
|
||||||
conda_env: bedrock
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- datasetio
|
- datasetio
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
name: cerebras
|
|
||||||
distribution_spec:
|
distribution_spec:
|
||||||
description: Use Cerebras for running LLM inference
|
description: Use Cerebras for running LLM inference
|
||||||
providers:
|
providers:
|
||||||
|
@ -8,7 +7,9 @@ distribution_spec:
|
||||||
safety:
|
safety:
|
||||||
- inline::llama-guard
|
- inline::llama-guard
|
||||||
memory:
|
memory:
|
||||||
- inline::meta-reference
|
- inline::faiss
|
||||||
|
- remote::chromadb
|
||||||
|
- remote::pgvector
|
||||||
agents:
|
agents:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
eval:
|
eval:
|
||||||
|
|
|
@ -27,7 +27,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
providers = {
|
providers = {
|
||||||
"inference": ["remote::cerebras"],
|
"inference": ["remote::cerebras"],
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"memory": ["inline::meta-reference"],
|
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"eval": ["inline::meta-reference"],
|
"eval": ["inline::meta-reference"],
|
||||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: cerebras
|
image_name: cerebras
|
||||||
conda_env: cerebras
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- datasetio
|
- datasetio
|
||||||
|
@ -26,13 +25,19 @@ providers:
|
||||||
provider_type: inline::llama-guard
|
provider_type: inline::llama-guard
|
||||||
config: {}
|
config: {}
|
||||||
memory:
|
memory:
|
||||||
- provider_id: meta-reference
|
- provider_id: faiss
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
kvstore:
|
kvstore:
|
||||||
type: sqlite
|
type: sqlite
|
||||||
namespace: null
|
namespace: null
|
||||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/faiss_store.db
|
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/faiss_store.db
|
||||||
|
- provider_id: chromadb
|
||||||
|
provider_type: remote::chromadb
|
||||||
|
config: {}
|
||||||
|
- provider_id: pgvector
|
||||||
|
provider_type: remote::pgvector
|
||||||
|
config: {}
|
||||||
agents:
|
agents:
|
||||||
- provider_id: meta-reference
|
- provider_id: meta-reference
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
name: fireworks
|
|
||||||
distribution_spec:
|
distribution_spec:
|
||||||
description: Use Fireworks.AI for running LLM inference
|
description: Use Fireworks.AI for running LLM inference
|
||||||
providers:
|
providers:
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: fireworks
|
image_name: fireworks
|
||||||
conda_env: fireworks
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- datasetio
|
- datasetio
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: fireworks
|
image_name: fireworks
|
||||||
conda_env: fireworks
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- datasetio
|
- datasetio
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
name: hf-endpoint
|
|
||||||
distribution_spec:
|
distribution_spec:
|
||||||
description: Use (an external) Hugging Face Inference Endpoint for running LLM inference
|
description: Use (an external) Hugging Face Inference Endpoint for running LLM inference
|
||||||
providers:
|
providers:
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: hf-endpoint
|
image_name: hf-endpoint
|
||||||
conda_env: hf-endpoint
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- datasetio
|
- datasetio
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: hf-endpoint
|
image_name: hf-endpoint
|
||||||
conda_env: hf-endpoint
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- datasetio
|
- datasetio
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
name: hf-serverless
|
|
||||||
distribution_spec:
|
distribution_spec:
|
||||||
description: Use (an external) Hugging Face Inference Endpoint for running LLM inference
|
description: Use (an external) Hugging Face Inference Endpoint for running LLM inference
|
||||||
providers:
|
providers:
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: hf-serverless
|
image_name: hf-serverless
|
||||||
conda_env: hf-serverless
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- datasetio
|
- datasetio
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: hf-serverless
|
image_name: hf-serverless
|
||||||
conda_env: hf-serverless
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- datasetio
|
- datasetio
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
name: meta-reference-gpu
|
|
||||||
distribution_spec:
|
distribution_spec:
|
||||||
description: Use Meta Reference for running LLM inference
|
description: Use Meta Reference for running LLM inference
|
||||||
providers:
|
providers:
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: meta-reference-gpu
|
image_name: meta-reference-gpu
|
||||||
conda_env: meta-reference-gpu
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- datasetio
|
- datasetio
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: meta-reference-gpu
|
image_name: meta-reference-gpu
|
||||||
conda_env: meta-reference-gpu
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- datasetio
|
- datasetio
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
name: meta-reference-quantized-gpu
|
|
||||||
distribution_spec:
|
distribution_spec:
|
||||||
description: Use Meta Reference with fp8, int4 quantization for running LLM inference
|
description: Use Meta Reference with fp8, int4 quantization for running LLM inference
|
||||||
providers:
|
providers:
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: meta-reference-quantized-gpu
|
image_name: meta-reference-quantized-gpu
|
||||||
conda_env: meta-reference-quantized-gpu
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- datasetio
|
- datasetio
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
name: nvidia
|
|
||||||
distribution_spec:
|
distribution_spec:
|
||||||
description: Use NVIDIA NIM for running LLM inference
|
description: Use NVIDIA NIM for running LLM inference
|
||||||
providers:
|
providers:
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: nvidia
|
image_name: nvidia
|
||||||
conda_env: nvidia
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- datasetio
|
- datasetio
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
name: ollama
|
|
||||||
distribution_spec:
|
distribution_spec:
|
||||||
description: Use (an external) Ollama server for running LLM inference
|
description: Use (an external) Ollama server for running LLM inference
|
||||||
providers:
|
providers:
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: ollama
|
image_name: ollama
|
||||||
conda_env: ollama
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- datasetio
|
- datasetio
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: ollama
|
image_name: ollama
|
||||||
conda_env: ollama
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- datasetio
|
- datasetio
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
name: remote-vllm
|
|
||||||
distribution_spec:
|
distribution_spec:
|
||||||
description: Use (an external) vLLM server for running LLM inference
|
description: Use (an external) vLLM server for running LLM inference
|
||||||
providers:
|
providers:
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: remote-vllm
|
image_name: remote-vllm
|
||||||
conda_env: remote-vllm
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- inference
|
- inference
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: remote-vllm
|
image_name: remote-vllm
|
||||||
conda_env: remote-vllm
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- inference
|
- inference
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
name: tgi
|
|
||||||
distribution_spec:
|
distribution_spec:
|
||||||
description: Use (an external) TGI server for running LLM inference
|
description: Use (an external) TGI server for running LLM inference
|
||||||
providers:
|
providers:
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: tgi
|
image_name: tgi
|
||||||
conda_env: tgi
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- datasetio
|
- datasetio
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: tgi
|
image_name: tgi
|
||||||
conda_env: tgi
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- datasetio
|
- datasetio
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
name: together
|
|
||||||
distribution_spec:
|
distribution_spec:
|
||||||
description: Use Together.AI for running LLM inference
|
description: Use Together.AI for running LLM inference
|
||||||
providers:
|
providers:
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: together
|
image_name: together
|
||||||
conda_env: together
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- datasetio
|
- datasetio
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: together
|
image_name: together
|
||||||
conda_env: together
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- datasetio
|
- datasetio
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
name: vllm-gpu
|
|
||||||
distribution_spec:
|
distribution_spec:
|
||||||
description: Use a built-in vLLM engine for running LLM inference
|
description: Use a built-in vLLM engine for running LLM inference
|
||||||
providers:
|
providers:
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: vllm-gpu
|
image_name: vllm-gpu
|
||||||
conda_env: vllm-gpu
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- datasetio
|
- datasetio
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue