Mirror of https://github.com/meta-llama/llama-stack.git

Commit cbb423a32f (parent b4aca0aeb6): move distribution/templates to distributions/

10 changed files with 143 additions and 1 deletion
distributions/bedrock/build.yaml (new file, +10)

name: bedrock
distribution_spec:
  description: Use Amazon Bedrock APIs.
  providers:
    inference: remote::bedrock
    memory: meta-reference
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
image_type: conda
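A build file like this is consumed by the `llama stack build` CLI, which assembles a conda environment (or Docker image) wired up with the listed providers. A minimal sketch of that step, assuming the CLI of this vintage accepts a `--config` flag pointing at a build.yaml (verify with `llama stack build --help`):

```
# Build the Bedrock distribution from the file above.
# The --config flag name is an assumption for this CLI version; AWS credentials
# for the remote::bedrock inference provider are supplied later, at run time.
$ llama stack build --config distributions/bedrock/build.yaml
```

The same invocation pattern applies to the other build.yaml files added below; only the provider wiring differs.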
distributions/databricks/build.yaml (new file, +10)

name: databricks
distribution_spec:
  description: Use Databricks for running LLM inference
  providers:
    inference: remote::databricks
    memory: meta-reference
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
image_type: conda
distributions/fireworks/build.yaml (new file, +10)

name: fireworks
distribution_spec:
  description: Use Fireworks.ai for running LLM inference
  providers:
    inference: remote::fireworks
    memory: meta-reference
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
image_type: conda
distributions/hf-endpoint/build.yaml (new file, +10)

name: hf-endpoint
distribution_spec:
  description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints."
  providers:
    inference: remote::hf::endpoint
    memory: meta-reference
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
image_type: conda
distributions/hf-serverless/build.yaml (new file, +10)

name: hf-serverless
distribution_spec:
  description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
  providers:
    inference: remote::hf::serverless
    memory: meta-reference
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
image_type: conda
distributions/meta-reference-gpu/build.yaml (new file, +13)

name: meta-reference-gpu
distribution_spec:
  description: Use code from `llama_stack` itself to serve all llama stack APIs
  providers:
    inference: meta-reference
    memory:
    - meta-reference
    - remote::chromadb
    - remote::pgvector
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
image_type: docker
distributions/meta-reference-gpu/run.yaml (new file, +50)

version: '2'
built_at: '2024-10-08T17:40:45.325529'
image_name: local
docker_image: null
conda_env: local
apis:
- shields
- agents
- models
- memory
- memory_banks
- inference
- safety
providers:
  inference:
  - provider_id: meta0
    provider_type: meta-reference
    config:
      model: Llama3.1-8B-Instruct
      quantization: null
      torch_seed: null
      max_seq_len: 4096
      max_batch_size: 1
  safety:
  - provider_id: meta0
    provider_type: meta-reference
    config:
      llama_guard_shield:
        model: Llama-Guard-3-1B
        excluded_categories: []
        disable_input_check: false
        disable_output_check: false
      prompt_guard_shield:
        model: Prompt-Guard-86M
  memory:
  - provider_id: meta0
    provider_type: meta-reference
    config: {}
  agents:
  - provider_id: meta0
    provider_type: meta-reference
    config:
      persistence_store:
        namespace: null
        type: sqlite
        db_path: ~/.llama/runtime/kvstore.db
  telemetry:
  - provider_id: meta0
    provider_type: meta-reference
    config: {}
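A run file like this is what the stack server is started from. A minimal sketch of launching it, assuming the `llama stack run` command of this vintage accepts the path to a run.yaml plus a `--port` flag (both assumptions; check `llama stack run --help`), and assuming the Llama3.1-8B-Instruct checkpoint referenced above has already been downloaded locally (e.g. with the `llama download` CLI):

```
# Start the meta-reference-gpu stack on port 5000 (flag names assumed).
# The meta-reference inference provider loads the model weights directly,
# so a GPU with enough memory for Llama3.1-8B-Instruct is required.
$ llama stack run distributions/meta-reference-gpu/run.yaml --port 5000
```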
TGI distribution README (modified, +10 −1)

@@ -40,7 +40,7 @@ docker compose down
 ### Start the Distribution (Single Node CPU)

 > [!NOTE]
-> This assumes you have an hosted endpoint
+> This assumes you have a hosted endpoint compatible with the TGI server.

 ```
 $ cd llama-stack/distribution/tgi/cpu
@@ -49,6 +49,15 @@ compose.yaml run.yaml
 $ docker compose up
 ```

+Replace `<ENTER_YOUR_TGI_HOSTED_ENDPOINT>` in the `run.yaml` file with your TGI endpoint.
+```
+inference:
+  - provider_id: tgi0
+    provider_type: remote::tgi
+    config:
+      url: <ENTER_YOUR_TGI_HOSTED_ENDPOINT>
+```
+
 ### (Alternative) TGI server + llama stack run (Single Node GPU)

 If you wish to separately spin up a TGI server and connect it to Llama Stack, you may use the following commands.
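As a concrete illustration of the alternative flow, the `url` above can point at a locally hosted TGI container. The image name and flags below follow TGI's standard Docker usage, and the port and model choice are only example values; none of this is prescribed by the commit itself:

```
# Serve a model with TGI, exposing it on host port 5009 (example values).
$ docker run --gpus all --shm-size 1g -p 5009:80 \
    ghcr.io/huggingface/text-generation-inference:latest \
    --model-id meta-llama/Llama-3.1-8B-Instruct

# Then set `url: http://127.0.0.1:5009` for the tgi0 provider in run.yaml
# before starting the stack.
```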
distributions/together/build.yaml (new file, +10)

name: together
distribution_spec:
  description: Use Together.ai for running LLM inference
  providers:
    inference: remote::together
    memory: meta-reference
    safety: remote::together
    agents: meta-reference
    telemetry: meta-reference
image_type: conda
distributions/vllm/build.yaml (new file, +10)

name: vllm
distribution_spec:
  description: Like local, but use vLLM for running LLM inference
  providers:
    inference: vllm
    memory: meta-reference
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
image_type: conda