mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-23 12:57:11 +00:00
Add a meta-reference-quantized-gpu distribution
This commit is contained in:
parent
f5dcc03742
commit
05a8d47b98
6 changed files with 104 additions and 4 deletions
14
distributions/meta-reference-quantized-gpu/build.yaml
Normal file
14
distributions/meta-reference-quantized-gpu/build.yaml
Normal file
|
@ -0,0 +1,14 @@
|
|||
# Build definition for the `meta-reference-quantized-gpu` distribution.
# NOTE(review): the nesting below was reconstructed from a flattened diff view
# (all keys were at column 0); confirm it against the consumer's schema.
name: meta-reference-quantized-gpu
distribution_spec:
  # Presumably the base image used when image_type is docker; verify.
  docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
  description: Use code from `llama_stack` itself to serve all llama stack APIs
  # One provider per API; `memory` lists several providers.
  providers:
    inference: meta-reference-quantized
    memory:
      - meta-reference
      - remote::chromadb
      - remote::pgvector
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
# NOTE(review): assumed to be a top-level key (sibling of distribution_spec);
# the flattened source cannot confirm this, so verify.
image_type: docker
|
Loading…
Add table
Add a link
Reference in a new issue