Integrate distro docs into the restructured docs

This commit is contained in:
Ashwin Bharambe 2024-11-20 23:20:05 -08:00
parent 2411a44833
commit cd6ccb664c
17 changed files with 306 additions and 115 deletions

View file

@@ -1,13 +1,19 @@
# Build definition for the `meta-reference-quantized-gpu` distribution.
#
# NOTE(review): the previous text of this block had no indentation and carried
# both the old and the new form of every changed key, which produced duplicate
# keys (`description`, `docker_image`, `inference`, `safety`, `agents`,
# `telemetry`). Only the new form of each key is kept here. The nesting under
# `distribution_spec` / `providers` is reconstructed -- confirm it against the
# schema of the tool that consumes this file.
version: '2'
name: meta-reference-quantized-gpu
distribution_spec:
  description: Use Meta Reference with fp8, int4 quantization for running LLM inference
  # Explicitly unset: no container image is named for this distribution.
  docker_image: null
  # Every API maps to a list of providers, even when only one is configured.
  providers:
    inference:
      - inline::meta-reference-quantized
    memory:
      - inline::faiss
      - remote::chromadb
      - remote::pgvector
    safety:
      - inline::llama-guard
    agents:
      - inline::meta-reference
    telemetry:
      - inline::meta-reference
image_type: conda