Integrate distro docs into the restructured docs

2025-12-08 03:00:56 +00:00 · 2024-11-20 23:20:05 -08:00 · 2024-11-20 23:20:05 -08:00 · cd6ccb664c
commit cd6ccb664c
parent 2411a44833
17 changed files with 306 additions and 115 deletions
--- a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
+++ b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
@@ -1,13 +1,19 @@
+version: '2'
 name: meta-reference-quantized-gpu
 distribution_spec:
-  docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
-  description: Use code from `llama_stack` itself to serve all llama stack APIs
+  description: Use Meta Reference with fp8, int4 quantization for running LLM inference
+  docker_image: null
  providers:
-    inference: meta-reference-quantized
+    inference:
+    - inline::meta-reference-quantized
    memory:
    - inline::faiss
    - remote::chromadb
    - remote::pgvector
-    safety: inline::llama-guard
-    agents: inline::meta-reference
-    telemetry: inline::meta-reference
+    safety:
+    - inline::llama-guard
+    agents:
+    - inline::meta-reference
+    telemetry:
+    - inline::meta-reference
+image_type: conda