Add a meta-reference-quantized-gpu distribution

2024-10-23 19:33:14 -07:00 · 2024-10-23 19:33:14 -07:00 · 05a8d47b98
commit 05a8d47b98
parent f5dcc03742
6 changed files with 104 additions and 4 deletions
--- a/distributions/meta-reference-quantized-gpu/run.yaml
+++ b/distributions/meta-reference-quantized-gpu/run.yaml
@ -0,0 +1,51 @@
+version: '2'
+built_at: '2024-10-08T17:40:45.325529'
+image_name: local
+docker_image: null
+conda_env: local
+apis:
+- shields
+- agents
+- models
+- memory
+- memory_banks
+- inference
+- safety
+providers:
+  inference:
+  - provider_id: meta0
+    provider_type: meta-reference-quantized
+    config:
+      model: Llama3.2-3B-Instruct
+      quantization:
+        type: fp8
+      torch_seed: null
+      max_seq_len: 2048
+      max_batch_size: 1
+  safety:
+  - provider_id: meta0
+    provider_type: meta-reference
+    config:
+      llama_guard_shield:
+        model: Llama-Guard-3-1B
+        excluded_categories: []
+        disable_input_check: false
+        disable_output_check: false
+      prompt_guard_shield:
+        model: Prompt-Guard-86M
+  memory:
+  - provider_id: meta0
+    provider_type: meta-reference
+    config: {}
+  agents:
+  - provider_id: meta0
+    provider_type: meta-reference
+    config:
+      persistence_store:
+        namespace: null
+        type: sqlite
+        db_path: ~/.llama/runtime/kvstore.db
+  telemetry:
+  - provider_id: meta0
+    provider_type: meta-reference
+    config: {}