llama-stack-mirror/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
2024-12-12 14:46:51 -08:00

29 lines
685 B
YAML

# Build definition for the `meta-reference-quantized-gpu` distribution.
# `distribution_spec.providers` maps each API name to the list of provider
# types selected for that API; `image_type` is a top-level build setting.
version: '2'
name: meta-reference-quantized-gpu
distribution_spec:
  description: Use Meta Reference with fp8, int4 quantization for running LLM inference
  # No prebuilt container image is specified for this template.
  docker_image: null
  providers:
    inference:
    - inline::meta-reference-quantized
    # sentence-transformers is an in-process provider, hence the `inline::`
    # prefix (NOTE(review): confirm against the provider registry).
    - inline::sentence-transformers
    memory:
    - inline::faiss
    - remote::chromadb
    - remote::pgvector
    safety:
    - inline::llama-guard
    agents:
    - inline::meta-reference
    telemetry:
    - inline::meta-reference
    eval:
    - inline::meta-reference
    datasetio:
    - remote::huggingface
    - inline::localfs
    scoring:
    - inline::basic
    - inline::llm-as-judge
    - inline::braintrust
image_type: conda