version: '2'
name: meta-reference-quantized-gpu
distribution_spec:
  description: Use Meta Reference with fp8, int4 quantization for running LLM inference
  docker_image: null
  providers:
    inference:
      - inline::meta-reference-quantized
    memory:
      - inline::faiss
      - remote::chromadb
      - remote::pgvector
    safety:
      - inline::llama-guard
    agents:
      - inline::meta-reference
    telemetry:
      - inline::meta-reference
    eval:
      - inline::meta-reference
    datasetio:
      - remote::huggingface
      - inline::localfs
    scoring:
      - inline::basic
      - inline::llm-as-judge
      - inline::braintrust
image_type: conda
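# Usage sketch (assumption, not confirmed by this file alone): build specs
# like this one are typically consumed by the `llama stack build` CLI, e.g.:
#
#   llama stack build --config ./build.yaml
#
# With `image_type: conda`, the build is expected to produce a conda
# environment containing the providers listed above; the exact flag name and
# file path here are illustrative assumptions.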