---
# Configuration for the `meta-reference-quantized-gpu` image.
#
# NOTE(review): values of the form ${env.NAME:default} look like placeholders
# that the consuming loader substitutes from the environment, with the text
# after the first `:` used as the fallback -- confirm against the loader.
# INFERENCE_MODEL has no fallback here, so it presumably must be set.

# Kept as a quoted string so it is not parsed as the integer 2.
version: '2'
image_name: meta-reference-quantized-gpu
docker_image: null
conda_env: meta-reference-quantized-gpu

# APIs listed for this image; each one has a matching entry under `providers`.
apis:
  - agents
  - inference
  - memory
  - safety
  - telemetry

# One list of provider instances per API named above.
providers:
  inference:
    - provider_id: meta-reference-inference
      provider_type: inline::meta-reference-quantized
      config:
        model: ${env.INFERENCE_MODEL}
        max_seq_len: 4096
        # Falls back to the literal text `null` when the variable is unset.
        # NOTE(review): presumably the loader maps that to "no checkpoint
        # dir" -- confirm.
        checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
        quantization:
          type: fp8

  memory:
    - provider_id: faiss
      provider_type: inline::faiss
      config:
        kvstore:
          type: sqlite
          namespace: null
          db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/faiss_store.db

  safety:
    - provider_id: llama-guard
      provider_type: inline::llama-guard
      config: {}

  agents:
    - provider_id: meta-reference
      provider_type: inline::meta-reference
      config:
        persistence_store:
          type: sqlite
          namespace: null
          db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/agents_store.db

  telemetry:
    - provider_id: meta-reference
      provider_type: inline::meta-reference
      config: {}

# SQLite-backed metadata store; shares the SQLITE_STORE_DIR base path used by
# the faiss and agents stores above.
metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/registry.db

# Single model entry, bound to the inference provider declared above and
# using the same INFERENCE_MODEL value as that provider's `config.model`.
models:
  - metadata: {}
    model_id: ${env.INFERENCE_MODEL}
    provider_id: meta-reference-inference
    provider_model_id: null

# The remaining resource lists are explicitly empty (`[]`, not null).
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []