---
# Run configuration for the `vllm-gpu` image.
#
# Values written as `${env.NAME:default}` / `${env.NAME}` are left as plain
# strings here; they look like environment-variable placeholders that the
# consuming loader resolves after parsing — confirm against the loader.
# NOTE(review): the nesting below was reconstructed from a whitespace-collapsed
# copy of this file; verify it against the consumer's schema.

# Kept quoted so the version is a string rather than an integer.
version: '2'
image_name: vllm-gpu
docker_image: null
conda_env: vllm-gpu

# APIs enabled for this configuration; each one has a matching entry under
# `providers` below.
apis:
  - agents
  - inference
  - memory
  - safety
  - telemetry

# One list of provider instances per API.
providers:
  inference:
    - provider_id: vllm
      provider_type: inline::vllm
      config:
        model: ${env.INFERENCE_MODEL:Llama3.2-3B-Instruct}
        tensor_parallel_size: ${env.TENSOR_PARALLEL_SIZE:1}
        max_tokens: ${env.MAX_TOKENS:4096}
        enforce_eager: ${env.ENFORCE_EAGER:False}
        gpu_memory_utilization: ${env.GPU_MEMORY_UTILIZATION:0.7}

  memory:
    - provider_id: faiss
      provider_type: inline::faiss
      config:
        kvstore:
          type: sqlite
          namespace: null
          db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/faiss_store.db

  safety:
    - provider_id: llama-guard
      provider_type: inline::llama-guard
      config: {}

  agents:
    - provider_id: meta-reference
      provider_type: inline::meta-reference
      config:
        persistence_store:
          type: sqlite
          namespace: null
          db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/agents_store.db

  telemetry:
    - provider_id: meta-reference
      provider_type: inline::meta-reference
      config: {}

# SQLite-backed store; shares the SQLITE_STORE_DIR base directory placeholder
# with the faiss and agents stores above.
metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/registry.db

# Model entries, bound to the `vllm` inference provider declared above.
models:
  # NOTE(review): unlike `providers.inference[0].config.model`, this
  # placeholder carries no default — confirm what the loader does when
  # INFERENCE_MODEL is unset.
  - metadata: {}
    model_id: ${env.INFERENCE_MODEL}
    provider_id: vllm
    provider_model_id: null

# Explicit empty lists (a bare `key:` would parse as null instead).
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []