version: '2'
image_name: local
docker_image: null
conda_env: local
apis:
- shields
- agents
- models
- memory
- memory_banks
- inference
- safety
providers:
  inference:
  - provider_id: vllm-inference
    provider_type: inline::vllm
    config:
      model: Llama3.2-3B-Instruct
      tensor_parallel_size: 1
      gpu_memory_utilization: 0.4
      enforce_eager: true
      max_tokens: 4096
  - provider_id: vllm-inference-safety
    provider_type: inline::vllm
    config:
      model: Llama-Guard-3-1B
      tensor_parallel_size: 1
      gpu_memory_utilization: 0.2
      enforce_eager: true
      max_tokens: 4096
  safety:
  - provider_id: meta0
    provider_type: inline::llama-guard
    config:
      model: Llama-Guard-3-1B
      excluded_categories: []
  # Uncomment to use prompt guard
  # - provider_id: meta1
  #   provider_type: inline::prompt-guard
  #   config:
  #     model: Prompt-Guard-86M
  memory:
  - provider_id: meta0
    provider_type: inline::meta-reference
    config: {}
  # Uncomment to use pgvector
  # - provider_id: pgvector
  #   provider_type: remote::pgvector
  #   config:
  #     host: 127.0.0.1
  #     port: 5432
  #     db: postgres
  #     user: postgres
  #     password: mysecretpassword
  agents:
  - provider_id: meta0
    provider_type: inline::meta-reference
    config:
      persistence_store:
        namespace: null
        type: sqlite
        db_path: ~/.llama/runtime/agents_store.db
  telemetry:
  - provider_id: meta0
    provider_type: inline::meta-reference
    config: {}
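
# Usage note: a config like this is typically started with the llama-stack CLI,
# e.g. `llama stack run ./run.yaml`. The filename `run.yaml` is an assumption;
# substitute the actual path to this file. Note that the two inline::vllm
# providers above reserve 0.4 and 0.2 of GPU memory respectively, so both
# engines are sized to coexist within a single GPU's memory.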