version: '2'
built_at: '2024-11-11T20:09:45.988375'
image_name: remote-vllm
docker_image: remote-vllm
conda_env: null
apis:
- inference
- memory
- safety
- agents
- telemetry
providers:
  inference:
  # serves the main inference model
  - provider_id: vllm-0
    provider_type: remote::vllm
    config:
      # NOTE: replace with "localhost" if you are running in "host" network mode
      url: ${env.LLAMA_INFERENCE_VLLM_URL:http://host.docker.internal:5100/v1}
      max_tokens: ${env.MAX_TOKENS:4096}
      api_token: fake
  # serves the Llama Guard safety model
  - provider_id: vllm-1
    provider_type: remote::vllm
    config:
      # NOTE: replace with "localhost" if you are running in "host" network mode
      url: ${env.LLAMA_SAFETY_VLLM_URL:http://host.docker.internal:5101/v1}
      max_tokens: ${env.MAX_TOKENS:4096}
      api_token: fake
  memory:
  - provider_id: faiss-0
    provider_type: inline::faiss
    config:
      kvstore:
        namespace: null
        type: sqlite
        db_path: "${env.SQLITE_STORE_DIR:/home/ashwin/.llama/distributions/remote-vllm}/faiss_store.db"
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta0
    provider_type: inline::meta-reference
    config:
      persistence_store:
        namespace: null
        type: sqlite
        db_path: "${env.SQLITE_STORE_DIR:/home/ashwin/.llama/distributions/remote-vllm}/agents_store.db"
  telemetry:
  - provider_id: meta0
    provider_type: inline::meta-reference
    config: {}
metadata_store:
  namespace: null
  type: sqlite
  db_path: "${env.SQLITE_STORE_DIR:/home/ashwin/.llama/distributions/remote-vllm}/registry.db"
models:
- model_id: ${env.LLAMA_INFERENCE_MODEL:Llama3.1-8B-Instruct}
  provider_id: vllm-0
- model_id: ${env.LLAMA_SAFETY_MODEL:Llama-Guard-3-1B}
  provider_id: vllm-1
shields:
- shield_id: ${env.LLAMA_SAFETY_MODEL:Llama-Guard-3-1B}
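
# A minimal usage sketch (an assumption, not part of this config: it relies on the
# standard `llama stack run` CLI with `--port`/`--env`, and expects both vLLM servers
# referenced above to already be serving on ports 5100/5101). Every `${env.VAR:default}`
# placeholder in this file can be overridden the same way:
#
#   llama stack run ./run.yaml \
#     --port 5001 \
#     --env LLAMA_INFERENCE_VLLM_URL=http://host.docker.internal:5100/v1 \
#     --env LLAMA_SAFETY_VLLM_URL=http://host.docker.internal:5101/v1 \
#     --env LLAMA_INFERENCE_MODEL=Llama3.1-8B-Instruct \
#     --env LLAMA_SAFETY_MODEL=Llama-Guard-3-1B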