---
# Compose definition for a single service that starts the llama_stack
# distribution server from the `distribution-nvidia:dev` image.
services:
  llamastack:
    image: distribution-nvidia:dev

    # The container shares the host's network stack. No `ports:` section is
    # declared on purpose: Compose does not allow port mapping together with
    # host networking (the mapping is discarded or rejected at runtime).
    # NOTE(review): a "8321:8321" mapping used to be listed here, so the
    # server presumably listens on 8321 -- confirm against run.yaml.
    network_mode: host

    volumes:
      # Host-side ~/.llama directory made available under /root/.llama.
      - ~/.llama:/root/.llama
      # Local run configuration, mounted at the path that the entrypoint
      # passes to --yaml-config.
      - ./run.yaml:/root/llamastack-run-nvidia.yaml

    environment:
      # Taken from the caller's environment; falls back to
      # Llama3.1-8B-Instruct when unset or empty.
      - INFERENCE_MODEL=${INFERENCE_MODEL:-Llama3.1-8B-Instruct}
      # Taken from the caller's environment; falls back to an empty string.
      - NVIDIA_API_KEY=${NVIDIA_API_KEY:-}

    # Folded scalar: the two lines below are joined with a single space, so
    # the resulting command string is one line.
    entrypoint: >-
      bash -c "python -m llama_stack.distribution.server.server
      --yaml-config /root/llamastack-run-nvidia.yaml"

    deploy:
      restart_policy:
        # Restart only when the container exits with an error.
        condition: on-failure
        # Pause between restart attempts.
        delay: 3s
        # Give up after this many attempts.
        max_attempts: 5
        # Time span used to decide whether a restart succeeded.
        window: 60s