-
-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathmain.yml
226 lines (187 loc) · 7.4 KB
/
main.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
---
# Automated setup of distributed Linpack benchmark.
#
# Inspired by: https://mikejmcfarlane.github.io/blog/2020/09/17/High-Performance-Linpack-for-raspberry-pi-supercomputer
# See also: https://www.sci-pi.org.uk/bench/linpack.html
# Play: prepare every host in the 'cluster' group for the benchmark.
# Steps: include OS-family dependency tasks, build and install MPICH, include
# the task file for the selected linear algebra library, then build HPL 2.3.
# Work happens under "{{ hpl_root }}/tmp"; COMPILE_*_COMPLETE marker files let
# the `creates:` guards skip build steps that already finished.
- name: Install linpack benchmark.
  hosts: cluster
  become: false
  tags: ['setup']
  vars_files: ['config.yml']

  tasks:
    # Only the task file matching the host's OS family is included.
    - name: Include RHEL-based dependency tasks.
      ansible.builtin.include_tasks: dependencies/rhel-based.yml
      when: ansible_os_family == 'RedHat'

    - name: Include Debian-based dependency tasks.
      ansible.builtin.include_tasks: dependencies/debian-based.yml
      when: ansible_os_family == 'Debian'

    - name: Include Arch-based dependency tasks.
      ansible.builtin.include_tasks: dependencies/arch-based.yml
      when: ansible_os_family == 'Archlinux'

    - name: Create required temporary directories.
      ansible.builtin.file:
        path: "{{ item }}"
        state: directory
        owner: "{{ build_dir_owner }}"
        group: "{{ build_dir_owner }}"
        # Quoted so the mode reaches Ansible as a string, not a YAML integer.
        mode: "0755"
      loop:
        - "{{ hpl_root }}/tmp"
        - "{{ hpl_root }}/tmp/{{ linear_algebra_library }}-build"
      become: true
      vars:
        # Connection user, falling back to $USER and then the gathered user id
        # when the earlier value is undefined or empty.
        build_dir_owner: "{{ ansible_user | default(ansible_env.USER, true) | default(ansible_user_id, true) }}"

    - name: Download MPI (Message Passing Interface).
      ansible.builtin.unarchive:
        src: "https://www.mpich.org/static/downloads/{{ mpich_version }}/mpich-{{ mpich_version }}.tar.gz"
        dest: "{{ hpl_root }}/tmp"
        remote_src: true
        creates: "{{ hpl_root }}/tmp/mpich-{{ mpich_version }}/README"

    - name: Build MPI (takes a while).
      ansible.builtin.command: "{{ item }}"
      args:
        chdir: "{{ hpl_root }}/tmp/mpich-{{ mpich_version }}"
        creates: "{{ hpl_root }}/tmp/COMPILE_MPI_COMPLETE"
      loop:
        - ./configure --with-device=ch3:sock FFLAGS=-fallow-argument-mismatch
        - "make -j{{ ansible_processor_nproc }}"

    - name: Install MPI.
      ansible.builtin.command: make install
      args:
        chdir: "{{ hpl_root }}/tmp/mpich-{{ mpich_version }}"
        creates: "{{ hpl_root }}/tmp/COMPILE_MPI_COMPLETE"
      become: true

    # Marker consulted by the `creates:` guards of the two MPI tasks above.
    # Preserving the timestamps keeps the task from reporting "changed" on
    # every run once the marker exists.
    - name: Create 'COMPILE_MPI_COMPLETE' file.
      ansible.builtin.file:
        path: "{{ hpl_root }}/tmp/COMPILE_MPI_COMPLETE"
        state: touch
        mode: "0644"
        modification_time: preserve
        access_time: preserve

    # Probe only: never fails and never reports a change; the return code is
    # what the next task checks.
    - name: Test if we can set CPU scaling parameters.
      ansible.builtin.command: >-
        ls /sys/devices/system/cpu/cpu0/cpufreq
      failed_when: false
      changed_when: false
      register: cpufreq_exists

    # Note: There was no simpler way to do this besides `shell`
    # (the cpu* glob and the pipe both need a shell).
    - name: Ensure CPU scaling is set to 'performance'.
      ansible.builtin.shell: >-
        echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
      when: cpufreq_exists.rc == 0
      become: true

    # Exactly one of these runs, chosen by `linear_algebra_library`.
    - name: Include ATLAS tasks.
      ansible.builtin.include_tasks: tasks/algebra_atlas.yml
      when: linear_algebra_library == 'atlas'

    - name: Include BLIS tasks.
      ansible.builtin.include_tasks: tasks/algebra_blis.yml
      when: linear_algebra_library == 'blis'

    - name: Include OpenBLAS tasks.
      ansible.builtin.include_tasks: tasks/algebra_openblas.yml
      when: linear_algebra_library == 'openblas'

    # Fetched over HTTPS so the tarball that gets compiled and executed cannot
    # be altered in transit.
    - name: Download HPL (High Performance Linpack).
      ansible.builtin.unarchive:
        src: https://www.netlib.org/benchmark/hpl/hpl-2.3.tar.gz
        dest: "{{ hpl_root }}/tmp"
        remote_src: true
        creates: "{{ hpl_root }}/tmp/hpl-2.3/README"

    # `sh make_generic` needs no shell features, so `command` is sufficient.
    - name: Set up HPL makefile.
      ansible.builtin.command: sh make_generic
      args:
        chdir: "{{ hpl_root }}/tmp/hpl-2.3/setup"
        creates: "{{ hpl_root }}/tmp/COMPILE_HPL_COMPLETE"

    - name: Copy HPL makefile into place.
      ansible.builtin.template:
        src: templates/benchmark-Make.top500.j2
        dest: "{{ hpl_root }}/tmp/hpl-2.3/Make.top500"
        mode: "0644"

    # `arch=top500` selects the Make.top500 file templated above.
    - name: Install HPL.
      ansible.builtin.command: >-
        make arch=top500
      args:
        chdir: "{{ hpl_root }}/tmp/hpl-2.3"
        creates: "{{ hpl_root }}/tmp/COMPILE_HPL_COMPLETE"

    # Marker consulted by the `creates:` guards of the HPL build tasks above.
    - name: Create COMPILE_HPL_COMPLETE file.
      ansible.builtin.file:
        path: "{{ hpl_root }}/tmp/COMPILE_HPL_COMPLETE"
        state: touch
        mode: "0644"
        modification_time: preserve
        access_time: preserve
# See: https://github.com/geerlingguy/top500-benchmark/issues/1
# Play: let every host in the 'cluster' group reach every other host over SSH.
# Each host generates a keypair, all public keys are authorized on all hosts,
# host keys are accepted, and /etc/hosts gets one managed block per host.
# `ssh_user` and `ssh_user_home` are not defined here — presumably they come
# from config.yml.
- name: Configure SSH connections between nodes.
  hosts: cluster
  become: false
  tags: ['ssh']
  vars_files: ['config.yml']
  vars:
    # Default only; replaced by the set_fact task below.
    host_ips: []

  tasks:
    - name: Ensure .ssh directory exists.
      ansible.builtin.file:
        path: "{{ ssh_user_home }}/.ssh"
        state: directory
        owner: "{{ ssh_user }}"
        group: "{{ ssh_user }}"
        # Quoted so the mode reaches Ansible as a string, not a YAML integer.
        mode: "0700"

    - name: Generate an OpenSSH keypair.
      community.crypto.openssh_keypair:
        path: "{{ ssh_user_home }}/.ssh/id_rsa"
        size: 2048

    # Read-only, so it never reports a change.
    - name: Read out ssh pubkey from each host.
      ansible.builtin.command: cat "{{ ssh_user_home }}/.ssh/id_rsa.pub"
      changed_when: false
      register: ssh_pubkey

    # Collects the registered `ssh_pubkey.stdout` of every host in the play.
    - name: Combine pubkeys into single list.
      ansible.builtin.set_fact:
        combined_ssh_pubkeys: "{{ ansible_play_hosts | map('extract', hostvars, 'ssh_pubkey') | map(attribute='stdout') | list }}"
      run_once: true

    - name: Write all pubkeys to each host.
      ansible.posix.authorized_key:
        user: "{{ ssh_user }}"
        state: present
        key: "{{ item }}"
      loop: "{{ combined_ssh_pubkeys }}"

    # Built in a single expression instead of appending inside a loop, so the
    # result never depends on an earlier value of `host_ips`.
    - name: Generate list of host IP addresses.
      ansible.builtin.set_fact:
        host_ips: "{{ groups['cluster'] | map('extract', hostvars, ['ansible_default_ipv4', 'address']) | list }}"

    # Connects once to each address so its host key is accepted (accept-new).
    - name: Accept hostkeys for each host on each host.
      ansible.builtin.command: >-
        ssh {{ ssh_user }}@{{ item }} -o StrictHostKeyChecking=accept-new date
      loop: "{{ host_ips }}"

    # One marked block per host: IP, inventory name, and the name with any
    # '.local' removed.
    - name: Write chunk of hosts information to the hosts file.
      ansible.builtin.blockinfile:
        path: /etc/hosts
        marker: "# {mark} Ansible MPI host {{ item }}"
        block: |
          {{ hostvars[item].ansible_default_ipv4.address }} {{ item }} {{ item | replace('.local', '') }}
      loop: "{{ groups['cluster'] }}"
      become: true
# Play: render the MPI node list and HPL.dat, run `mpirun ... ./xhpl` once
# from the HPL bin directory, and print the captured stdout.
# On RedHat-family hosts the firewall task files are included around the run.
- name: Run linpack benchmark.
  hosts: cluster
  become: false
  tags: ['benchmark']
  vars_files: ['config.yml']
  vars:
    # Default only; replaced by the set_fact task below.
    host_ips: []

  tasks:
    - name: Create a file describing nodes for MPI execution.
      ansible.builtin.template:
        src: templates/mpi-node-config.j2
        dest: "{{ hpl_root }}/tmp/hpl-2.3/bin/top500/cluster-hosts"
        # Quoted so the mode reaches Ansible as a string, not a YAML integer.
        mode: "0644"

    # Template originally generated using this website:
    # https://www.advancedclustering.com/act_kb/tune-hpl-dat-file/
    - name: Create HPL.dat file.
      ansible.builtin.template:
        src: templates/HPL.dat.j2
        dest: "{{ hpl_root }}/tmp/hpl-2.3/bin/top500/HPL.dat"
        mode: "0644"

    # Built in a single expression. Appending in a loop would extend any
    # `host_ips` fact set by an earlier play in the same run (set_fact values
    # outrank play vars), yielding duplicated addresses.
    - name: Generate list of host IP addresses.
      ansible.builtin.set_fact:
        host_ips: "{{ groups['cluster'] | map('extract', hostvars, ['ansible_default_ipv4', 'address']) | list }}"

    # If this is not done, the nodes will fail to connect to each other
    # causing the playbook to hang at 'Run the benchmark.'
    - name: Include firewall configuration tasks.
      ansible.builtin.include_tasks: tasks/firewall-configure.yml
      when: ansible_os_family == "RedHat"

    # `always` guarantees the firewall reset tasks are included even when
    # mpirun fails, instead of leaving the configured firewall state behind.
    - name: Run the benchmark and reset the firewall afterwards.
      block:
        - name: Run the benchmark.
          ansible.builtin.command: mpirun -f cluster-hosts ./xhpl
          args:
            chdir: "{{ hpl_root }}/tmp/hpl-2.3/bin/top500"
          register: mpirun_output
          run_once: true
      always:
        - name: Include firewall reset tasks.
          ansible.builtin.include_tasks: tasks/firewall-reset.yml
          when: ansible_os_family == "RedHat"

    - name: Output the results.
      ansible.builtin.debug:
        var: mpirun_output.stdout
      run_once: true