Skip to content

Commit

Permalink
Merge pull request #737 from pneerincx/fix/lustre
Browse files Browse the repository at this point in the history
Fix/lustre
  • Loading branch information
marieke-bijlsma authored Feb 27, 2023
2 parents 272afc5 + 6e159c2 commit 22cd090
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 5 deletions.
24 changes: 24 additions & 0 deletions roles/lustre_client/handlers/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,28 @@
ansible.builtin.reboot:
become: true
listen: reboot

# First of the two handlers listening on the 'reenable_lnet' topic:
# reloads systemd's unit definitions and disables lnet.service.
# NOTE(review): presumably the disable/enable cycle exists to recreate the
# unit's install symlinks after its [Install] section was edited - confirm.
- name: "Re-enable lnet.service part 1: disable lnet.service."
  listen: reenable_lnet
  become: true
  ansible.builtin.systemd:
    daemon_reload: true
    name: lnet.service
    enabled: false

# Second of the two handlers listening on the 'reenable_lnet' topic:
# reloads systemd's unit definitions and enables lnet.service again.
- name: "Re-enable lnet.service part 2: enable lnet.service."
  listen: reenable_lnet
  become: true
  ansible.builtin.systemd:
    daemon_reload: true
    name: lnet.service
    enabled: true

# Handler for the 'restart_lnet' topic: reloads systemd's unit definitions
# and restarts lnet.service.
- name: Restart lnet.service.
  listen: restart_lnet
  become: true
  ansible.builtin.systemd:
    daemon_reload: true
    name: lnet.service
    state: restarted
...
61 changes: 56 additions & 5 deletions roles/lustre_client/tasks/install.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,29 +106,80 @@
become: true

# Deletes the LNet network named "tcp" with lnetctl.
# The task reports "changed" when the command exits 0 and only fails when the
# exit code is non-zero AND stderr does not contain "errno: -2".
# NOTE(review): errno -2 presumably means the network did not exist - confirm.
- name: "Create lustre lnet config: part 2."
  become: true
  register: lnetctl_del_status
  ansible.builtin.command:
    cmd: |
      lnetctl net del --net tcp
  failed_when:
    - lnetctl_del_status.rc > 0
    - '"errno: -2" not in lnetctl_del_status.stderr'
  changed_when:
    - lnetctl_del_status.rc == 0

# Adds one LNet network per entry of lustre_client_networks; each entry must
# provide 'name' (the network) and 'interface' (the NIC to bind it to).
# The result is registered once, as lnetctl_add_status, and every condition
# below refers to that single variable: a duplicate 'register' key or a
# reference to another, never-registered variable would break evaluation.
# The task reports "changed" when the command exits 0 and only fails when the
# exit code is non-zero AND stderr does not contain "errno: -17".
# NOTE(review): errno -17 presumably means the network already exists - confirm.
- name: 'Create lustre lnet config: part 3.'
  ansible.builtin.command:
    cmd: |
      lnetctl net add --net "{{ item.name }}" --if "{{ item.interface }}"
  register: lnetctl_add_status
  changed_when:
    - lnetctl_add_status.rc == 0
  failed_when:
    - lnetctl_add_status.rc > 0
    - '"errno: -17" not in lnetctl_add_status.stderr'
  with_items: "{{ lustre_client_networks }}"
  become: true

# Writes the running LNet configuration to /etc/lnet.conf and detects whether
# the file content changed by comparing md5sum output before and after.
# Only when the script prints the marker line is the task reported as
# "changed", which in turn notifies the 'restart_lnet' handler.
- name: Save lustre lnet config to /etc/lnet.conf
  ansible.builtin.shell:
    # The script uses the bash-only [[ ]] test, so do not rely on the module's
    # default /bin/sh being bash.
    executable: /bin/bash
    cmd: |
      checksum_old=$(md5sum /etc/lnet.conf)
      # Export exactly once, in backup (-b) format; an additional plain export
      # to the same path would just be overwritten by this one.
      lnetctl export -b /etc/lnet.conf
      checksum_new=$(md5sum /etc/lnet.conf)
      if [[ "${checksum_old}" != "${checksum_new}" ]]; then
        echo 'lnet.conf has changed.'
      fi
  register: lnetconf_status
  changed_when:
    - '"lnet.conf has changed." in lnetconf_status.stdout'
  notify:
    - restart_lnet
  become: true

# Ensures the lnet.service unit file contains 'SuccessExitStatus=239':
# an existing (possibly commented-out) SuccessExitStatus= line is replaced,
# otherwise the line is inserted after the [Service] header.
# A backup copy of the unit file is kept and 'restart_lnet' is notified
# when the file was modified.
- name: Patch lnet.service file for systemd to ignore failure if a Lustre network ID was already loaded.
  become: true
  notify:
    - restart_lnet
  ansible.builtin.lineinfile:
    path: /usr/lib/systemd/system/lnet.service
    owner: root
    group: root
    mode: '0644'
    backup: true
    regexp: '^#?SuccessExitStatus='
    insertafter: '\[Service\]'
    line: 'SuccessExitStatus=239'  # lustre NID already loaded.

# Ensures the lnet.service unit file lists both multi-user.target and
# remote-fs.target in WantedBy=: an existing (possibly commented-out)
# WantedBy= line is replaced, otherwise the line is inserted after the
# [Install] header. A backup copy of the unit file is kept.
# When the file was modified, both 'reenable_lnet' and 'restart_lnet'
# are notified.
- name: Patch lnet.service file for systemd to start lnet.service before remote-fs.target.
  become: true
  notify:
    - reenable_lnet
    - restart_lnet
  ansible.builtin.lineinfile:
    path: /usr/lib/systemd/system/lnet.service
    owner: root
    group: root
    mode: '0644'
    backup: true
    regexp: '^#?WantedBy='
    insertafter: '\[Install\]'
    line: 'WantedBy=multi-user.target remote-fs.target'

# Reloads systemd's unit definitions, then makes sure lnet.service is both
# enabled and currently running.
- name: Start lnet.service.
  become: true
  ansible.builtin.systemd:
    daemon_reload: true
    name: lnet.service
    state: started
    enabled: true
...

0 comments on commit 22cd090

Please sign in to comment.