Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/lustre #737

Merged
merged 3 commits into from
Feb 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions roles/lustre_client/handlers/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,28 @@
ansible.builtin.reboot:
become: true
listen: reboot

# Handler on the "reenable_lnet" topic: reloads the systemd unit files and
# disables lnet.service. Another handler listening on the same topic enables
# the unit again afterwards.
- name: 'Re-enable lnet.service part 1: disable lnet.service.'
  listen: reenable_lnet
  become: true
  ansible.builtin.systemd:
    daemon_reload: true
    name: lnet.service
    enabled: false

# Handler on the "reenable_lnet" topic: reloads the systemd unit files and
# enables lnet.service. It is the counterpart of the "part 1" handler that
# disables the unit on the same topic.
- name: 'Re-enable lnet.service part 2: enable lnet.service.'
  listen: reenable_lnet
  become: true
  ansible.builtin.systemd:
    daemon_reload: true
    name: lnet.service
    enabled: true

# Handler on the "restart_lnet" topic: reloads the systemd unit files and
# restarts lnet.service so that changed configuration takes effect.
- name: Restart lnet.service.
  listen: restart_lnet
  become: true
  ansible.builtin.systemd:
    daemon_reload: true
    name: lnet.service
    state: restarted
...
61 changes: 56 additions & 5 deletions roles/lustre_client/tasks/install.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,29 +106,80 @@
become: true

# Deletes the "tcp" LNet network with lnetctl.
# The task reports "changed" when lnetctl exits with 0. It only fails when
# lnetctl exits non-zero AND stderr does not contain "errno: -2"
# (presumably "no such network" -- TODO confirm against lnetctl).
- name: 'Create lustre lnet config: part 2.'
  become: true
  ansible.builtin.command:
    cmd: |
      lnetctl net del --net tcp
  register: lnetctl_del_status
  failed_when:
    - lnetctl_del_status.rc > 0
    - '"errno: -2" not in lnetctl_del_status.stderr'
  changed_when:
    - lnetctl_del_status.rc == 0

# Adds one LNet network per entry of lustre_client_networks; each entry must
# provide a "name" (LNet network ID) and an "interface" (network device).
# The per-item result is registered exactly once as lnetctl_add_status and
# all conditions refer to that variable.
# The task reports "changed" when lnetctl exits with 0. It only fails when
# lnetctl exits non-zero AND stderr does not contain "errno: -17"
# (presumably "network already exists" -- TODO confirm against lnetctl).
- name: 'Create lustre lnet config: part 3.'
  ansible.builtin.command:
    cmd: |
      lnetctl net add --net "{{ item.name }}" --if "{{ item.interface }}"
  register: lnetctl_add_status
  changed_when:
    - lnetctl_add_status.rc == 0
  failed_when:
    - lnetctl_add_status.rc > 0
    - '"errno: -17" not in lnetctl_add_status.stderr'
  with_items: "{{ lustre_client_networks }}"
  become: true

# Exports the running LNet configuration to /etc/lnet.conf and detects whether
# the file content changed by comparing md5sum output before and after the
# export. When it changed, the marker line is printed, the task reports
# "changed" and the restart_lnet handler is notified.
- name: Save lustre lnet config to /etc/lnet.conf
  ansible.builtin.shell:
    # The config is exported exactly once, with the -b flag.
    # The comparison uses the POSIX "[ ... ]" test instead of the bash-only
    # "[[ ... ]]", because the shell module runs /bin/sh unless an other
    # executable is specified.
    cmd: |
      checksum_old=$(md5sum /etc/lnet.conf)
      lnetctl export -b /etc/lnet.conf
      checksum_new=$(md5sum /etc/lnet.conf)
      if [ "${checksum_old}" != "${checksum_new}" ]; then
        echo 'lnet.conf has changed.'
      fi
  register: lnetconf_status
  changed_when:
    - '"lnet.conf has changed." in lnetconf_status.stdout'
  notify:
    - restart_lnet
  become: true

# Ensures the lnet.service unit file contains "SuccessExitStatus=239".
# An existing (optionally commented out) SuccessExitStatus= line is replaced;
# otherwise the line is inserted after the [Service] section header.
# A backup of the unit file is kept and restart_lnet is notified on change.
- name: Patch lnet.service file for systemd to ignore failure if a Lustre network ID was already loaded.
  become: true
  ansible.builtin.lineinfile:
    path: /usr/lib/systemd/system/lnet.service
    owner: root
    group: root
    mode: '0644'
    backup: true
    regexp: '^#?SuccessExitStatus='
    insertafter: '\[Service\]'
    line: 'SuccessExitStatus=239'  # lustre NID already loaded.
  notify:
    - restart_lnet

# Ensures the lnet.service unit file contains
# "WantedBy=multi-user.target remote-fs.target".
# An existing (optionally commented out) WantedBy= line is replaced;
# otherwise the line is inserted after the [Install] section header.
# A backup of the unit file is kept. On change the unit is first re-enabled
# (reenable_lnet) and then restarted (restart_lnet).
- name: Patch lnet.service file for systemd to start lnet.service before remote-fs.target.
  become: true
  ansible.builtin.lineinfile:
    path: /usr/lib/systemd/system/lnet.service
    owner: root
    group: root
    mode: '0644'
    backup: true
    regexp: '^#?WantedBy='
    insertafter: '\[Install\]'
    line: 'WantedBy=multi-user.target remote-fs.target'
  notify:
    - reenable_lnet
    - restart_lnet

# Reloads the systemd unit files, then makes sure lnet.service is enabled
# at boot and currently running.
- name: Start lnet.service.
  become: true
  ansible.builtin.systemd:
    daemon_reload: true
    name: lnet.service
    state: started
    enabled: true
...