# Patch for the lustre_client Ansible role; it touches two files.
#
# NOTE(review): these '#' lines sit outside every hunk. Patch tools are
# expected to skip text that precedes a 'diff --git' header -- confirm with
# the tool you use before applying.
#
# File 1: roles/lustre_client/handlers/main.yml
#   Appends three handlers after the existing reboot handler:
#   * two handlers listening on 'reenable_lnet'; the first sets lnet.service
#     to enabled: false, the second sets it back to enabled: true, each with
#     daemon_reload: true.
#   * one handler listening on 'restart_lnet' that restarts lnet.service
#     (state: restarted, daemon_reload: true).
diff --git a/roles/lustre_client/handlers/main.yml b/roles/lustre_client/handlers/main.yml
index 08ee7aa23..fdd4de594 100644
--- a/roles/lustre_client/handlers/main.yml
+++ b/roles/lustre_client/handlers/main.yml
@@ -3,4 +3,28 @@
   ansible.builtin.reboot:
   become: true
   listen: reboot
+
+- name: 'Re-enable lnet.service part 1: disable lnet.service.'
+  ansible.builtin.systemd:
+    name: lnet.service
+    enabled: false
+    daemon_reload: true
+  become: true
+  listen: reenable_lnet
+
+- name: 'Re-enable lnet.service part 2: enable lnet.service.'
+  ansible.builtin.systemd:
+    name: lnet.service
+    enabled: true
+    daemon_reload: true
+  become: true
+  listen: reenable_lnet
+
+- name: Restart lnet.service.
+  ansible.builtin.systemd:
+    name: lnet.service
+    state: restarted
+    daemon_reload: true
+  become: true
+  listen: restart_lnet
 ...
# File 2: roles/lustre_client/tasks/install.yml
#   Hunk 1 (-106,15 +106,27):
#   * inserts a new 'part 2' task that runs `lnetctl net del --net tcp`. It
#     reports changed when rc == 0 and fails only when rc > 0 AND stderr
#     does not contain "errno: -2".
#     NOTE(review): presumably errno -2 means the net did not exist --
#     confirm against lnetctl behaviour. The net name 'tcp' is hard-coded
#     here, while the add task below uses item.name -- confirm that is
#     intended.
#   * the former 'part 2' task (lnetctl net add for each entry of
#     lustre_client_networks) becomes 'part 3'; its registered variable is
#     renamed from lnetctl_status to lnetctl_add_status. It tolerates
#     "errno: -17" in stderr in the same way.
#   Hunk 2 (-122,7 +134,7):
#   * adds the -b option to `lnetctl export /etc/lnet.conf`.
#     NOTE(review): the meaning of -b is not visible in this patch; check the
#     lnetctl documentation.
#     NOTE(review): the script uses the bash-only [[ ]] test; whether the
#     task sets a bash executable is not visible in this patch.
#   Hunk 3 (-130,5 +142,44):
#   * the export task now notifies 'restart_lnet' when it reports changed.
#   * two lineinfile tasks edit /usr/lib/systemd/system/lnet.service, each
#     with backup: true:
#       - SuccessExitStatus=239, inserted after [Service] or replacing an
#         existing (optionally commented) SuccessExitStatus= line; notifies
#         restart_lnet.
#       - WantedBy=multi-user.target remote-fs.target, inserted after
#         [Install] or replacing an existing (optionally commented)
#         WantedBy= line; notifies reenable_lnet and restart_lnet.
#     NOTE(review): the unit file is edited in place under /usr/lib;
#     presumably a package update could overwrite it -- confirm whether a
#     drop-in under /etc/systemd/system would be preferable.
#   * a final task enables and starts lnet.service with daemon_reload: true.
diff --git a/roles/lustre_client/tasks/install.yml b/roles/lustre_client/tasks/install.yml
index 5fc8677be..8ec990146 100644
--- a/roles/lustre_client/tasks/install.yml
+++ b/roles/lustre_client/tasks/install.yml
@@ -106,15 +106,27 @@
   become: true
 
 - name: 'Create lustre lnet config: part 2.'
+  ansible.builtin.command:
+    cmd: |
+      lnetctl net del --net tcp
+  register: lnetctl_del_status
+  changed_when:
+    - lnetctl_del_status.rc == 0
+  failed_when:
+    - lnetctl_del_status.rc > 0
+    - '"errno: -2" not in lnetctl_del_status.stderr'
+  become: true
+
+- name: 'Create lustre lnet config: part 3.'
   ansible.builtin.command:
     cmd: |
       lnetctl net add --net "{{ item.name }}" --if "{{ item.interface }}"
-  register: lnetctl_status
+  register: lnetctl_add_status
   changed_when:
-    - lnetctl_status.rc == 0
+    - lnetctl_add_status.rc == 0
   failed_when:
-    - lnetctl_status.rc > 0
-    - '"errno: -17" not in lnetctl_status.stderr'
+    - lnetctl_add_status.rc > 0
+    - '"errno: -17" not in lnetctl_add_status.stderr'
   with_items: "{{ lustre_client_networks }}"
   become: true
 
@@ -122,7 +134,7 @@
   ansible.builtin.shell:
     cmd: |
       checksum_old=$(md5sum /etc/lnet.conf)
-      lnetctl export /etc/lnet.conf
+      lnetctl export -b /etc/lnet.conf
       checksum_new=$(md5sum /etc/lnet.conf)
       if [[ "${checksum_old}" != "${checksum_new}" ]]; then
           echo 'lnet.conf has changed.'
@@ -130,5 +142,44 @@
   register: lnetconf_status
   changed_when:
     - '"lnet.conf has changed." in lnetconf_status.stdout'
+  notify:
+    - restart_lnet
+  become: true
+
+- name: Patch lnet.service file for systemd to ignore failure if a Lustre network ID was already loaded.
+  ansible.builtin.lineinfile:
+    path: /usr/lib/systemd/system/lnet.service
+    backup: true
+    insertafter: '\[Service\]'
+    regexp: '^#?SuccessExitStatus='
+    line: 'SuccessExitStatus=239'  # lustre NID already loaded.
+    owner: root
+    group: root
+    mode: '0644'
+  notify:
+    - restart_lnet
+  become: true
+
+- name: Patch lnet.service file for systemd to start lnet.service before remote-fs.target.
+  ansible.builtin.lineinfile:
+    path: /usr/lib/systemd/system/lnet.service
+    backup: true
+    insertafter: '\[Install\]'
+    regexp: '^#?WantedBy='
+    line: 'WantedBy=multi-user.target remote-fs.target'
+    owner: root
+    group: root
+    mode: '0644'
+  notify:
+    - reenable_lnet
+    - restart_lnet
+  become: true
+
+- name: Start lnet.service.
+  ansible.builtin.systemd:
+    name: lnet.service
+    enabled: true
+    state: started
+    daemon_reload: true
   become: true
 ...