From cb2641435977ee9a28d9ce6e972ec51355a3c902 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Wed, 2 Aug 2023 13:18:29 +0200 Subject: [PATCH 01/35] fix wrong default for ccportal --- bicep/azhop.bicep | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bicep/azhop.bicep b/bicep/azhop.bicep index 01b199455..4f9a6e5d8 100644 --- a/bicep/azhop.bicep +++ b/bicep/azhop.bicep @@ -57,8 +57,8 @@ var linuxBasePlan = contains(azhopConfig, 'linux_base_plan') ? azhopConfig.linux var windowsBaseImage = contains(azhopConfig, 'windows_base_image') ? azhopConfig.windows_base_image : 'MicrosoftWindowsServer:WindowsServer:2019-Datacenter-smalldisk:latest' var lustreBaseImage = contains(azhopConfig, 'lustre_base_image') ? azhopConfig.lustre_base_image : 'azhpc:azurehpc-lustre:azurehpc-lustre-2_12:latest' var lustreBasePlan = contains(azhopConfig, 'lustre_base_plan') ? azhopConfig.lustre_base_plan : 'azhpc:azurehpc-lustre:azurehpc-lustre-2_12' -var cyclecloudBaseImage = contains(azhopConfig.cyclecloud, 'image') ? azhopConfig.cyclecloud.image : 'OpenLogic:CentOS:7_9-gen2:latest' -var cyclecloudBasePlan = contains(azhopConfig.cyclecloud, 'plan') ? azhopConfig.cyclecloud.plan : '' +var cyclecloudBaseImage = contains(azhopConfig.cyclecloud, 'image') ? azhopConfig.cyclecloud.image : linuxBaseImage +var cyclecloudBasePlan = contains(azhopConfig.cyclecloud, 'plan') ? 
azhopConfig.cyclecloud.plan : linuxBasePlan var createDatabase = (config.queue_manager == 'slurm' && config.slurm.accounting_enabled ) || config.enable_remote_winviz From 028deb1363c7cab99e79385c32c07228fb55ad47 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Wed, 2 Aug 2023 14:14:14 +0200 Subject: [PATCH 02/35] InfluxDB on AlmaLinux --- playbooks/roles/influxdb/tasks/main.yml | 17 +++++++++++++++-- playbooks/roles/influxdb/vars/AlmaLinux/8.yml | 6 ++++++ .../influxdb/vars/{main.yml => CentOS/7.yml} | 0 3 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 playbooks/roles/influxdb/vars/AlmaLinux/8.yml rename playbooks/roles/influxdb/vars/{main.yml => CentOS/7.yml} (100%) diff --git a/playbooks/roles/influxdb/tasks/main.yml b/playbooks/roles/influxdb/tasks/main.yml index 0ad8301a6..94bccc711 100644 --- a/playbooks/roles/influxdb/tasks/main.yml +++ b/playbooks/roles/influxdb/tasks/main.yml @@ -1,4 +1,16 @@ --- +- name: Include distribution variables + block: + - name: Include distribution variables + ansible.builtin.include_vars: "{{ item }}" + with_first_found: + - "{{ ansible_distribution }}/{{ ansible_distribution_major_version }}.yml" + - "{{ ansible_distribution }}.yml" + tags: [ 'always' ] + rescue: + - name: Change error message + ansible.builtin.fail: + msg: "unable to find anything in vars/ for this operating system! 
{{ ansible_distribution }}" - name: Add the Influxdb YUM repository yum_repository: @@ -6,16 +18,17 @@ description: "{{ influxdb_install_repo['name'] }}" baseurl: "{{ influxdb_install_repo['yum_repo'] }}" gpgkey: "{{ influxdb_install_repo['yum_gpg_key'] | default(omit) }}" - gpgcheck: "{{ influxdb_install_repo['yum_gpg_check'] | default(False) }}" + gpgcheck: "{{ influxdb_install_repo['yum_gpg_check'] | default(false) }}" state: present - name: Install Influxdb yum: name: - - https://dl.influxdata.com/influxdb/releases/influxdb-1.8.5.x86_64.rpm + - https://dl.influxdata.com/influxdb/releases/influxdb-1.8.10.x86_64.rpm - python3-pip state: present lock_timeout : 180 + disable_gpg_check: true - name: Install required pip packages pip: diff --git a/playbooks/roles/influxdb/vars/AlmaLinux/8.yml b/playbooks/roles/influxdb/vars/AlmaLinux/8.yml new file mode 100644 index 000000000..02af60c34 --- /dev/null +++ b/playbooks/roles/influxdb/vars/AlmaLinux/8.yml @@ -0,0 +1,6 @@ +influxdb_repo_stable: + name: "influxdb-stable" + yum_gpg_key: "https://repos.influxdata.com/influxdata-archive_compat.key" + yum_gpg_check: yes + yum_repo: "https://repos.influxdata.com/stable/$basearch/main" + diff --git a/playbooks/roles/influxdb/vars/main.yml b/playbooks/roles/influxdb/vars/CentOS/7.yml similarity index 100% rename from playbooks/roles/influxdb/vars/main.yml rename to playbooks/roles/influxdb/vars/CentOS/7.yml From c613621016019829bc1a15dbc41bfe7c17fa04e6 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Wed, 2 Aug 2023 14:49:24 +0200 Subject: [PATCH 03/35] Add AlmaLinux for CycleCloud. 
Install java --- .../roles/cyclecloud/tasks/AlmaLinux.yml | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 playbooks/roles/cyclecloud/tasks/AlmaLinux.yml diff --git a/playbooks/roles/cyclecloud/tasks/AlmaLinux.yml b/playbooks/roles/cyclecloud/tasks/AlmaLinux.yml new file mode 100644 index 000000000..32ce0acc8 --- /dev/null +++ b/playbooks/roles/cyclecloud/tasks/AlmaLinux.yml @@ -0,0 +1,60 @@ +--- +- name: Disable SELinux + selinux: + state: disabled + register: selinux + +- name: reboot + reboot: + when: selinux.reboot_required + +- name: Update packages marked for security + yum: + state: latest + security: yes + lock_timeout : 180 + +- name: install AZ CLI repo (CentOS) + shell: | + set -e + rpm --import https://packages.microsoft.com/keys/microsoft.asc + cat > /etc/yum.repos.d/azure-cli.repo < /etc/yum.repos.d/cyclecloud.repo < Date: Wed, 2 Aug 2023 14:53:21 +0200 Subject: [PATCH 04/35] remove duplicate shell entry --- playbooks/create_local_user.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/playbooks/create_local_user.yml b/playbooks/create_local_user.yml index bd7b1ac7c..94ce00240 100644 --- a/playbooks/create_local_user.yml +++ b/playbooks/create_local_user.yml @@ -16,7 +16,6 @@ comment: 'Local user' name: '{{user.name}}' groups: "{{ usergroups | selectattr('gid', 'in', (user.groups | default([], true))) | map(attribute='name')}}" - shell: /bin/bash uid: '{{user.uid}}' group: "{{ (usergroups | selectattr('gid', 'eq', 5000) | map(attribute='name'))[0] | default('azhop-users', true) }}" home: "{{ homedir_mountpoint }}/{{ user.name }}" From 10655d391231cc28ecf6c97350f9d3079a1f73ec Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Wed, 2 Aug 2023 17:05:16 +0200 Subject: [PATCH 05/35] Add support for OpenPBS 22 on AlmaLinux --- playbooks/roles/pbsserver/tasks/main.yml | 21 +++++++++++++++++-- .../roles/pbsserver/vars/AlmaLinux/8.yml | 3 +++ playbooks/roles/pbsserver/vars/CentOS/7.yml | 3 +++ 
playbooks/roles/pbsserver/vars/main.yml | 1 - 4 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 playbooks/roles/pbsserver/vars/AlmaLinux/8.yml create mode 100644 playbooks/roles/pbsserver/vars/CentOS/7.yml diff --git a/playbooks/roles/pbsserver/tasks/main.yml b/playbooks/roles/pbsserver/tasks/main.yml index 57ff8facd..c706e24a9 100644 --- a/playbooks/roles/pbsserver/tasks/main.yml +++ b/playbooks/roles/pbsserver/tasks/main.yml @@ -1,4 +1,16 @@ --- +- name: Include distribution variables + block: + - name: Include distribution variables + ansible.builtin.include_vars: "{{ item }}" + with_first_found: + - "{{ ansible_distribution }}/{{ ansible_distribution_major_version }}.yml" + - "{{ ansible_distribution }}.yml" + tags: [ 'always' ] + rescue: + - name: Change error message + ansible.builtin.fail: + msg: "unable to find anything in vars/ for this operating system! {{ ansible_distribution }}" - name: Install dependencies yum: @@ -7,15 +19,17 @@ - name: Download pbspro unarchive: - src: https://github.com/PBSPro/pbspro/releases/download/v{{openpbs_version}}/pbspro_{{openpbs_version}}.centos7.zip + src: "{{openpbs_package}}" dest: /mnt/resource remote_src: yes + when: openpbs_package != "null" - name: Install pbspro yum: name: - - /mnt/resource/pbspro_{{openpbs_version}}.centos7/pbspro-server-{{openpbs_version}}-0.x86_64.rpm + - "{{openpbs_server_rpm}}" state: present + disable_gpg_check: true lock_timeout : 180 - name: Ensure pbs-server is running. 
@@ -81,6 +95,7 @@ chmod +x /opt/cycle/pbspro/doqmgr.sh /opt/cycle/pbspro/doqmgr.sh +# TODO : Check if this is required with new version (20+) - name: Register submission hook shell: | /opt/pbs/bin/qmgr -c "create hook submit" @@ -99,6 +114,8 @@ /opt/pbs/bin/qmgr -c "set hook pbs_cgroups enabled = true" args: chdir: /opt/cycle/pbspro + when: openpbs_version == "19.1.1" + - name: Restart pbs-server service: diff --git a/playbooks/roles/pbsserver/vars/AlmaLinux/8.yml b/playbooks/roles/pbsserver/vars/AlmaLinux/8.yml new file mode 100644 index 000000000..d7e0fd72c --- /dev/null +++ b/playbooks/roles/pbsserver/vars/AlmaLinux/8.yml @@ -0,0 +1,3 @@ +openpbs_version: 22.05.11 +openpbs_package: "null" +openpbs_server_rpm: https://github.com/Azure/cyclecloud-pbspro/releases/download/{{cyclecloud_pbspro}}/openpbs-server-22.05.11-0.x86_64.rpm diff --git a/playbooks/roles/pbsserver/vars/CentOS/7.yml b/playbooks/roles/pbsserver/vars/CentOS/7.yml new file mode 100644 index 000000000..ae120dc3c --- /dev/null +++ b/playbooks/roles/pbsserver/vars/CentOS/7.yml @@ -0,0 +1,3 @@ +openpbs_version: 19.1.1 +openpbs_package: https://github.com/openpbs/openpbs/releases/download/v{{openpbs_version}}/pbspro_{{openpbs_version}}.centos7.zip +openpbs_server_rpm: /mnt/resource/pbspro_{{openpbs_version}}.centos7/pbspro-server-{{openpbs_version}}-0.x86_64.rpm diff --git a/playbooks/roles/pbsserver/vars/main.yml b/playbooks/roles/pbsserver/vars/main.yml index bee4e0202..6818e6cc7 100644 --- a/playbooks/roles/pbsserver/vars/main.yml +++ b/playbooks/roles/pbsserver/vars/main.yml @@ -1,2 +1 @@ cyclecloud_pbspro: 2.0.19 -openpbs_version: 19.1.1 From 57ab1ba1a5aa758bc62a868d884a1cca52646302 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Wed, 2 Aug 2023 21:20:38 +0200 Subject: [PATCH 06/35] PAM Auth for OOD --- playbooks/ood.yml | 15 +++---------- .../roles/ood_pam_auth/tasks/AlmaLinux/8.yml | 21 +++++++++++++++++++ .../roles/ood_pam_auth/tasks/CentOS/7.yml | 19 +++++++++++++++++ 
playbooks/roles/ood_pam_auth/tasks/main.yml | 5 +++++ 4 files changed, 48 insertions(+), 12 deletions(-) create mode 100644 playbooks/roles/ood_pam_auth/tasks/AlmaLinux/8.yml create mode 100644 playbooks/roles/ood_pam_auth/tasks/CentOS/7.yml create mode 100644 playbooks/roles/ood_pam_auth/tasks/main.yml diff --git a/playbooks/ood.yml b/playbooks/ood.yml index 2549d7115..735890c45 100644 --- a/playbooks/ood.yml +++ b/playbooks/ood.yml @@ -33,18 +33,9 @@ state: latest lock_timeout : 180 - - name: Set up PAM authentication - shell: | - yum install -y mod_authnz_pam - mkdir -p /opt/rh/httpd24/root/usr/lib64/httpd/modules/ - cp /usr/lib64/httpd/modules/mod_authnz_pam.so /opt/rh/httpd24/root/usr/lib64/httpd/modules/ - mkdir -p /opt/rh/httpd24/root/etc/httpd/conf.modules.d - echo "LoadModule authnz_pam_module modules/mod_authnz_pam.so" > /opt/rh/httpd24/root/etc/httpd/conf.modules.d/55-authnz_pam.conf - cp /etc/pam.d/sshd /etc/pam.d/ood - chmod 640 /etc/shadow - chgrp apache /etc/shadow - args: - creates: /etc/pam.d/ood + - name: Set up PAM authentication for OOD + include_role: + name: ood_pam_auth - name: Retrieve OIDC secret block: diff --git a/playbooks/roles/ood_pam_auth/tasks/AlmaLinux/8.yml b/playbooks/roles/ood_pam_auth/tasks/AlmaLinux/8.yml new file mode 100644 index 000000000..9a8a38990 --- /dev/null +++ b/playbooks/roles/ood_pam_auth/tasks/AlmaLinux/8.yml @@ -0,0 +1,21 @@ +--- + +- name: Install mod_authnz_pam + yum: + name: mod_authnz_pam + state: latest + lock_timeout : 180 + +- name: Add LoadModule line to 55-authnz_pam.conf + ansible.builtin.lineinfile: + path: /etc/httpd/conf.modules.d/55-authnz_pam.conf + search_string: 'LoadModule authnz_pam_module' + line: LoadModule authnz_pam_module modules/mod_authnz_pam.so + +- name: Set up PAM authentication + shell: | + cp /etc/pam.d/sshd /etc/pam.d/ood + chmod 640 /etc/shadow + chgrp apache /etc/shadow + args: + creates: /etc/pam.d/ood diff --git a/playbooks/roles/ood_pam_auth/tasks/CentOS/7.yml 
b/playbooks/roles/ood_pam_auth/tasks/CentOS/7.yml new file mode 100644 index 000000000..fddd3379b --- /dev/null +++ b/playbooks/roles/ood_pam_auth/tasks/CentOS/7.yml @@ -0,0 +1,19 @@ +--- + +- name: Install mod_authnz_pam + yum: + name: mod_authnz_pam + state: latest + lock_timeout : 180 + +- name: Set up PAM authentication + shell: | + mkdir -p /opt/rh/httpd24/root/usr/lib64/httpd/modules/ + cp /usr/lib64/httpd/modules/mod_authnz_pam.so /opt/rh/httpd24/root/usr/lib64/httpd/modules/ + mkdir -p /opt/rh/httpd24/root/etc/httpd/conf.modules.d + echo "LoadModule authnz_pam_module modules/mod_authnz_pam.so" > /opt/rh/httpd24/root/etc/httpd/conf.modules.d/55-authnz_pam.conf + cp /etc/pam.d/sshd /etc/pam.d/ood + chmod 640 /etc/shadow + chgrp apache /etc/shadow + args: + creates: /etc/pam.d/ood diff --git a/playbooks/roles/ood_pam_auth/tasks/main.yml b/playbooks/roles/ood_pam_auth/tasks/main.yml new file mode 100644 index 000000000..6341bf865 --- /dev/null +++ b/playbooks/roles/ood_pam_auth/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# Configure PAM Auth for OOD + +- name: Configure PAM Auth for OOD for {{ ansible_distribution }}-{{ ansible_distribution_major_version }} + include_tasks: "{{ ansible_distribution }}/{{ ansible_distribution_major_version }}.yml" From 51cdf80998e0e58b124d5cf919d0736d63c9610a Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Wed, 2 Aug 2023 21:52:05 +0200 Subject: [PATCH 07/35] PBS Client for AlmaLinux --- playbooks/roles/pbsclient/tasks/main.yml | 24 +++++++++++++++---- .../roles/pbsclient/vars/AlmaLinux/8.yml | 4 ++++ playbooks/roles/pbsclient/vars/CentOS/7.yml | 4 ++++ playbooks/roles/pbsclient/vars/main.yml | 2 +- .../roles/pbsserver/vars/AlmaLinux/8.yml | 2 +- 5 files changed, 29 insertions(+), 7 deletions(-) create mode 100644 playbooks/roles/pbsclient/vars/AlmaLinux/8.yml create mode 100644 playbooks/roles/pbsclient/vars/CentOS/7.yml diff --git a/playbooks/roles/pbsclient/tasks/main.yml b/playbooks/roles/pbsclient/tasks/main.yml index 
4f402d3b9..eab391cb2 100644 --- a/playbooks/roles/pbsclient/tasks/main.yml +++ b/playbooks/roles/pbsclient/tasks/main.yml @@ -1,4 +1,16 @@ --- +- name: Include distribution variables + block: + - name: Include distribution variables + ansible.builtin.include_vars: "{{ item }}" + with_first_found: + - "{{ ansible_distribution }}/{{ ansible_distribution_major_version }}.yml" + - "{{ ansible_distribution }}.yml" + tags: [ 'always' ] + rescue: + - name: Change error message + ansible.builtin.fail: + msg: "unable to find anything in vars/ for this operating system! {{ ansible_distribution }}" - name: install epel yum: @@ -14,29 +26,31 @@ state: present lock_timeout : 180 -- name: Download PBS RPMs +- name: Download pbspro unarchive: - src: https://github.com/PBSPro/pbspro/releases/download/v{{openpbs_version}}/pbspro_{{openpbs_version}}.centos7.zip + src: "{{openpbs_package}}" dest: /mnt/resource remote_src: yes + when: openpbs_package != "null" - name: install PBS Client yum: name: - - /mnt/resource/pbspro_{{openpbs_version}}.centos7/pbspro-execution-{{openpbs_version}}-0.x86_64.rpm + - "{{openpbs_execution_rpm}}" state: present + disable_gpg_check: true lock_timeout : 180 - name: Configure PBS server name in pbs.conf replace : path: /etc/pbs.conf - regexp: 'CHANGE_THIS_TO_PBS_PRO_SERVER_HOSTNAME' + regexp: '{{openpbs_server_regex}}' replace: '{{pbs_server}}' - name: Configure PBS server name in mom conf replace : path: /var/spool/pbs/mom_priv/config - regexp: 'CHANGE_THIS_TO_PBS_PRO_SERVER_HOSTNAME' + regexp: '{{openpbs_server_regex}}' replace: '{{pbs_server}}' - name: Fix limits (if) diff --git a/playbooks/roles/pbsclient/vars/AlmaLinux/8.yml b/playbooks/roles/pbsclient/vars/AlmaLinux/8.yml new file mode 100644 index 000000000..4f472238d --- /dev/null +++ b/playbooks/roles/pbsclient/vars/AlmaLinux/8.yml @@ -0,0 +1,4 @@ +openpbs_version: 22.05.11 +openpbs_package: "null" +openpbs_execution_rpm: 
https://github.com/Azure/cyclecloud-pbspro/releases/download/{{cyclecloud_pbspro}}/openpbs-execution-{{openpbs_version}}-0.x86_64.rpm +openpbs_server_regex: CHANGE_THIS_TO_PBS_SERVER_HOSTNAME diff --git a/playbooks/roles/pbsclient/vars/CentOS/7.yml b/playbooks/roles/pbsclient/vars/CentOS/7.yml new file mode 100644 index 000000000..05486ea26 --- /dev/null +++ b/playbooks/roles/pbsclient/vars/CentOS/7.yml @@ -0,0 +1,4 @@ +openpbs_version: 19.1.1 +openpbs_package: https://github.com/openpbs/openpbs/releases/download/v{{openpbs_version}}/pbspro_{{openpbs_version}}.centos7.zip +openpbs_execution_rpm: /mnt/resource/pbspro_{{openpbs_version}}.centos7/pbspro-execution-{{openpbs_version}}-0.x86_64.rpm +openpbs_server_regex: CHANGE_THIS_TO_PBS_PRO_SERVER_HOSTNAME diff --git a/playbooks/roles/pbsclient/vars/main.yml b/playbooks/roles/pbsclient/vars/main.yml index 4548e2092..6818e6cc7 100644 --- a/playbooks/roles/pbsclient/vars/main.yml +++ b/playbooks/roles/pbsclient/vars/main.yml @@ -1 +1 @@ -openpbs_version: 19.1.1 +cyclecloud_pbspro: 2.0.19 diff --git a/playbooks/roles/pbsserver/vars/AlmaLinux/8.yml b/playbooks/roles/pbsserver/vars/AlmaLinux/8.yml index d7e0fd72c..aff8ea83c 100644 --- a/playbooks/roles/pbsserver/vars/AlmaLinux/8.yml +++ b/playbooks/roles/pbsserver/vars/AlmaLinux/8.yml @@ -1,3 +1,3 @@ openpbs_version: 22.05.11 openpbs_package: "null" -openpbs_server_rpm: https://github.com/Azure/cyclecloud-pbspro/releases/download/{{cyclecloud_pbspro}}/openpbs-server-22.05.11-0.x86_64.rpm +openpbs_server_rpm: https://github.com/Azure/cyclecloud-pbspro/releases/download/{{cyclecloud_pbspro}}/openpbs-server-{{openpbs_version}}-0.x86_64.rpm From 988957a1e71e64a6e66262ad00476ecd8080230f Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Thu, 3 Aug 2023 13:44:08 +0200 Subject: [PATCH 08/35] configure LMod for AlmaLinux + cleanup --- playbooks/ood.yml | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git 
a/playbooks/ood.yml b/playbooks/ood.yml index 735890c45..e2eaa7b17 100644 --- a/playbooks/ood.yml +++ b/playbooks/ood.yml @@ -260,9 +260,16 @@ sed -i '$e cat cyclecloud_proxy' /opt/ood/ood-portal-generator/templates/ood-portal.conf.erb rm cyclecloud_proxy /opt/ood/ood-portal-generator/sbin/update_ood_portal - systemctl try-restart httpd24-httpd.service httpd24-htcacheclean.service fi + - name: Configure Lmod + block: + - name: enable powertools + command: dnf config-manager --enable powertools + when: + - ansible_distribution == 'AlmaLinux' + - ansible_distribution_major_version == '8' + - name: add modules for job submission yum: name: Lmod, hwloc, numactl, jq @@ -310,15 +317,6 @@ path: /etc/ood/config/apps/dashboard/initializers state: directory - # - name: create ood lustre menu option - # lineinfile: - # path: /etc/ood/config/apps/dashboard/initializers/ood.rb - # search_string: /lustre - # line : 'OodFilesApp.candidate_favorite_paths << FavoritePath.new("/lustre", title: "Lustre")' - # create: yes - # state: present - # when: ( lustre.create | default(false) ) - - name: Create "{{mounts[item].mountpoint}}" directory mountpoint file: path: '{{mounts[item].mountpoint}}' @@ -343,16 +341,6 @@ src: 'ood.rb.j2' dest: '/etc/ood/config/apps/dashboard/initializers/ood.rb' - # - name: create mount "{{mounts[item].mountpoint}}" menu option - # lineinfile: - # path: /etc/ood/config/apps/dashboard/initializers/ood.rb - # search_string: '{{mounts[item].mountpoint}}' - # line : 'OodFilesApp.candidate_favorite_paths << FavoritePath.new("{{mounts[item].mountpoint}}", title: "{{item}}" )' - # create: yes - # state: present - # loop: "{{mounts | list}}" - # when: item != 'home' - # Update file upload staging area to be in /mnt/resource - name: Create passenger temp dir file: @@ -403,6 +391,9 @@ sed -i "s/^LoadModule mpm_prefork_module/#LoadModule mpm_prefork_module/" /opt/rh/httpd24/root/etc/httpd/conf.modules.d/00-mpm.conf sed -i "s/^LoadModule mpm_worker_module/#LoadModule 
mpm_worker_module/" /opt/rh/httpd24/root/etc/httpd/conf.modules.d/00-mpm.conf sed -i "s/^#LoadModule mpm_event_module/LoadModule mpm_event_module/" /opt/rh/httpd24/root/etc/httpd/conf.modules.d/00-mpm.conf + when: + - ansible_distribution == 'CentOS' + - ansible_distribution_major_version == '7' # - name: Tune Apache mpm_event_module # copy: @@ -453,6 +444,18 @@ - name: restart ood shell: systemctl try-restart httpd24-httpd.service httpd24-htcacheclean.service + when: + - ansible_distribution == 'CentOS' + - ansible_distribution_major_version == '7' + + - name: Ensure apache is running. + service: + name: httpd + state: started + enabled: yes + when: + - ansible_distribution == 'AlmaLinux' + - ansible_distribution_major_version == '8' - name: Add cvmsf-eessi support block: @@ -461,6 +464,7 @@ name: https://ecsft.cern.ch/dist/cvmfs/cvmfs-release/cvmfs-release-latest.noarch.rpm state: present lock_timeout : 180 + disable_gpg_check: true - name: Install cvmfs package yum: @@ -473,6 +477,7 @@ name: https://github.com/EESSI/filesystem-layer/releases/download/latest/cvmfs-config-eessi-latest.noarch.rpm state: present lock_timeout : 180 + disable_gpg_check: true - name: Create cvmfs config copy: From 72e86bf673c787115f7c3d6a21c53da687ec3b5a Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Thu, 3 Aug 2023 13:57:25 +0200 Subject: [PATCH 09/35] Install flask for AlmaLinux --- playbooks/roles/ood-applications/tasks/main.yml | 15 ++++++++++++++- .../roles/ood-applications/vars/AlmaLinux/8.yml | 1 + .../roles/ood-applications/vars/CentOS/7.yml | 1 + 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 playbooks/roles/ood-applications/vars/AlmaLinux/8.yml create mode 100644 playbooks/roles/ood-applications/vars/CentOS/7.yml diff --git a/playbooks/roles/ood-applications/tasks/main.yml b/playbooks/roles/ood-applications/tasks/main.yml index b6dd66553..e95a38b3d 100644 --- a/playbooks/roles/ood-applications/tasks/main.yml +++ 
b/playbooks/roles/ood-applications/tasks/main.yml @@ -1,7 +1,20 @@ --- +- name: Include distribution variables + block: + - name: Include distribution variables + ansible.builtin.include_vars: "{{ item }}" + with_first_found: + - "{{ ansible_distribution }}/{{ ansible_distribution_major_version }}.yml" + - "{{ ansible_distribution }}.yml" + tags: [ 'always' ] + rescue: + - name: Change error message + ansible.builtin.fail: + msg: "unable to find anything in vars/ for this operating system! {{ ansible_distribution }}" + - name: install application dependencies yum: - name: python-flask + name: "{{ood-apps-dependencies}}" state: present lock_timeout : 180 diff --git a/playbooks/roles/ood-applications/vars/AlmaLinux/8.yml b/playbooks/roles/ood-applications/vars/AlmaLinux/8.yml new file mode 100644 index 000000000..ef2913947 --- /dev/null +++ b/playbooks/roles/ood-applications/vars/AlmaLinux/8.yml @@ -0,0 +1 @@ +ood-apps-dependencies: python3-flask \ No newline at end of file diff --git a/playbooks/roles/ood-applications/vars/CentOS/7.yml b/playbooks/roles/ood-applications/vars/CentOS/7.yml new file mode 100644 index 000000000..a0fde8a2a --- /dev/null +++ b/playbooks/roles/ood-applications/vars/CentOS/7.yml @@ -0,0 +1 @@ +ood-apps-dependencies: python-flask \ No newline at end of file From 3da99fa0e20535fa479d865f35c7aaaf36ef861f Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Thu, 3 Aug 2023 14:00:04 +0200 Subject: [PATCH 10/35] fix wrong variable name --- playbooks/roles/ood-applications/tasks/main.yml | 2 +- playbooks/roles/ood-applications/vars/AlmaLinux/8.yml | 2 +- playbooks/roles/ood-applications/vars/CentOS/7.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/playbooks/roles/ood-applications/tasks/main.yml b/playbooks/roles/ood-applications/tasks/main.yml index e95a38b3d..108667822 100644 --- a/playbooks/roles/ood-applications/tasks/main.yml +++ b/playbooks/roles/ood-applications/tasks/main.yml @@ -14,7 +14,7 @@ - name: install 
application dependencies yum: - name: "{{ood-apps-dependencies}}" + name: "{{ood_apps_dependencies}}" state: present lock_timeout : 180 diff --git a/playbooks/roles/ood-applications/vars/AlmaLinux/8.yml b/playbooks/roles/ood-applications/vars/AlmaLinux/8.yml index ef2913947..55d14ac1b 100644 --- a/playbooks/roles/ood-applications/vars/AlmaLinux/8.yml +++ b/playbooks/roles/ood-applications/vars/AlmaLinux/8.yml @@ -1 +1 @@ -ood-apps-dependencies: python3-flask \ No newline at end of file +ood_apps_dependencies: python3-flask \ No newline at end of file diff --git a/playbooks/roles/ood-applications/vars/CentOS/7.yml b/playbooks/roles/ood-applications/vars/CentOS/7.yml index a0fde8a2a..c5f6fc9be 100644 --- a/playbooks/roles/ood-applications/vars/CentOS/7.yml +++ b/playbooks/roles/ood-applications/vars/CentOS/7.yml @@ -1 +1 @@ -ood-apps-dependencies: python-flask \ No newline at end of file +ood_apps_dependencies: python-flask \ No newline at end of file From da7aabb64712dd5a197814591fec2aa6f1a05e52 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Thu, 3 Aug 2023 15:07:27 +0200 Subject: [PATCH 11/35] Telegraf for AlmaLinux --- playbooks/roles/telegraf/tasks/AlmaLinux.yml | 17 +++++++ playbooks/roles/telegraf/tasks/CentOS.yml | 16 +++++++ playbooks/roles/telegraf/tasks/Ubuntu.yml | 15 ++++++ playbooks/roles/telegraf/tasks/main.yml | 46 ++++++------------- playbooks/roles/telegraf/vars/AlmaLinux/8.yml | 6 +++ .../telegraf/vars/{main.yml => CentOS/7.yml} | 0 playbooks/telegraf.yml | 26 +++++++++++ 7 files changed, 93 insertions(+), 33 deletions(-) create mode 100644 playbooks/roles/telegraf/tasks/AlmaLinux.yml create mode 100644 playbooks/roles/telegraf/tasks/CentOS.yml create mode 100644 playbooks/roles/telegraf/tasks/Ubuntu.yml create mode 100644 playbooks/roles/telegraf/vars/AlmaLinux/8.yml rename playbooks/roles/telegraf/vars/{main.yml => CentOS/7.yml} (100%) diff --git a/playbooks/roles/telegraf/tasks/AlmaLinux.yml b/playbooks/roles/telegraf/tasks/AlmaLinux.yml new 
file mode 100644 index 000000000..113137ee3 --- /dev/null +++ b/playbooks/roles/telegraf/tasks/AlmaLinux.yml @@ -0,0 +1,17 @@ +--- + +- name: Add the Influxdb YUM repository + yum_repository: + name: "{{ influxdb_install_repo['name'] }}" + description: "{{ influxdb_install_repo['name'] }}" + baseurl: "{{ influxdb_install_repo['yum_repo'] }}" + gpgkey: "{{ influxdb_install_repo['yum_gpg_key'] | default(omit) }}" + gpgcheck: "{{ influxdb_install_repo['yum_gpg_check'] | default(false) }}" + state: present + +- name: Install telegraf + yum: + name: telegraf-1.27.3-1 + state: present + lock_timeout : 180 + disable_gpg_check: yes diff --git a/playbooks/roles/telegraf/tasks/CentOS.yml b/playbooks/roles/telegraf/tasks/CentOS.yml new file mode 100644 index 000000000..69a59b089 --- /dev/null +++ b/playbooks/roles/telegraf/tasks/CentOS.yml @@ -0,0 +1,16 @@ +--- + +- name: Add the Influxdb YUM repository + yum_repository: + name: "{{ influxdb_install_repo['name'] }}" + description: "{{ influxdb_install_repo['name'] }}" + baseurl: "{{ influxdb_install_repo['yum_repo'] }}" + gpgkey: "{{ influxdb_install_repo['yum_gpg_key'] | default(omit) }}" + gpgcheck: "{{ influxdb_install_repo['yum_gpg_check'] | default(false) }}" + state: present + +- name: Install telegraf + yum: + name: telegraf-1.27.3-1 + state: present + lock_timeout : 180 diff --git a/playbooks/roles/telegraf/tasks/Ubuntu.yml b/playbooks/roles/telegraf/tasks/Ubuntu.yml new file mode 100644 index 000000000..7c169bd6a --- /dev/null +++ b/playbooks/roles/telegraf/tasks/Ubuntu.yml @@ -0,0 +1,15 @@ +--- +- name: Install InfluxDB on Ubuntu + block: + - name: add APT reposotory + shell: | + # influxdata-archive_compat.key GPG Fingerprint: 9D539D90D3328DC7D6C8D3B9D8FF8E1F7DF8B07E + wget -q https://repos.influxdata.com/influxdata-archive_compat.key + echo '393e8779c89ac8d958f81f942f9ad7fb82a25e133faddaf92e15b16e6ac9ce4c influxdata-archive_compat.key' | sha256sum -c && cat influxdata-archive_compat.key | gpg --dearmor | sudo tee 
/etc/apt/trusted.gpg.d/influxdata-archive_compat.gpg > /dev/null + echo 'deb [signed-by=/etc/apt/trusted.gpg.d/influxdata-archive_compat.gpg] https://repos.influxdata.com/debian stable main' | sudo tee /etc/apt/sources.list.d/influxdata.list + + - name: Install telegraf + package: + name: telegraf + state: present + update_cache: yes \ No newline at end of file diff --git a/playbooks/roles/telegraf/tasks/main.yml b/playbooks/roles/telegraf/tasks/main.yml index 946673487..52fd1ba24 100644 --- a/playbooks/roles/telegraf/tasks/main.yml +++ b/playbooks/roles/telegraf/tasks/main.yml @@ -1,39 +1,19 @@ --- - -- name: Install InfluxDB on CentOS - block: - - name: Add the Influxdb YUM repository - yum_repository: - name: "{{ influxdb_install_repo['name'] }}" - description: "{{ influxdb_install_repo['name'] }}" - baseurl: "{{ influxdb_install_repo['yum_repo'] }}" - gpgkey: "{{ influxdb_install_repo['yum_gpg_key'] | default(omit) }}" - gpgcheck: "{{ influxdb_install_repo['yum_gpg_check'] | default(False) }}" - state: present - - - name: Install telegraf - yum: - #name: https://dl.influxdata.com/telegraf/releases/telegraf-1.18.2-1.x86_64.rpm - name: telegraf-1.18.2-1 - state: present - lock_timeout : 180 - when: ansible_distribution == "CentOS" - -- name: Install InfluxDB on Ubuntu +- name: Include distribution variables block: - - name: add APT reposotory - shell: | - # influxdata-archive_compat.key GPG Fingerprint: 9D539D90D3328DC7D6C8D3B9D8FF8E1F7DF8B07E - wget -q https://repos.influxdata.com/influxdata-archive_compat.key - echo '393e8779c89ac8d958f81f942f9ad7fb82a25e133faddaf92e15b16e6ac9ce4c influxdata-archive_compat.key' | sha256sum -c && cat influxdata-archive_compat.key | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/influxdata-archive_compat.gpg > /dev/null - echo 'deb [signed-by=/etc/apt/trusted.gpg.d/influxdata-archive_compat.gpg] https://repos.influxdata.com/debian stable main' | sudo tee /etc/apt/sources.list.d/influxdata.list + - name: Include distribution 
variables + ansible.builtin.include_vars: "{{ item }}" + with_first_found: + - "{{ ansible_distribution }}/{{ ansible_distribution_major_version }}.yml" + - "{{ ansible_distribution }}.yml" + tags: [ 'always' ] + rescue: + - name: Change error message + ansible.builtin.fail: + msg: "unable to find anything in vars/ for this operating system! {{ ansible_distribution }}" - - name: Install telegraf - package: - name: telegraf - state: present - update_cache: yes - when: ansible_distribution == "Ubuntu" +- name: Install Telegraf for {{ansible_distribution}} + include_tasks: "{{ansible_distribution}}.yml" - name: Configure telegraf template: diff --git a/playbooks/roles/telegraf/vars/AlmaLinux/8.yml b/playbooks/roles/telegraf/vars/AlmaLinux/8.yml new file mode 100644 index 000000000..02af60c34 --- /dev/null +++ b/playbooks/roles/telegraf/vars/AlmaLinux/8.yml @@ -0,0 +1,6 @@ +influxdb_repo_stable: + name: "influxdb-stable" + yum_gpg_key: "https://repos.influxdata.com/influxdata-archive_compat.key" + yum_gpg_check: yes + yum_repo: "https://repos.influxdata.com/stable/$basearch/main" + diff --git a/playbooks/roles/telegraf/vars/main.yml b/playbooks/roles/telegraf/vars/CentOS/7.yml similarity index 100% rename from playbooks/roles/telegraf/vars/main.yml rename to playbooks/roles/telegraf/vars/CentOS/7.yml diff --git a/playbooks/telegraf.yml b/playbooks/telegraf.yml index 811469c17..5abd88089 100644 --- a/playbooks/telegraf.yml +++ b/playbooks/telegraf.yml @@ -48,6 +48,20 @@ SetHandler server-status + when: + - ansible_distribution == 'CentOS' + - ansible_distribution_major_version == '7' + + - name: enable apache mod_status + copy: + dest: /etc/httpd/conf.d/mod_status.conf + content: | + + SetHandler server-status + + when: + - ansible_distribution == 'AlmaLinux' + - ansible_distribution_major_version == '8' - name: add apache metrics to telegraf config blockinfile: @@ -59,6 +73,18 @@ - name: restart ood shell: systemctl try-restart httpd24-httpd.service 
httpd24-htcacheclean.service + when: + - ansible_distribution == 'CentOS' + - ansible_distribution_major_version == '7' + + - name: Ensure apache is running. + service: + name: httpd + state: started + enabled: yes + when: + - ansible_distribution == 'AlmaLinux' + - ansible_distribution_major_version == '8' - name: PBS metrics hosts: scheduler From cd9a5438487cfd91956ce3cd3a49a1f8d00a07f3 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Thu, 3 Aug 2023 21:22:20 +0200 Subject: [PATCH 12/35] Support for OpenPBS 22 with AlmaLinux --- .../files/almalinux/installpbs.sh | 54 ++++-- .../{2-installpbs.sh => 2-installpbs.sh.j2} | 10 +- .../roles/cyclecloud_cluster/tasks/common.yml | 94 +++++++++++ .../roles/cyclecloud_cluster/tasks/main.yml | 154 ++---------------- .../cyclecloud_cluster/tasks/openpbs.yml | 40 +++++ .../roles/cyclecloud_cluster/tasks/slurm.yml | 12 ++ .../cyclecloud_cluster/vars/AlmaLinux/8.yml | 1 + .../cyclecloud_cluster/vars/CentOS/7.yml | 1 + .../cyclecloud_cluster/vars/Ubuntu/20.yml | 1 + .../roles/cyclecloud_cluster/vars/main.yml | 3 +- 10 files changed, 216 insertions(+), 154 deletions(-) rename playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/scripts/{2-installpbs.sh => 2-installpbs.sh.j2} (72%) create mode 100644 playbooks/roles/cyclecloud_cluster/tasks/common.yml create mode 100644 playbooks/roles/cyclecloud_cluster/tasks/openpbs.yml create mode 100644 playbooks/roles/cyclecloud_cluster/tasks/slurm.yml create mode 100644 playbooks/roles/cyclecloud_cluster/vars/AlmaLinux/8.yml create mode 100644 playbooks/roles/cyclecloud_cluster/vars/CentOS/7.yml create mode 100644 playbooks/roles/cyclecloud_cluster/vars/Ubuntu/20.yml diff --git a/playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/files/almalinux/installpbs.sh b/playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/files/almalinux/installpbs.sh index 41912dd88..433bc40c7 100644 --- 
a/playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/files/almalinux/installpbs.sh +++ b/playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/files/almalinux/installpbs.sh @@ -1,9 +1,33 @@ #!/bin/bash +cyclecloud_pbspro=$1 +openpbs_version=$2 -BUILD_FROM_SOURCE=yes -[ -d /opt/pbs ] && exit 0 +major_installed_pbs_version=$(cat /var/spool/pbs/pbs_version | cut -d '=' -f2 | cut -d '.' -f1) +major_wanted_pbs_version=$(echo $openpbs_version | cut -d '.' -f1) + +function install_or_build() { + case $major_wanted_pbs_version in + 19) + build + ;; + 20) + install + ;; + *) + echo "Unsupported PBS version: $openpbs_version" + exit 1 + ;; + esac +} + +function install() { + dnf install -y epel-release + dnf install -y https://github.com/Azure/cyclecloud-pbspro/releases/download/${cyclecloud_pbspro}/openpbs-execution-${openpbs_version}-0.x86_64.rpm jq +} + +function build() { + [ -d /opt/pbs ] && exit 0 -if [ "$BUILD_FROM_SOURCE" == "yes" ] ; then dnf install -y gcc make rpm-build libtool hwloc-devel \ libX11-devel libXt-devel libedit-devel libical-devel \ ncurses-devel perl postgresql-devel postgresql-contrib python2 python2-devel tcl-devel \ @@ -16,7 +40,7 @@ if [ "$BUILD_FROM_SOURCE" == "yes" ] ; then ./configure --enable-static --enable-embedded-mode make cd .. - + wget -q https://github.com/openpbs/openpbs/releases/download/v19.1.1/pbspro-19.1.1.tar.gz -O pbspro-19.1.1.tar.gz tar -xzf pbspro-19.1.1.tar.gz cd pbspro-19.1.1/ @@ -26,11 +50,19 @@ if [ "$BUILD_FROM_SOURCE" == "yes" ] ; then /opt/pbs/libexec/pbs_postinstall execution chmod 4755 /opt/pbs/sbin/pbs_iff /opt/pbs/sbin/pbs_rcp +} + + +# If PBS is not installed, then install it +if [ ! 
-f "/etc/pbs.conf" ]; then + install_or_build else - wget https://github.com/openpbs/openpbs/releases/download/v20.0.1/openpbs_20.0.1.centos_8.zip - unzip -o openpbs_20.0.1.centos_8.zip - dnf install epel-release -y - dnf install -y openpbs_20.0.1.centos_8/openpbs-execution-20.0.1-0.x86_64.rpm jq - rm -rf openpbs_20.0.1.centos_8.zip - rm -rf openpbs_20.0.1.centos_8 -fi + # If installed version is not the same as the version we want to install, then remove and install it + if [ "$major_installed_pbs_version" != "major_wanted_pbs_version" ]; then + echo "Removing old PBS version $major_installed_pbs_version" + systemctl stop pbs + rm -rf /opt/pbs + rm -rf /var/spool/pbs + install_or_build + fi +fi \ No newline at end of file diff --git a/playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/scripts/2-installpbs.sh b/playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/scripts/2-installpbs.sh.j2 similarity index 72% rename from playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/scripts/2-installpbs.sh rename to playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/scripts/2-installpbs.sh.j2 index 7c1c3d2b8..41353e5c3 100755 --- a/playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/scripts/2-installpbs.sh +++ b/playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/scripts/2-installpbs.sh.j2 @@ -4,18 +4,18 @@ source "$script_dir/../files/azhop-helpers.sh" read_os find $script_dir/../files -name "*.sh" -exec chmod +x {} \; - -# If PBS is not installed, then install it -if [ ! 
-f "/etc/pbs.conf" ]; then - $script_dir/../files/$os_release/installpbs.sh -fi +$script_dir/../files/$os_release/installpbs.sh {{cyclecloud_openbps_release}} {{openpbs_version}} echo "Configuring PBS" +# for version 19 sed -i 's/CHANGE_THIS_TO_PBS_PRO_SERVER_HOSTNAME/scheduler/' /etc/pbs.conf sed -i 's/CHANGE_THIS_TO_PBS_PRO_SERVER_HOSTNAME/scheduler/' /var/spool/pbs/mom_priv/config sed -i "s/^if /#if /g" /opt/pbs/lib/init.d/limits.pbs_mom sed -i "s/^fi/#fi /g" /opt/pbs/lib/init.d/limits.pbs_mom +# for version 20 and above +sed -i 's/CHANGE_THIS_TO_PBS_SERVER_HOSTNAME/scheduler/' /etc/pbs.conf + systemctl restart pbs || exit 1 echo "PBS Restarted" diff --git a/playbooks/roles/cyclecloud_cluster/tasks/common.yml b/playbooks/roles/cyclecloud_cluster/tasks/common.yml new file mode 100644 index 000000000..cd5c9d7f6 --- /dev/null +++ b/playbooks/roles/cyclecloud_cluster/tasks/common.yml @@ -0,0 +1,94 @@ +--- +- name: Drop project files if they exists + file: + path: '{{project_root}}' + state: absent + +- name: Create common project dir + file: + path: '{{project_root}}' + state: directory + +- name: Create common CycleCloud project + command: '/usr/local/bin/cyclecloud project init common' + args: + chdir: '{{project_root}}' + creates: '{{common_project_root}}/project.ini' + +- name: Copy common cluster-init files + copy: + src: '{{role_path}}/projects/common/cluster-init' + dest: '{{common_project_root}}/specs/default/' + +- name: Copy mountnfs file. 
+ template: + src: '{{role_path}}/projects/common/cluster-init/scripts/1-mountnfs.sh.j2' + dest: '{{common_project_root}}/specs/default/cluster-init/scripts/1-mountnfs.sh' + mode: 0777 + +- name: Add lustre script + template: + src: '{{role_path}}/projects/common/cluster-init/scripts/2-mountlustre.sh.j2' + dest: '{{common_project_root}}/specs/default/cluster-init/scripts/2-mountlustre.sh' + mode: 0777 + when: ( lustre.create | default(false) ) + +- name: Add Linux joindomain script + template: + src: '{{role_path}}/projects/common/cluster-init/scripts/3-joindomain.sh.j2' + dest: '{{common_project_root}}/specs/default/cluster-init/scripts/3-joindomain.sh' + mode: 0777 + +- name: Add Windows joindomain script + template: + src: '{{role_path}}/projects/common/cluster-init/scripts/3-joindomain.bat.j2' + dest: '{{common_project_root}}/specs/default/cluster-init/scripts/3-joindomain.bat' + mode: 0777 + +- name: Add default script + template: + src: '{{role_path}}/projects/common/cluster-init/scripts/5-default.sh.j2' + dest: '{{common_project_root}}/specs/default/cluster-init/scripts/5-default.sh' + mode: 0777 + +- name: Remove telegraf script if telegraf is disabled + file: + path: '{{common_project_root}}/specs/default/cluster-init/scripts/4-install-telegraf.sh' + state: absent + when: ( monitoring.telegraf | default(true) ) == false + +- name: Add telegraf configuration file if telegraf is enabled + template: + src: '{{role_path}}/projects/common/cluster-init/files/telegraf.conf.j2' + dest: '{{common_project_root}}/specs/default/cluster-init/files/telegraf.conf' + mode: 0600 + when: ( monitoring.telegraf | default(true) ) + +- name: Add nhc configuration file + template: + src: '{{role_path}}/projects/common/cluster-init/files/nhc/nhc_common.conf.j2' + dest: '{{common_project_root}}/specs/default/cluster-init/files/nhc/nhc_common.conf' + +- name: Remove Jinja files + file: + path: '{{item}}' + state: absent + with_items: + - 
'{{common_project_root}}/specs/default/cluster-init/files/telegraf.conf.j2' + - '{{common_project_root}}/specs/default/cluster-init/files/nhc/nhc_common.conf.j2' + - '{{common_project_root}}/specs/default/cluster-init/scripts/1-mountnfs.sh.j2' + - '{{common_project_root}}/specs/default/cluster-init/scripts/2-mountlustre.sh.j2' + - '{{common_project_root}}/specs/default/cluster-init/scripts/3-joindomain.sh.j2' + - '{{common_project_root}}/specs/default/cluster-init/scripts/3-joindomain.bat.j2' + - '{{common_project_root}}/specs/default/cluster-init/scripts/5-default.sh.j2' + +- name: Remove CVMFS if not enabled + file: + path: '{{common_project_root}}/specs/default/cluster-init/scripts/7-cvmfs-eessi.sh' + state: absent + when: cvmfs_eessi_enabled == false + +- name: Upload common CycleCloud project + command: '/usr/local/bin/cyclecloud project upload' + args: + chdir: '{{common_project_root}}' diff --git a/playbooks/roles/cyclecloud_cluster/tasks/main.yml b/playbooks/roles/cyclecloud_cluster/tasks/main.yml index fae4466a7..22571db8a 100644 --- a/playbooks/roles/cyclecloud_cluster/tasks/main.yml +++ b/playbooks/roles/cyclecloud_cluster/tasks/main.yml @@ -1,142 +1,22 @@ --- - -- name: Drop project files if they exists - file: - path: '{{project_root}}' - state: absent - -- name: Create common project dir - file: - path: '{{project_root}}' - state: directory - -- name: Create common CycleCloud project - command: '/usr/local/bin/cyclecloud project init common' - args: - chdir: '{{project_root}}' - creates: '{{common_project_root}}/project.ini' - -- name: Copy common cluster-init files - copy: - src: '{{role_path}}/projects/common/cluster-init' - dest: '{{common_project_root}}/specs/default/' - -- name: Copy mountnfs file. 
- template: - src: '{{role_path}}/projects/common/cluster-init/scripts/1-mountnfs.sh.j2' - dest: '{{common_project_root}}/specs/default/cluster-init/scripts/1-mountnfs.sh' - mode: 0777 - -- name: Add lustre script - template: - src: '{{role_path}}/projects/common/cluster-init/scripts/2-mountlustre.sh.j2' - dest: '{{common_project_root}}/specs/default/cluster-init/scripts/2-mountlustre.sh' - mode: 0777 - when: ( lustre.create | default(false) ) - -- name: Add Linux joindomain script - template: - src: '{{role_path}}/projects/common/cluster-init/scripts/3-joindomain.sh.j2' - dest: '{{common_project_root}}/specs/default/cluster-init/scripts/3-joindomain.sh' - mode: 0777 - -- name: Add Windows joindomain script - template: - src: '{{role_path}}/projects/common/cluster-init/scripts/3-joindomain.bat.j2' - dest: '{{common_project_root}}/specs/default/cluster-init/scripts/3-joindomain.bat' - mode: 0777 - -- name: Add default script - template: - src: '{{role_path}}/projects/common/cluster-init/scripts/5-default.sh.j2' - dest: '{{common_project_root}}/specs/default/cluster-init/scripts/5-default.sh' - mode: 0777 - -- name: Remove telegraf script if telegraf is disabled - file: - path: '{{common_project_root}}/specs/default/cluster-init/scripts/4-install-telegraf.sh' - state: absent - when: ( monitoring.telegraf | default(true) ) == false - -- name: Add telegraf configuration file if telegraf is enabled - template: - src: '{{role_path}}/projects/common/cluster-init/files/telegraf.conf.j2' - dest: '{{common_project_root}}/specs/default/cluster-init/files/telegraf.conf' - mode: 0600 - when: ( monitoring.telegraf | default(true) ) - -- name: Add nhc configuration file - template: - src: '{{role_path}}/projects/common/cluster-init/files/nhc/nhc_common.conf.j2' - dest: '{{common_project_root}}/specs/default/cluster-init/files/nhc/nhc_common.conf' - -- name: Remove Jinja files - file: - path: '{{item}}' - state: absent - with_items: - - 
'{{common_project_root}}/specs/default/cluster-init/files/telegraf.conf.j2' - - '{{common_project_root}}/specs/default/cluster-init/files/nhc/nhc_common.conf.j2' - - '{{common_project_root}}/specs/default/cluster-init/scripts/1-mountnfs.sh.j2' - - '{{common_project_root}}/specs/default/cluster-init/scripts/2-mountlustre.sh.j2' - - '{{common_project_root}}/specs/default/cluster-init/scripts/3-joindomain.sh.j2' - - '{{common_project_root}}/specs/default/cluster-init/scripts/3-joindomain.bat.j2' - - '{{common_project_root}}/specs/default/cluster-init/scripts/5-default.sh.j2' - -- name: Remove CVMFS if not enabled - file: - path: '{{common_project_root}}/specs/default/cluster-init/scripts/7-cvmfs-eessi.sh' - state: absent - when: cvmfs_eessi_enabled == false - -- name: Upload common CycleCloud project - command: '/usr/local/bin/cyclecloud project upload' - args: - chdir: '{{common_project_root}}' - -- name: OpenPBS Cycle Cloud project +- name: Include distribution variables block: - - name: Create Open PBS CycleCloud project - command: '/usr/local/bin/cyclecloud project init openpbs' - args: - chdir: '{{project_root}}' - creates: '{{openpbs_project_root}}/project.ini' - - - name: Copy OpenPBS cluster-init files - copy: - src: '{{role_path}}/projects/openpbs/cluster-init' - dest: '{{openpbs_project_root}}/specs/default/' - - - name: Upload OpenPBS CycleCloud project - command: '/usr/local/bin/cyclecloud project upload' - args: - chdir: '{{openpbs_project_root}}' - - - name: Add azhop-OpenPBS template - template: - src: 'azhop-OpenPBS.txt.j2' - dest: '{{project_root}}/azhop-OpenPBS.txt' - - - name: Import OpenPBS Cluster - command: '/usr/local/bin/cyclecloud import_cluster pbs1 -f {{project_root}}/azhop-OpenPBS.txt -c azhop-OpenPBS --force' - - - name: Start OpenPBS Cluster - command: '/usr/local/bin/cyclecloud start_cluster pbs1' - when: cc_queue_manager == "openpbs" - -- name: SLURM template and optional Enroot project - block: - - name: Add azhop-Slurm template - 
template: - src: 'azhop-slurm.txt.j2' - dest: '{{project_root}}/azhop-slurm.txt' - - - name: Import Slurm Cluster - command: '/usr/local/bin/cyclecloud import_cluster slurm1 -f {{project_root}}/azhop-slurm.txt -c azhop-slurm --force' - - - name: Start Slurm Cluster - command: '/usr/local/bin/cyclecloud start_cluster slurm1' - when: cc_queue_manager == "slurm" + - name: Include distribution variables + ansible.builtin.include_vars: "{{ item }}" + with_first_found: + - "{{ ansible_distribution }}/{{ ansible_distribution_major_version }}.yml" + - "{{ ansible_distribution }}.yml" + tags: [ 'always' ] + rescue: + - name: Change error message + ansible.builtin.fail: + msg: "unable to find anything in vars/ for this operating system! {{ ansible_distribution }}" + +- name: Create common project + include_tasks: common.yml + +- name: Create {{cc_queue_manager}} project + include_tasks: '{{cc_queue_manager}}.yml' - name: Populate cluster-init for enroot/pyxis block: diff --git a/playbooks/roles/cyclecloud_cluster/tasks/openpbs.yml b/playbooks/roles/cyclecloud_cluster/tasks/openpbs.yml new file mode 100644 index 000000000..9baef0536 --- /dev/null +++ b/playbooks/roles/cyclecloud_cluster/tasks/openpbs.yml @@ -0,0 +1,40 @@ +--- +- name: Create Open PBS CycleCloud project + command: '/usr/local/bin/cyclecloud project init openpbs' + args: + chdir: '{{project_root}}' + creates: '{{openpbs_project_root}}/project.ini' + +- name: Copy OpenPBS cluster-init files + copy: + src: '{{role_path}}/projects/openpbs/cluster-init' + dest: '{{openpbs_project_root}}/specs/default/' + +- name: Copy installpbs file. 
+ template: + src: '{{role_path}}/projects/openpbs/cluster-init/scripts/2-installpbs.sh.j2' + dest: '{{openpbs_project_root}}/specs/default/cluster-init/scripts/2-installpbs.sh' + mode: 0777 + +- name: Remove Jinja files + file: + path: '{{item}}' + state: absent + with_items: + - '{{openpbs_project_root}}/specs/default/cluster-init/scripts/2-installpbs.sh.j2' + +- name: Upload OpenPBS CycleCloud project + command: '/usr/local/bin/cyclecloud project upload' + args: + chdir: '{{openpbs_project_root}}' + +- name: Add azhop-OpenPBS template + template: + src: 'azhop-OpenPBS.txt.j2' + dest: '{{project_root}}/azhop-OpenPBS.txt' + +- name: Import OpenPBS Cluster + command: '/usr/local/bin/cyclecloud import_cluster pbs1 -f {{project_root}}/azhop-OpenPBS.txt -c azhop-OpenPBS --force' + +- name: Start OpenPBS Cluster + command: '/usr/local/bin/cyclecloud start_cluster pbs1' diff --git a/playbooks/roles/cyclecloud_cluster/tasks/slurm.yml b/playbooks/roles/cyclecloud_cluster/tasks/slurm.yml new file mode 100644 index 000000000..f9e2e0412 --- /dev/null +++ b/playbooks/roles/cyclecloud_cluster/tasks/slurm.yml @@ -0,0 +1,12 @@ +--- + +- name: Add azhop-Slurm template + template: + src: 'azhop-slurm.txt.j2' + dest: '{{project_root}}/azhop-slurm.txt' + +- name: Import Slurm Cluster + command: '/usr/local/bin/cyclecloud import_cluster slurm1 -f {{project_root}}/azhop-slurm.txt -c azhop-slurm --force' + +- name: Start Slurm Cluster + command: '/usr/local/bin/cyclecloud start_cluster slurm1' diff --git a/playbooks/roles/cyclecloud_cluster/vars/AlmaLinux/8.yml b/playbooks/roles/cyclecloud_cluster/vars/AlmaLinux/8.yml new file mode 100644 index 000000000..4b983f77c --- /dev/null +++ b/playbooks/roles/cyclecloud_cluster/vars/AlmaLinux/8.yml @@ -0,0 +1 @@ +openpbs_version: 22.05.11 \ No newline at end of file diff --git a/playbooks/roles/cyclecloud_cluster/vars/CentOS/7.yml b/playbooks/roles/cyclecloud_cluster/vars/CentOS/7.yml new file mode 100644 index 000000000..6c82fbdac --- 
/dev/null +++ b/playbooks/roles/cyclecloud_cluster/vars/CentOS/7.yml @@ -0,0 +1 @@ +openpbs_version: 19.1.1 \ No newline at end of file diff --git a/playbooks/roles/cyclecloud_cluster/vars/Ubuntu/20.yml b/playbooks/roles/cyclecloud_cluster/vars/Ubuntu/20.yml new file mode 100644 index 000000000..6c82fbdac --- /dev/null +++ b/playbooks/roles/cyclecloud_cluster/vars/Ubuntu/20.yml @@ -0,0 +1 @@ +openpbs_version: 19.1.1 \ No newline at end of file diff --git a/playbooks/roles/cyclecloud_cluster/vars/main.yml b/playbooks/roles/cyclecloud_cluster/vars/main.yml index 15e223ed5..7d3397142 100644 --- a/playbooks/roles/cyclecloud_cluster/vars/main.yml +++ b/playbooks/roles/cyclecloud_cluster/vars/main.yml @@ -12,4 +12,5 @@ slurm_gid: 11100 munge_uid: 11101 munge_gid: 11101 cvmfs_eessi_enabled: false -cc_enable_remote_winviz: false \ No newline at end of file +cc_enable_remote_winviz: false +cyclecloud_openbps_release: 2.0.19 From fcdaa01aa5d27c73b3059b0bc48b7bb2efe9af16 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Fri, 4 Aug 2023 23:54:25 +0200 Subject: [PATCH 13/35] support version 22 --- .../cluster-init/files/almalinux/installpbs.sh | 13 ++++++++----- playbooks/roles/pbsserver/files/sched_config | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/files/almalinux/installpbs.sh b/playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/files/almalinux/installpbs.sh index 433bc40c7..fb124bc28 100644 --- a/playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/files/almalinux/installpbs.sh +++ b/playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/files/almalinux/installpbs.sh @@ -6,15 +6,16 @@ major_installed_pbs_version=$(cat /var/spool/pbs/pbs_version | cut -d '=' -f2 | major_wanted_pbs_version=$(echo $openpbs_version | cut -d '.' 
-f1) function install_or_build() { - case $major_wanted_pbs_version in + local version=$1 + case $version in 19) build ;; - 20) + 20|22) install ;; *) - echo "Unsupported PBS version: $openpbs_version" + echo "Unsupported PBS version: $version" exit 1 ;; esac @@ -55,14 +56,16 @@ function build() { # If PBS is not installed, then install it if [ ! -f "/etc/pbs.conf" ]; then - install_or_build + install_or_build $major_wanted_pbs_version else # If installed version is not the same as the version we want to install, then remove and install it if [ "$major_installed_pbs_version" != "major_wanted_pbs_version" ]; then echo "Removing old PBS version $major_installed_pbs_version" + set +e systemctl stop pbs rm -rf /opt/pbs rm -rf /var/spool/pbs - install_or_build + set -e + install_or_build $major_wanted_pbs_version fi fi \ No newline at end of file diff --git a/playbooks/roles/pbsserver/files/sched_config b/playbooks/roles/pbsserver/files/sched_config index fc835a6bc..df72ea584 100644 --- a/playbooks/roles/pbsserver/files/sched_config +++ b/playbooks/roles/pbsserver/files/sched_config @@ -14,7 +14,7 @@ nonprimetime_prefix: np_ node_sort_key: "sort_priority HIGH" ALL provision_policy: "aggressive_provision" sort_queues: true ALL -resources: "ncpus, mem, arch, host, vnode, aoe, slot_type, group_id, ungrouped, disk, ngpus, nodearray, instance_id, machinetype" +resources: "ncpus, mem, arch, host, vnode, aoe, slot_type, group_id, ungrouped, disk, ngpus, nodearray, vm_size" load_balancing: false ALL smp_cluster_dist: pack fair_share: false ALL From 85ece2f543de62aa41b457f571eb63d0855f0bef Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Mon, 7 Aug 2023 12:37:46 +0200 Subject: [PATCH 14/35] filter hostname, don't install submit hook --- playbooks/roles/pbsserver/tasks/main.yml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/playbooks/roles/pbsserver/tasks/main.yml b/playbooks/roles/pbsserver/tasks/main.yml index c706e24a9..ead222124 100644 --- 
a/playbooks/roles/pbsserver/tasks/main.yml +++ b/playbooks/roles/pbsserver/tasks/main.yml @@ -70,11 +70,13 @@ args: chdir: /tmp/cyclecloud-pbspro -- name: change node timeout, default to 15mn +- name: change node timeout, default to 15mn, and accept all hostnames with a - shell: | jq '.idle_timeout={{autoscale.idle_timeout | default(900) }}' autoscale.json > tmp_autoscale.json || exit 1 cp tmp_autoscale.json autoscale.json sed -i 's/pcpu_count/vcpu_count/g' autoscale.json + jq '.valid_hostnames="^.+-.+$"' autoscale.json > tmp_autoscale.json || exit 1 + cp tmp_autoscale.json autoscale.json jq '.' autoscale.json || exit 1 rm tmp_autoscale.json args: @@ -95,7 +97,13 @@ chmod +x /opt/cycle/pbspro/doqmgr.sh /opt/cycle/pbspro/doqmgr.sh -# TODO : Check if this is required with new version (20+) +# Hook to replace environment variables which contains quotes as they are not supported by PBSPro 19 in the JSON output +# To test it submit a job with the following command a shell session on the ondemand VM : +# qsub -l select=1:ncpus=1 -V -- /usr/bin/bash -c 'hostname' +# Then validate the output of the job with the following command : +# qstat -fx -F json | jq +# +# Bug is fixed in version 20+ - name: Register submission hook shell: | /opt/pbs/bin/qmgr -c "create hook submit" @@ -103,6 +111,7 @@ /opt/pbs/bin/qmgr -c "set hook submit event = queuejob" args: chdir: /opt/cycle/pbspro + when: openpbs_version == "19.1.1" # Configure cgroups for largeviz3d machines # Due to a bug in PBSPro when using hyperthreaded machines we had to use our own cgroups python script. 
From deb71f4026aff90d583079996654716e7cef981b Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Mon, 7 Aug 2023 15:51:07 +0200 Subject: [PATCH 15/35] fix wrong entry for valid_hostnames --- playbooks/roles/pbsserver/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/playbooks/roles/pbsserver/tasks/main.yml b/playbooks/roles/pbsserver/tasks/main.yml index ead222124..326057cac 100644 --- a/playbooks/roles/pbsserver/tasks/main.yml +++ b/playbooks/roles/pbsserver/tasks/main.yml @@ -75,7 +75,7 @@ jq '.idle_timeout={{autoscale.idle_timeout | default(900) }}' autoscale.json > tmp_autoscale.json || exit 1 cp tmp_autoscale.json autoscale.json sed -i 's/pcpu_count/vcpu_count/g' autoscale.json - jq '.valid_hostnames="^.+-.+$"' autoscale.json > tmp_autoscale.json || exit 1 + jq '.valid_hostnames=["^.+-.+$"]' autoscale.json > tmp_autoscale.json || exit 1 cp tmp_autoscale.json autoscale.json jq '.' autoscale.json || exit 1 rm tmp_autoscale.json From 33d07957b9eeb70bbaaf3c6c72ccf33badf1bf6c Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Tue, 8 Aug 2023 15:01:01 +0200 Subject: [PATCH 16/35] Use Python3 for passenger apps --- playbooks/roles/ood-applications/tasks/main.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/playbooks/roles/ood-applications/tasks/main.yml b/playbooks/roles/ood-applications/tasks/main.yml index 108667822..c34f0adc4 100644 --- a/playbooks/roles/ood-applications/tasks/main.yml +++ b/playbooks/roles/ood-applications/tasks/main.yml @@ -18,6 +18,13 @@ state: present lock_timeout : 180 +- name: Use Python3 for passenger apps + lineinfile: + state: present + dest: /etc/ood/config/nginx_stage.yml + regexp: "^#passenger_python|^passenger_python" + line: "passenger_python: '/usr/bin/python3'" + - name: Create or remove application directories file: path: "{{ ood_sys_app_dir }}/{{ item.name }}" From c08eb5b57354e23ac79d7822c047d610478ce478 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Wed, 9 Aug 2023 14:39:26 
+0200 Subject: [PATCH 17/35] Use python3 for passenger only for AlmaLinux 8 --- playbooks/roles/ood-applications/tasks/main.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/playbooks/roles/ood-applications/tasks/main.yml b/playbooks/roles/ood-applications/tasks/main.yml index c34f0adc4..e8a803c08 100644 --- a/playbooks/roles/ood-applications/tasks/main.yml +++ b/playbooks/roles/ood-applications/tasks/main.yml @@ -24,6 +24,9 @@ dest: /etc/ood/config/nginx_stage.yml regexp: "^#passenger_python|^passenger_python" line: "passenger_python: '/usr/bin/python3'" + when: + - ansible_distribution == 'AlmaLinux' + - ansible_distribution_major_version == '8' - name: Create or remove application directories file: From eaa83d56ec14875ea2f36611c4d0d9394c54af7a Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Wed, 9 Aug 2023 16:02:29 +0200 Subject: [PATCH 18/35] Fix pbs build on alma --- .../projects/openpbs/cluster-init/files/almalinux/installpbs.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/files/almalinux/installpbs.sh b/playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/files/almalinux/installpbs.sh index fb124bc28..ca650afd4 100644 --- a/playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/files/almalinux/installpbs.sh +++ b/playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/files/almalinux/installpbs.sh @@ -36,8 +36,10 @@ function build() { autoconf automake gcc-c++ git jq cd /mnt + rm -rf hwloc git clone https://github.com/open-mpi/hwloc.git -b v1.11 cd hwloc + ./autogen.sh ./configure --enable-static --enable-embedded-mode make cd .. 
From d20c1b35e6d90211351225f89530716f71a676ec Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Wed, 30 Aug 2023 12:33:46 +0200 Subject: [PATCH 19/35] use version 3.0.5 to support AlmaLinux --- playbooks/roles/ood-ansible | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/playbooks/roles/ood-ansible b/playbooks/roles/ood-ansible index 150f4d8e6..12995d36f 160000 --- a/playbooks/roles/ood-ansible +++ b/playbooks/roles/ood-ansible @@ -1 +1 @@ -Subproject commit 150f4d8e6db1f31807706cc905930feb7711728f +Subproject commit 12995d36f4fde2fb5839e50664521e4bd39bd0c0 From 699a42be418e067a456f1da0fc7f2b2240b15cf6 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Wed, 30 Aug 2023 12:44:55 +0200 Subject: [PATCH 20/35] add almalinux plan --- .github/workflows/configs/almalinux.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/configs/almalinux.yml b/.github/workflows/configs/almalinux.yml index bc4b5f9dc..03a9647cd 100644 --- a/.github/workflows/configs/almalinux.yml +++ b/.github/workflows/configs/almalinux.yml @@ -1,4 +1,6 @@ linux_base_image: "almalinux:almalinux-hpc:8_7-hpc-gen2:latest" # publisher:offer:sku:version or image_id +# linux image plan if required, format is publisher:product:name +linux_base_plan: almalinux:almalinux-hpc:8_7-hpc-gen2 queues: - name: execute From f31422c1a74bd3ac9477d1935f59eb9df4944741 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Wed, 30 Aug 2023 17:03:17 +0200 Subject: [PATCH 21/35] not using almalinux hpc image --- .github/workflows/configs/almalinux.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/configs/almalinux.yml b/.github/workflows/configs/almalinux.yml index 03a9647cd..e8d78e78c 100644 --- a/.github/workflows/configs/almalinux.yml +++ b/.github/workflows/configs/almalinux.yml @@ -1,6 +1,6 @@ -linux_base_image: "almalinux:almalinux-hpc:8_7-hpc-gen2:latest" # publisher:offer:sku:version or image_id +linux_base_image: 
almalinux:almalinux-x86_64:8_7-gen2:latest # almalinux:almalinux-hpc:8_7-hpc-gen2:latest # publisher:offer:sku:version or image_id # linux image plan if required, format is publisher:product:name -linux_base_plan: almalinux:almalinux-hpc:8_7-hpc-gen2 +linux_base_plan: almalinux:almalinux-x86_64:8_7-gen2 #almalinux:almalinux-hpc:8_7-hpc-gen2 queues: - name: execute From 80f895425da08699b3629a7efc10b96eb60ee105 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Thu, 31 Aug 2023 11:00:19 +0200 Subject: [PATCH 22/35] no plan for almalinux-x86_64 --- .github/workflows/configs/almalinux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/configs/almalinux.yml b/.github/workflows/configs/almalinux.yml index e8d78e78c..e1de9003d 100644 --- a/.github/workflows/configs/almalinux.yml +++ b/.github/workflows/configs/almalinux.yml @@ -1,6 +1,6 @@ linux_base_image: almalinux:almalinux-x86_64:8_7-gen2:latest # almalinux:almalinux-hpc:8_7-hpc-gen2:latest # publisher:offer:sku:version or image_id # linux image plan if required, format is publisher:product:name -linux_base_plan: almalinux:almalinux-x86_64:8_7-gen2 #almalinux:almalinux-hpc:8_7-hpc-gen2 +#linux_base_plan: almalinux:almalinux-x86_64:8_7-gen2 #almalinux:almalinux-hpc:8_7-hpc-gen2 queues: - name: execute From 1f647f020c19df38bf8d41d52ef13df46e4085fc Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Thu, 31 Aug 2023 11:18:16 +0200 Subject: [PATCH 23/35] fix version --- packer/scripts/almalinux/openpbs.sh | 2 +- .../openpbs/cluster-init/files/almalinux/installpbs.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packer/scripts/almalinux/openpbs.sh b/packer/scripts/almalinux/openpbs.sh index cdd84070f..d5fe65ad1 100644 --- a/packer/scripts/almalinux/openpbs.sh +++ b/packer/scripts/almalinux/openpbs.sh @@ -27,7 +27,7 @@ if [ "$BUILD_FROM_SOURCE" == "yes" ] ; then chmod 4755 /opt/pbs/sbin/pbs_iff /opt/pbs/sbin/pbs_rcp else wget 
https://github.com/openpbs/openpbs/releases/download/v20.0.1/openpbs_20.0.1.centos_8.zip - unzip -o pbspro_19.1.1.centos7.zip + unzip -o openpbs_20.0.1.centos_8.zip dnf install epel-release -y dnf install -y openpbs_20.0.1.centos_8/openpbs-execution-20.0.1-0.x86_64.rpm jq rm -rf openpbs_20.0.1.centos_8.zip diff --git a/playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/files/almalinux/installpbs.sh b/playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/files/almalinux/installpbs.sh index ca650afd4..9113bb035 100644 --- a/playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/files/almalinux/installpbs.sh +++ b/playbooks/roles/cyclecloud_cluster/projects/openpbs/cluster-init/files/almalinux/installpbs.sh @@ -9,7 +9,7 @@ function install_or_build() { local version=$1 case $version in 19) - build + build19 ;; 20|22) install @@ -26,7 +26,7 @@ function install() { dnf install -y https://github.com/Azure/cyclecloud-pbspro/releases/download/${cyclecloud_pbspro}/openpbs-execution-${openpbs_version}-0.x86_64.rpm jq } -function build() { +function build19() { [ -d /opt/pbs ] && exit 0 dnf install -y gcc make rpm-build libtool hwloc-devel \ From 6e8974fc4384c22aeb58daac9ec6ccceda0966ba Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Thu, 31 Aug 2023 11:18:26 +0200 Subject: [PATCH 24/35] cleanup --- .github/workflows/configs/almalinux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/configs/almalinux.yml b/.github/workflows/configs/almalinux.yml index e1de9003d..fd3e09b89 100644 --- a/.github/workflows/configs/almalinux.yml +++ b/.github/workflows/configs/almalinux.yml @@ -1,6 +1,6 @@ linux_base_image: almalinux:almalinux-x86_64:8_7-gen2:latest # almalinux:almalinux-hpc:8_7-hpc-gen2:latest # publisher:offer:sku:version or image_id # linux image plan if required, format is publisher:product:name -#linux_base_plan: almalinux:almalinux-x86_64:8_7-gen2 #almalinux:almalinux-hpc:8_7-hpc-gen2 
+#linux_base_plan: almalinux:almalinux-hpc:8_7-hpc-gen2 queues: - name: execute From 25dac2f014f987950b0ee2106f35d4a4cb909e66 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Thu, 31 Aug 2023 11:31:27 +0200 Subject: [PATCH 25/35] fix wrong indentation --- playbooks/roles/ood-applications/tasks/main.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/playbooks/roles/ood-applications/tasks/main.yml b/playbooks/roles/ood-applications/tasks/main.yml index e8a803c08..e9eb76e00 100644 --- a/playbooks/roles/ood-applications/tasks/main.yml +++ b/playbooks/roles/ood-applications/tasks/main.yml @@ -20,13 +20,13 @@ - name: Use Python3 for passenger apps lineinfile: - state: present - dest: /etc/ood/config/nginx_stage.yml - regexp: "^#passenger_python|^passenger_python" - line: "passenger_python: '/usr/bin/python3'" - when: - - ansible_distribution == 'AlmaLinux' - - ansible_distribution_major_version == '8' + state: present + dest: /etc/ood/config/nginx_stage.yml + regexp: "^#passenger_python|^passenger_python" + line: "passenger_python: '/usr/bin/python3'" + when: + - ansible_distribution == 'AlmaLinux' + - ansible_distribution_major_version == '8' - name: Create or remove application directories file: From 900b85ec7ea73ea62e58349880b79192182e5629 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Thu, 31 Aug 2023 11:31:39 +0200 Subject: [PATCH 26/35] add almalinux in doc --- config.tpl.yml | 2 ++ docs/deploy/index.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/config.tpl.yml b/config.tpl.yml index 09610feeb..74290c327 100644 --- a/config.tpl.yml +++ b/config.tpl.yml @@ -177,6 +177,8 @@ locked_down_network: public_ip: true # Enable public IP creation for Jumpbox, OnDemand and create images. Default to true # Base image configuration. 
Can be either an image reference or an image_id from the image registry or a custom managed image +# For AlmaLinux 8.7 use almalinux:almalinux-x86_64:8_7-gen2:latest +# For CentOS 7.9 use OpenLogic:CentOS:7_9-gen2:latest linux_base_image: "OpenLogic:CentOS:7_9-gen2:latest" # publisher:offer:sku:version or image_id # linux image plan if required, format is publisher:product:name #linux_base_plan: diff --git a/docs/deploy/index.md b/docs/deploy/index.md index d9312fb48..880d7b623 100644 --- a/docs/deploy/index.md +++ b/docs/deploy/index.md @@ -537,6 +537,8 @@ locked_down_network: public_ip: true # Enable public IP creation for Jumpbox, OnDemand and create images. Default to true # Base image configuration. Can be either an image reference or an image_id from the image registry or a custom managed image +# For AlmaLinux 8.7 use almalinux:almalinux-x86_64:8_7-gen2:latest +# For CentOS 7.9 use OpenLogic:CentOS:7_9-gen2:latest linux_base_image: "OpenLogic:CentOS:7_9-gen2:latest" # publisher:offer:sku:version or image_id # linux image plan if required, format is publisher:product:name #linux_base_plan: From e95625e65e3e425ceb186299fa7f32b85f5155f5 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Thu, 31 Aug 2023 12:59:38 +0200 Subject: [PATCH 27/35] update package for security --- packer/scripts/almalinux/interactive-desktop-3d.sh | 2 ++ packer/scripts/almalinux/linux-setup.sh | 2 ++ 2 files changed, 4 insertions(+) diff --git a/packer/scripts/almalinux/interactive-desktop-3d.sh b/packer/scripts/almalinux/interactive-desktop-3d.sh index 05a3bb17d..771e445c1 100644 --- a/packer/scripts/almalinux/interactive-desktop-3d.sh +++ b/packer/scripts/almalinux/interactive-desktop-3d.sh @@ -108,3 +108,5 @@ cat << EOF >>/etc/sysctl.conf net.core.rmem_max=2097152 net.core.wmem_max=2097152 EOF + +dnf -y update --security diff --git a/packer/scripts/almalinux/linux-setup.sh b/packer/scripts/almalinux/linux-setup.sh index a3cd9e973..9e09a2511 100644 --- 
a/packer/scripts/almalinux/linux-setup.sh +++ b/packer/scripts/almalinux/linux-setup.sh @@ -29,3 +29,5 @@ EOF cd /usr/local/bin wget -q https://aka.ms/downloadazcopy-v10-linux -O - | tar zxf - --strip-components 1 --wildcards '*/azcopy' chmod 755 /usr/local/bin/azcopy + +dnf -y update --security From f29c58f8fcc3e90754e35af16d02cf90f9b9a283 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Thu, 31 Aug 2023 17:59:20 +0200 Subject: [PATCH 28/35] Munge on Alma --- playbooks/roles/munge/tasks/main.yml | 16 +++++++++++++++- playbooks/roles/munge/vars/AlmaLinux/8.yml | 2 ++ playbooks/roles/munge/vars/CentOS/7.yml | 4 ++++ 3 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 playbooks/roles/munge/vars/AlmaLinux/8.yml create mode 100644 playbooks/roles/munge/vars/CentOS/7.yml diff --git a/playbooks/roles/munge/tasks/main.yml b/playbooks/roles/munge/tasks/main.yml index bb1bfd0e3..3330dae0e 100644 --- a/playbooks/roles/munge/tasks/main.yml +++ b/playbooks/roles/munge/tasks/main.yml @@ -1,4 +1,17 @@ --- +- name: Include distribution variables + block: + - name: Include distribution variables + ansible.builtin.include_vars: "{{ item }}" + with_first_found: + - "{{ ansible_distribution }}/{{ ansible_distribution_major_version }}.yml" + - "{{ ansible_distribution }}.yml" + tags: [ 'always' ] + rescue: + - name: Change error message + ansible.builtin.fail: + msg: "unable to find anything in vars/ for this operating system! 
{{ ansible_distribution }}" + - name: debug debug: msg: @@ -30,8 +43,9 @@ - name: install munge yum: - name: munge, munge-libs, munge-devel + name: '{{item}}' lock_timeout : 180 + loop: '{{munge_packages}}' - name: check local munge dir file: diff --git a/playbooks/roles/munge/vars/AlmaLinux/8.yml b/playbooks/roles/munge/vars/AlmaLinux/8.yml new file mode 100644 index 000000000..7bf71ee52 --- /dev/null +++ b/playbooks/roles/munge/vars/AlmaLinux/8.yml @@ -0,0 +1,2 @@ +munge_packages: + - munge \ No newline at end of file diff --git a/playbooks/roles/munge/vars/CentOS/7.yml b/playbooks/roles/munge/vars/CentOS/7.yml new file mode 100644 index 000000000..c672c9ed9 --- /dev/null +++ b/playbooks/roles/munge/vars/CentOS/7.yml @@ -0,0 +1,4 @@ +munge_packages: + - munge + - munge-libs + - munge-devel From d65e3c70ce3addcf7509c632b0a415aa63167e4e Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Thu, 31 Aug 2023 17:59:34 +0200 Subject: [PATCH 29/35] Slurm Server on Alma --- playbooks/roles/slurmserver/tasks/main.yml | 42 ++++++++----------- .../roles/slurmserver/vars/AlmaLinux/8.yml | 10 +++++ playbooks/roles/slurmserver/vars/CentOS/7.yml | 10 +++++ 3 files changed, 38 insertions(+), 24 deletions(-) create mode 100644 playbooks/roles/slurmserver/vars/AlmaLinux/8.yml create mode 100644 playbooks/roles/slurmserver/vars/CentOS/7.yml diff --git a/playbooks/roles/slurmserver/tasks/main.yml b/playbooks/roles/slurmserver/tasks/main.yml index cba2becda..1e3855c1a 100644 --- a/playbooks/roles/slurmserver/tasks/main.yml +++ b/playbooks/roles/slurmserver/tasks/main.yml @@ -1,4 +1,17 @@ --- +- name: Include distribution variables + block: + - name: Include distribution variables + ansible.builtin.include_vars: "{{ item }}" + with_first_found: + - "{{ ansible_distribution }}/{{ ansible_distribution_major_version }}.yml" + - "{{ ansible_distribution }}.yml" + tags: [ 'always' ] + rescue: + - name: Change error message + ansible.builtin.fail: + msg: "unable to find anything in vars/ for 
this operating system! {{ ansible_distribution }}" + - name: add slurm group group: name: slurm @@ -153,19 +166,7 @@ url: https://github.com/Azure/cyclecloud-slurm/releases/download/{{cyclecloud_slurm_release}}/{{ item }} dest: '{{homedir_mountpoint}}/slurm/rpms' timeout: 180 - with_items: - - 'slurm-{{slurm_version}}.el7.x86_64.rpm' - - 'slurm-contribs-{{slurm_version}}.el7.x86_64.rpm' - - 'slurm-devel-{{slurm_version}}.el7.x86_64.rpm' - - 'slurm-example-configs-{{slurm_version}}.el7.x86_64.rpm' - - 'slurm-libpmi-{{slurm_version}}.el7.x86_64.rpm' - - 'slurm-openlava-{{slurm_version}}.el7.x86_64.rpm' - - 'slurm-pam_slurm-{{slurm_version}}.el7.x86_64.rpm' - - 'slurm-perlapi-{{slurm_version}}.el7.x86_64.rpm' - - 'slurm-slurmctld-{{slurm_version}}.el7.x86_64.rpm' - - 'slurm-slurmd-{{slurm_version}}.el7.x86_64.rpm' - - 'slurm-slurmdbd-{{slurm_version}}.el7.x86_64.rpm' - - 'slurm-torque-{{slurm_version}}.el7.x86_64.rpm' + loop: '{{slurm_server_packages}}' when: not build_rpms - name: query the size of the installed slurm RPM @@ -174,7 +175,7 @@ ignore_errors: yes - name: get the size of the slurm RPM package file - shell: rpm -qip {{homedir_mountpoint}}/slurm/rpms/slurm-{{slurm_version}}.el7.x86_64.rpm | grep Size | awk '{print $3}' + shell: rpm -qip {{homedir_mountpoint}}/slurm/rpms/{{slurm_server_packages[0]}} | grep Size | awk '{print $3}' register: downloaded_slurm_size ignore_errors: yes @@ -184,19 +185,12 @@ - name: install slurm yum: - name: - - '{{homedir_mountpoint}}/slurm/rpms/slurm-{{slurm_version}}.el7.x86_64.rpm' - - '{{homedir_mountpoint}}/slurm/rpms/slurm-devel-{{slurm_version}}.el7.x86_64.rpm' - - '{{homedir_mountpoint}}/slurm/rpms/slurm-example-configs-{{slurm_version}}.el7.x86_64.rpm' - - '{{homedir_mountpoint}}/slurm/rpms/slurm-slurmctld-{{slurm_version}}.el7.x86_64.rpm' - - '{{homedir_mountpoint}}/slurm/rpms/slurm-slurmd-{{slurm_version}}.el7.x86_64.rpm' - - '{{homedir_mountpoint}}/slurm/rpms/slurm-perlapi-{{slurm_version}}.el7.x86_64.rpm' - - 
'{{homedir_mountpoint}}/slurm/rpms/slurm-torque-{{slurm_version}}.el7.x86_64.rpm' - - '{{homedir_mountpoint}}/slurm/rpms/slurm-openlava-{{slurm_version}}.el7.x86_64.rpm' - - '{{homedir_mountpoint}}/slurm/rpms/slurm-slurmdbd-{{slurm_version}}.el7.x86_64.rpm' + name: '{{homedir_mountpoint}}/slurm/rpms/{{item}}' state: present + disable_gpg_check: true # allow_downgrade: true lock_timeout : 180 + loop: '{{slurm_server_packages}}' - name: create slurm config template: diff --git a/playbooks/roles/slurmserver/vars/AlmaLinux/8.yml b/playbooks/roles/slurmserver/vars/AlmaLinux/8.yml new file mode 100644 index 000000000..0f4b4aee2 --- /dev/null +++ b/playbooks/roles/slurmserver/vars/AlmaLinux/8.yml @@ -0,0 +1,10 @@ +# - 'slurm-openlava-{{slurm_version}}.el8.x86_64.rpm' +slurm_server_packages: + - 'slurm-{{slurm_version}}.el8.x86_64.rpm' + - 'slurm-devel-{{slurm_version}}.el8.x86_64.rpm' + - 'slurm-example-configs-{{slurm_version}}.el8.x86_64.rpm' + - 'slurm-perlapi-{{slurm_version}}.el8.x86_64.rpm' + - 'slurm-slurmctld-{{slurm_version}}.el8.x86_64.rpm' + - 'slurm-slurmd-{{slurm_version}}.el8.x86_64.rpm' + - 'slurm-slurmdbd-{{slurm_version}}.el8.x86_64.rpm' + - 'slurm-torque-{{slurm_version}}.el8.x86_64.rpm' diff --git a/playbooks/roles/slurmserver/vars/CentOS/7.yml b/playbooks/roles/slurmserver/vars/CentOS/7.yml new file mode 100644 index 000000000..ebedf4f7c --- /dev/null +++ b/playbooks/roles/slurmserver/vars/CentOS/7.yml @@ -0,0 +1,10 @@ +# - 'slurm-openlava-{{slurm_version}}.el7.x86_64.rpm' +slurm_server_packages: + - 'slurm-{{slurm_version}}.el7.x86_64.rpm' + - 'slurm-devel-{{slurm_version}}.el7.x86_64.rpm' + - 'slurm-example-configs-{{slurm_version}}.el7.x86_64.rpm' + - 'slurm-perlapi-{{slurm_version}}.el7.x86_64.rpm' + - 'slurm-slurmctld-{{slurm_version}}.el7.x86_64.rpm' + - 'slurm-slurmd-{{slurm_version}}.el7.x86_64.rpm' + - 'slurm-slurmdbd-{{slurm_version}}.el7.x86_64.rpm' + - 'slurm-torque-{{slurm_version}}.el7.x86_64.rpm' From 
379c99c7180bac84deedc66f87e7e5c37ef0d204 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Mon, 4 Sep 2023 11:00:57 +0200 Subject: [PATCH 30/35] fix tmp dir for pyxis --- playbooks/roles/slurmserver/tasks/pyxis.yml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/playbooks/roles/slurmserver/tasks/pyxis.yml b/playbooks/roles/slurmserver/tasks/pyxis.yml index 30616d812..e3d875df7 100644 --- a/playbooks/roles/slurmserver/tasks/pyxis.yml +++ b/playbooks/roles/slurmserver/tasks/pyxis.yml @@ -4,22 +4,28 @@ state: present lock_timeout : 180 +- name: Create temporary pyxis directory + ansible.builtin.tempfile: + state: directory + suffix: pyxis + register: tmp_pyxis_dir + - name: Download Pyxis plugin unarchive: src: https://github.com/NVIDIA/pyxis/archive/refs/tags/v{{pyxis_version}}.tar.gz - dest: /mnt/resource + dest: '{{tmp_pyxis_dir.path}}' remote_src: yes - name: Build Pyxis shell: make args: - chdir: /mnt/resource/pyxis-{{pyxis_version}} + chdir: '{{tmp_pyxis_dir.path}}/pyxis-{{pyxis_version}}' creates: /usr/lib64/slurm/spank_pyxis.so become: yes - name: copy the plugin to the share copy: - src: /mnt/resource/pyxis-{{pyxis_version}}/spank_pyxis.so + src: '{{tmp_pyxis_dir.path}}/pyxis-{{pyxis_version}}/spank_pyxis.so' dest: '{{homedir_mountpoint}}/slurm/' remote_src: yes mode: 0755 From 4c9af254719e22c3343fbb2a9c49160d1b25ef21 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Mon, 4 Sep 2023 11:32:42 +0200 Subject: [PATCH 31/35] slurm client for AlmaLinux --- playbooks/ood.yml | 2 +- playbooks/roles/slurmclient/tasks/main.yml | 15 ++++++++++++++- playbooks/roles/slurmclient/vars/AlmaLinux/8.yml | 1 + playbooks/roles/slurmclient/vars/CentOS/7.yml | 1 + 4 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 playbooks/roles/slurmclient/vars/AlmaLinux/8.yml create mode 100644 playbooks/roles/slurmclient/vars/CentOS/7.yml diff --git a/playbooks/ood.yml b/playbooks/ood.yml index e2eaa7b17..7b5ade0df 100644 --- 
a/playbooks/ood.yml +++ b/playbooks/ood.yml @@ -237,7 +237,7 @@ apply: become: true vars: - cc_slurm_version: '{{slurm.slurm_version | default("20.11.9")}}-1' + slurm_version: '{{slurm.slurm_version | default("20.11.9")}}-1' when: ( queue_manager is defined and queue_manager == "slurm" ) - name: setup cyclecloud proxy diff --git a/playbooks/roles/slurmclient/tasks/main.yml b/playbooks/roles/slurmclient/tasks/main.yml index ed73eee5d..2b46ff9cb 100644 --- a/playbooks/roles/slurmclient/tasks/main.yml +++ b/playbooks/roles/slurmclient/tasks/main.yml @@ -1,4 +1,16 @@ --- +- name: Include distribution variables + block: + - name: Include distribution variables + ansible.builtin.include_vars: "{{ item }}" + with_first_found: + - "{{ ansible_distribution }}/{{ ansible_distribution_major_version }}.yml" + - "{{ ansible_distribution }}.yml" + tags: [ 'always' ] + rescue: + - name: Change error message + ansible.builtin.fail: + msg: "unable to find anything in vars/ for this operating system! {{ ansible_distribution }}" - name: install epel yum: @@ -10,10 +22,11 @@ - name: install slurmd client utilities yum: name: - - '{{homedir_mountpoint}}/slurm/rpms/slurm-{{cc_slurm_version}}.el7.x86_64.rpm' + - '{{homedir_mountpoint}}/slurm/rpms/{{slurmd_package}}' state: present allow_downgrade: yes lock_timeout : 180 + disable_gpg_check: true - name: check slurm config directory file: diff --git a/playbooks/roles/slurmclient/vars/AlmaLinux/8.yml b/playbooks/roles/slurmclient/vars/AlmaLinux/8.yml new file mode 100644 index 000000000..212fd2c50 --- /dev/null +++ b/playbooks/roles/slurmclient/vars/AlmaLinux/8.yml @@ -0,0 +1 @@ +slurmd_package: 'slurm-{{slurm_version}}.el8.x86_64.rpm' diff --git a/playbooks/roles/slurmclient/vars/CentOS/7.yml b/playbooks/roles/slurmclient/vars/CentOS/7.yml new file mode 100644 index 000000000..6aefaffc9 --- /dev/null +++ b/playbooks/roles/slurmclient/vars/CentOS/7.yml @@ -0,0 +1 @@ +slurmd_package: 'slurm-{{slurm_version}}.el7.x86_64.rpm' From 
83b6a192dedb9892e68f44a6729bcd568a361402 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Mon, 4 Sep 2023 15:50:45 +0200 Subject: [PATCH 32/35] fix copy plugin when rerunning --- playbooks/roles/slurmserver/tasks/pyxis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/playbooks/roles/slurmserver/tasks/pyxis.yml b/playbooks/roles/slurmserver/tasks/pyxis.yml index e3d875df7..23c3d3948 100644 --- a/playbooks/roles/slurmserver/tasks/pyxis.yml +++ b/playbooks/roles/slurmserver/tasks/pyxis.yml @@ -25,7 +25,7 @@ - name: copy the plugin to the share copy: - src: '{{tmp_pyxis_dir.path}}/pyxis-{{pyxis_version}}/spank_pyxis.so' + src: '/usr/lib64/slurm/spank_pyxis.so' dest: '{{homedir_mountpoint}}/slurm/' remote_src: yes mode: 0755 From 19184a02df079cff07cc646c739daa9b9cb4e724 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Mon, 4 Sep 2023 15:57:47 +0200 Subject: [PATCH 33/35] fix restart of httpd on alma --- playbooks/ood.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/playbooks/ood.yml b/playbooks/ood.yml index 7b5ade0df..61257fd2f 100644 --- a/playbooks/ood.yml +++ b/playbooks/ood.yml @@ -448,10 +448,10 @@ - ansible_distribution == 'CentOS' - ansible_distribution_major_version == '7' - - name: Ensure apache is running. 
+ - name: Ensure apache is restarted service: name: httpd - state: started + state: restarted enabled: yes when: - ansible_distribution == 'AlmaLinux' From 086259185d0aa52e586fca2a27020d34fd438049 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Mon, 4 Sep 2023 16:38:31 +0200 Subject: [PATCH 34/35] Add infra_os --- .github/workflows/all_bicep.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/all_bicep.yml b/.github/workflows/all_bicep.yml index 6bd15eae1..6eadd7f7e 100644 --- a/.github/workflows/all_bicep.yml +++ b/.github/workflows/all_bicep.yml @@ -3,7 +3,7 @@ name : All Bicep End to End on: workflow_dispatch: schedule: - - cron: "10 2 * * 1" # At 02:10 UTC on each Sunday + - cron: "10 4 * * 0" # At 04:10 UTC on each Sunday jobs: deploy: @@ -15,6 +15,7 @@ jobs: home_type: ['azurefiles', 'anf'] user_auth: ['local', 'ad'] monitoring: ['none', 'grafana'] + infra_os: ['centos', 'almalinux'] uses: ./.github/workflows/z_base_callable.yml with: resource_group: 'AUTO_GENERATED' @@ -28,6 +29,7 @@ jobs: clean: 'true' ui_tests: 'true' monitoring: ${{ matrix.monitoring }} + infra_os: ${{ matrix.infra_os }} secrets: ARM_CLIENT_SECRET: ${{ secrets.ARM_CLIENT_SECRET }} ARM_CLIENT_ID: ${{ secrets.ARM_CLIENT_ID }} From 6d0dee21cf98381c77c6da7a38b7df6c0a963ff3 Mon Sep 17 00:00:00 2001 From: Xavier Pillons Date: Mon, 4 Sep 2023 17:11:01 +0200 Subject: [PATCH 35/35] always build pyxis --- playbooks/roles/slurmserver/tasks/pyxis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/playbooks/roles/slurmserver/tasks/pyxis.yml b/playbooks/roles/slurmserver/tasks/pyxis.yml index 23c3d3948..b3143ed28 100644 --- a/playbooks/roles/slurmserver/tasks/pyxis.yml +++ b/playbooks/roles/slurmserver/tasks/pyxis.yml @@ -20,12 +20,12 @@ shell: make args: chdir: '{{tmp_pyxis_dir.path}}/pyxis-{{pyxis_version}}' - creates: /usr/lib64/slurm/spank_pyxis.so +# creates: /usr/lib64/slurm/spank_pyxis.so become: yes - name: copy the plugin to 
the share copy: - src: '/usr/lib64/slurm/spank_pyxis.so' + src: '{{tmp_pyxis_dir.path}}/pyxis-{{pyxis_version}}/spank_pyxis.so' dest: '{{homedir_mountpoint}}/slurm/' remote_src: yes mode: 0755