From bca789b26d79e29903f2a7f0abeeaf4e8a381c53 Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Thu, 24 Jan 2019 13:07:36 +0100 Subject: [PATCH 1/4] slurm role bugfixes dynamic resources --- host_vars/hc-sai | 3 +++ roles/slurm-client/tasks/main.yml | 12 ++++++------ roles/slurm/files/configure_slurm_accounting_db.bash | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) create mode 100644 host_vars/hc-sai diff --git a/host_vars/hc-sai b/host_vars/hc-sai new file mode 100644 index 000000000..b13ae5593 --- /dev/null +++ b/host_vars/hc-sai @@ -0,0 +1,3 @@ +--- +mailhub: 192.168.0.5 +rewrite_domain: hc-sai.gcc.rug.nl diff --git a/roles/slurm-client/tasks/main.yml b/roles/slurm-client/tasks/main.yml index d54f9564f..382591840 100644 --- a/roles/slurm-client/tasks/main.yml +++ b/roles/slurm-client/tasks/main.yml @@ -47,23 +47,23 @@ - name: /etc/slurm owner: root group: root - mode: 0755 + mode: '0755' - name: /etc/nhc owner: root group: root - mode: 0755 + mode: '0755' - name: /var/log/slurm owner: root group: root - mode: 0750 + mode: '0750' - name: /var/spool/slurm owner: slurm group: root - mode: 0750 + mode: '0750' - name: /var/spool/slurmd owner: slurm group: root - mode: 0750 + mode: '0750' - name: Deploy slurm.conf template: @@ -97,7 +97,7 @@ - name: Deploy nhc.conf template: - src: template/nhc.conf + src: templates/nhc.conf dest: /etc/nhc/nhc.conf owner: root group: root diff --git a/roles/slurm/files/configure_slurm_accounting_db.bash b/roles/slurm/files/configure_slurm_accounting_db.bash index b2916e838..5a57d9c15 100644 --- a/roles/slurm/files/configure_slurm_accounting_db.bash +++ b/roles/slurm/files/configure_slurm_accounting_db.bash @@ -59,7 +59,7 @@ sacctmgr -i create qos set \ Name='regular-short' \ Priority=10 \ Description='regular-short' \ - GrpSubmit=30000 MaxSubmitJobsPU=5000 MaxWall=06:00:00 + GrpSubmit=30000 MaxSubmitJobsPU=5000 MaxWall=06:00:00 sacctmgr -i create qos set \ Name='regular-medium' \ From 
97c0c769e6e3c7f5b2d0429fce7914de2da978f5 Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Thu, 24 Jan 2019 13:58:41 +0100 Subject: [PATCH 2/4] fix float multiplication --- .../files/configure_slurm_accounting_db.bash | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/roles/slurm/files/configure_slurm_accounting_db.bash b/roles/slurm/files/configure_slurm_accounting_db.bash index 5a57d9c15..97fc90792 100644 --- a/roles/slurm/files/configure_slurm_accounting_db.bash +++ b/roles/slurm/files/configure_slurm_accounting_db.bash @@ -66,15 +66,15 @@ sacctmgr -i create qos set \ Priority=10 \ Description='regular-medium' \ GrpSubmit=30000 MaxSubmitJobsPU=5000 MaxWall=1-00:00:00 \ - MaxTRESPU=cpu={{ (cluster_cores_total * 0.4) | int }},mem={{ (cluster_mem_total * 0.4) | int }} + MaxTRESPU=cpu={{ (cluster_cores_total | float * 0.4) | int }},mem={{ (cluster_mem_total | float * 0.4) | int }} sacctmgr -i create qos set \ Name='regular-long' \ Priority=10 \ Description='regular-long' \ GrpSubmit=3000 MaxSubmitJobsPU=1000 MaxWall=7-00:00:00 \ - GrpTRES=cpu={{ (cluster_cores_total * 0.3) | int }},mem={{ (cluster_mem_total * 0.3) | int }} \ - MaxTRESPU=cpu={{ (cluster_cores_total * 0.15) | int }},mem={{ (cluster_mem_total * 0.15) | int }} + GrpTRES=cpu={{ (cluster_cores_total | float * 0.3) | int }},mem={{ (cluster_mem_total | float * 0.3) | int }} \ + MaxTRESPU=cpu={{ (cluster_cores_total | float * 0.15) | int }},mem={{ (cluster_mem_total | float * 0.15) | int }} # # QoS priority @@ -93,7 +93,7 @@ sacctmgr -i create qos set \ UsageFactor=2 \ Description='priority-short' \ GrpSubmit=5000 MaxSubmitJobsPU=1000 MaxWall=06:00:00 \ - MaxTRESPU=cpu={{ (cluster_cores_total * 0.25) | int }},mem={{ (cluster_mem_total * 0.25) | int }} + MaxTRESPU=cpu={{ (cluster_cores_total | float * 0.25) | int }},mem={{ (cluster_mem_total | float * 0.25) | int }} sacctmgr -i create qos set \ Name='priority-medium' \ @@ -101,8 +101,8 @@ sacctmgr -i create qos set \ 
UsageFactor=2 \ Description='priority-medium' \ GrpSubmit=2500 MaxSubmitJobsPU=500 MaxWall=1-00:00:00 \ - GrpTRES=cpu={{ (cluster_cores_total * 0.5) | int }},mem={{ (cluster_mem_total * 0.5) | int }} \ - MaxTRESPU=cpu={{ (cluster_cores_total * 0.2) | int }},mem={{ (cluster_mem_total * 0.2) | int }} + GrpTRES=cpu={{ (cluster_cores_total | float * 0.5) | int }},mem={{ (cluster_mem_total | float * 0.5) | int }} \ + MaxTRESPU=cpu={{ (cluster_cores_total | float * 0.2) | int }},mem={{ (cluster_mem_total | float * 0.2) | int }} sacctmgr -i create qos set \ Name='priority-long' \ @@ -110,8 +110,8 @@ sacctmgr -i create qos set \ UsageFactor=2 \ Description='priority-long' \ GrpSubmit=250 MaxSubmitJobsPU=50 MaxWall=7-00:00:00 \ - GrpTRES=cpu={{ (cluster_cores_total * 0.2) | int }},mem={{ (cluster_mem_total * 0.2) | int }} \ - MaxTRESPU=cpu={{ (cluster_cores_total * 0.1) | int }},mem={{ (cluster_mem_total * 0.1) | int }} + GrpTRES=cpu={{ (cluster_cores_total | float * 0.2) | int }},mem={{ (cluster_mem_total | float * 0.2) | int }} \ + MaxTRESPU=cpu={{ (cluster_cores_total | float * 0.1) | int }},mem={{ (cluster_mem_total | float * 0.1) | int }} # # QoS ds From cac433df5712effd074552b55db05bf3c6f847a0 Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Thu, 24 Jan 2019 14:06:50 +0100 Subject: [PATCH 3/4] convert groups to list --- roles/slurm/defaults/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/slurm/defaults/main.yml b/roles/slurm/defaults/main.yml index 507fc5d86..ff315b63d 100644 --- a/roles/slurm/defaults/main.yml +++ b/roles/slurm/defaults/main.yml @@ -9,7 +9,7 @@ munge_gid: 498 # * number of vcompute nodes in inventory and # * cores per node, mem per node, etc. as specified in group_vars for cluster. 
# -vcompute_host_count: "{{ groups['compute-vm']|length }}" +vcompute_host_count: "{{ groups['compute-vm']|list|length }}" cluster_cores_total: "{{ vcompute_host_count|int * vcompute_max_cpus_per_node|int }}" cluster_mem_total: "{{ vcompute_host_count|int * vcompute_max_mem_per_node|int }}" ... From 7b89237f72363b3fe1cca740bb44aa66141f89c4 Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Thu, 24 Jan 2019 14:10:14 +0100 Subject: [PATCH 4/4] jumphost to different group --- gearshift_hosts.ini | 2 +- hyperchicken_hosts.ini | 2 +- talos_hosts.ini | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gearshift_hosts.ini b/gearshift_hosts.ini index a3aa2cddd..07a7483d1 100644 --- a/gearshift_hosts.ini +++ b/gearshift_hosts.ini @@ -64,10 +64,10 @@ gs-vcompute[01:11] [cluster:children] compute-vm administration -jumphost [gearshift-cluster:children] cluster +jumphost [metal] gs-openstack diff --git a/hyperchicken_hosts.ini b/hyperchicken_hosts.ini index 83cee63bb..eb5c64c34 100644 --- a/hyperchicken_hosts.ini +++ b/hyperchicken_hosts.ini @@ -24,7 +24,7 @@ hc-vcompute[01:05] [cluster:children] compute-vm administration -jumphost [hyperchicken-cluster:children] cluster +jumphost diff --git a/talos_hosts.ini b/talos_hosts.ini index c8bc849c7..5637aeacc 100644 --- a/talos_hosts.ini +++ b/talos_hosts.ini @@ -24,7 +24,7 @@ tl-vcompute[01:03] [cluster:children] compute-vm administration -jumphost [talos-cluster:children] cluster +jumphost