Skip to content

Commit

Permalink
Merge pull request #38 from Gerbenvandervries/develop
Browse files: browse the repository at this point in the history
Slurm role bugfixes for dynamic resources.
  • Loading branch information
pneerincx authored Jan 24, 2019
2 parents bcf0a16 + 7b89237 commit 6e27a81
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 19 deletions.
2 changes: 1 addition & 1 deletion gearshift_hosts.ini
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ gs-vcompute[01:11]
[cluster:children]
compute-vm
administration
-jumphost

[gearshift-cluster:children]
cluster
+jumphost

[metal]
gs-openstack
Expand Down
3 changes: 3 additions & 0 deletions host_vars/hc-sai
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
---
mailhub: 192.168.0.5
rewrite_domain: hc-sai.gcc.rug.nl
2 changes: 1 addition & 1 deletion hyperchicken_hosts.ini
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ hc-vcompute[01:05]
[cluster:children]
compute-vm
administration
-jumphost

[hyperchicken-cluster:children]
cluster
+jumphost
12 changes: 6 additions & 6 deletions roles/slurm-client/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,23 +47,23 @@
- name: /etc/slurm
owner: root
group: root
mode: 0755
mode: '0755'
- name: /etc/nhc
owner: root
group: root
mode: 0755
mode: '0755'
- name: /var/log/slurm
owner: root
group: root
mode: 0750
mode: '0750'
- name: /var/spool/slurm
owner: slurm
group: root
mode: 0750
mode: '0750'
- name: /var/spool/slurmd
owner: slurm
group: root
mode: 0750
mode: '0750'

- name: Deploy slurm.conf
template:
Expand Down Expand Up @@ -97,7 +97,7 @@

- name: Deploy nhc.conf
template:
-src: template/nhc.conf
+src: templates/nhc.conf
dest: /etc/nhc/nhc.conf
owner: root
group: root
Expand Down
2 changes: 1 addition & 1 deletion roles/slurm/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ munge_gid: 498
# * number of vcompute nodes in inventory and
# * cores per node, mem per node, etc. as specified in group_vars for cluster.
#
-vcompute_host_count: "{{ groups['compute-vm']|length }}"
+vcompute_host_count: "{{ groups['compute-vm']|list|length }}"
cluster_cores_total: "{{ vcompute_host_count|int * vcompute_max_cpus_per_node|int }}"
cluster_mem_total: "{{ vcompute_host_count|int * vcompute_max_mem_per_node|int }}"
...
18 changes: 9 additions & 9 deletions roles/slurm/files/configure_slurm_accounting_db.bash
Original file line number Diff line number Diff line change
Expand Up @@ -59,22 +59,22 @@ sacctmgr -i create qos set \
Name='regular-short' \
Priority=10 \
Description='regular-short' \
-GrpSubmit=30000 MaxSubmitJobsPU=5000 MaxWall=06:00:00
+GrpSubmit=30000 MaxSubmitJobsPU=5000 MaxWall=06:00:00

sacctmgr -i create qos set \
Name='regular-medium' \
Priority=10 \
Description='regular-medium' \
GrpSubmit=30000 MaxSubmitJobsPU=5000 MaxWall=1-00:00:00 \
-MaxTRESPU=cpu={{ (cluster_cores_total * 0.4) | int }},mem={{ (cluster_mem_total * 0.4) | int }}
+MaxTRESPU=cpu={{ (cluster_cores_total | float * 0.4) | int }},mem={{ (cluster_mem_total | float * 0.4) | int }}

sacctmgr -i create qos set \
Name='regular-long' \
Priority=10 \
Description='regular-long' \
GrpSubmit=3000 MaxSubmitJobsPU=1000 MaxWall=7-00:00:00 \
-GrpTRES=cpu={{ (cluster_cores_total * 0.3) | int }},mem={{ (cluster_mem_total * 0.3) | int }} \
-MaxTRESPU=cpu={{ (cluster_cores_total * 0.15) | int }},mem={{ (cluster_mem_total * 0.15) | int }}
+GrpTRES=cpu={{ (cluster_cores_total | float * 0.3) | int }},mem={{ (cluster_mem_total | float * 0.3) | int }} \
+MaxTRESPU=cpu={{ (cluster_cores_total | float * 0.15) | int }},mem={{ (cluster_mem_total | float * 0.15) | int }}

#
# QoS priority
Expand All @@ -93,25 +93,25 @@ sacctmgr -i create qos set \
UsageFactor=2 \
Description='priority-short' \
GrpSubmit=5000 MaxSubmitJobsPU=1000 MaxWall=06:00:00 \
-MaxTRESPU=cpu={{ (cluster_cores_total * 0.25) | int }},mem={{ (cluster_mem_total * 0.25) | int }}
+MaxTRESPU=cpu={{ (cluster_cores_total | float * 0.25) | int }},mem={{ (cluster_mem_total | float * 0.25) | int }}

sacctmgr -i create qos set \
Name='priority-medium' \
Priority=20 \
UsageFactor=2 \
Description='priority-medium' \
GrpSubmit=2500 MaxSubmitJobsPU=500 MaxWall=1-00:00:00 \
-GrpTRES=cpu={{ (cluster_cores_total * 0.5) | int }},mem={{ (cluster_mem_total * 0.5) | int }} \
-MaxTRESPU=cpu={{ (cluster_cores_total * 0.2) | int }},mem={{ (cluster_mem_total * 0.2) | int }}
+GrpTRES=cpu={{ (cluster_cores_total | float * 0.5) | int }},mem={{ (cluster_mem_total | float * 0.5) | int }} \
+MaxTRESPU=cpu={{ (cluster_cores_total | float * 0.2) | int }},mem={{ (cluster_mem_total | float * 0.2) | int }}

sacctmgr -i create qos set \
Name='priority-long' \
Priority=20 \
UsageFactor=2 \
Description='priority-long' \
GrpSubmit=250 MaxSubmitJobsPU=50 MaxWall=7-00:00:00 \
-GrpTRES=cpu={{ (cluster_cores_total * 0.2) | int }},mem={{ (cluster_mem_total * 0.2) | int }} \
-MaxTRESPU=cpu={{ (cluster_cores_total * 0.1) | int }},mem={{ (cluster_mem_total * 0.1) | int }}
+GrpTRES=cpu={{ (cluster_cores_total | float * 0.2) | int }},mem={{ (cluster_mem_total | float * 0.2) | int }} \
+MaxTRESPU=cpu={{ (cluster_cores_total | float * 0.1) | int }},mem={{ (cluster_mem_total | float * 0.1) | int }}

#
# QoS ds
Expand Down
2 changes: 1 addition & 1 deletion talos_hosts.ini
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ tl-vcompute[01:03]
[cluster:children]
compute-vm
administration
-jumphost

[talos-cluster:children]
cluster
+jumphost

0 comments on commit 6e27a81

Please sign in to comment.