Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/cgroup role #705

Merged
merged 9 commits into from
Jan 24, 2023
22 changes: 22 additions & 0 deletions roles/cgroups/handlers/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
---
#
# Important: maintain correct handler order.
# Handlers are executed in the order in which they are defined
# and not in the order in which they are listed in a "notify: handler_name" statement!
#
# Triggered by tasks that notify 'restart_cgconfig':
# reloads the systemd unit files and restarts cgconfig.service as root.
- name: Restart cgconfig service.
  listen: restart_cgconfig
  become: true
  ansible.builtin.systemd:
    daemon_reload: true
    state: restarted
    name: cgconfig.service

# Triggered by tasks that notify 'restart_cgred':
# reloads the systemd unit files and restarts cgred.service as root.
- name: Restart cgred service.
  listen: restart_cgred
  become: true
  ansible.builtin.systemd:
    daemon_reload: true
    state: restarted
    name: cgred.service
...
70 changes: 70 additions & 0 deletions roles/cgroups/tasks/configure_cgroups.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#
# Note:
# * This role currently works with cgroups v1
# and will need to be refactored for cgroups v2.
# * This role does not use systemd to manage cgroups,
# because systemd currently only supports setting limits per user
# and does not support setting limits per groups of users (yet).
# See: https://github.com/systemd/systemd/issues/12989
# * Inspired by discussion at:
# https://groups.google.com/g/slurm-users/c/Gt6Vof3E79U
#
---
# Installs (or upgrades to the latest version of) the libcgroup packages after
# refreshing the yum cache; any change schedules a restart of both cgroup services.
- name: Install libcgroup.
  become: true
  ansible.builtin.yum:
    name:
      - libcgroup
      - libcgroup-tools
    update_cache: true
    state: latest
  notify:
    - restart_cgconfig
    - restart_cgred

#
# Read-only lookups of the root cpuset values.
# The command module is skipped in check mode, which would leave the registered
# variables without 'stdout' and break the templating of regular_users.conf.
# As these commands never change the host, they are forced to run in check mode too.
#
- name: Determine cpuset.cpus
  # Only the trailing number of cpuset.cpus is kept (e.g. '7' from '0-7').
  ansible.builtin.command:
    cmd: grep -o '[0-9]*$' /sys/fs/cgroup/cpuset/cpuset.cpus
  changed_when: false
  check_mode: false
  register: cgroups_cpuset_cpus

- name: Determine cpuset.mems
  # The complete content of cpuset.mems is kept as is.
  ansible.builtin.command:
    cmd: cat /sys/fs/cgroup/cpuset/cpuset.mems
  changed_when: false
  check_mode: false
  register: cgroups_cpuset_mems

# Renders the cgroup definition for regular users from the cpuset values
# registered by the preceding tasks; a changed file schedules a cgconfig restart.
- name: Install /etc/cgconfig.d/regular_users.conf.
  become: true
  vars:
    cgroups_cpuset_max_core_number: "{{ cgroups_cpuset_cpus.stdout }}"
    cgroups_cpuset_mems_range: "{{ cgroups_cpuset_mems.stdout }}"
  ansible.builtin.template:
    dest: /etc/cgconfig.d/regular_users.conf
    src: templates/regular_users.conf.j2
    mode: '0644'
    owner: root
    group: root
  notify: restart_cgconfig

# Renders the rules that assign users/groups to cgroups;
# a changed file schedules a cgred restart.
- name: Install /etc/cgrules.conf.
  become: true
  ansible.builtin.template:
    dest: /etc/cgrules.conf
    src: templates/cgrules.conf.j2
    mode: '0644'
    owner: root
    group: root
  notify: restart_cgred

# Ensures both cgroup services start at boot and are running now.
# Uses native YAML booleans (consistent with the handlers of this role)
# and 'loop' instead of the legacy 'with_items' lookup.
- name: Make sure services are enabled and started.
  ansible.builtin.systemd:
    name: "{{ item }}"
    state: started
    enabled: true
    daemon_reload: true
  loop:
    - cgconfig.service
    - cgred.service
  become: true
...
12 changes: 12 additions & 0 deletions roles/cgroups/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
#
# Only configure cgroups on user interface machines,
# but exclude Slurm-in-a-box machines, which are both user_interface and compute node at the same time,
# because otherwise configuring cgroups with this role may interfere with the cgroups configured by Slurm.
#
- name: Configure cgroups.
  when:
    - inventory_hostname in groups['user_interface'] | default([])
    - inventory_hostname not in groups['compute_vm'] | default([])
  ansible.builtin.include_tasks:
    file: configure_cgroups.yml
...
10 changes: 10 additions & 0 deletions roles/cgroups/templates/cgrules.conf.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#
# Rules assigning processes to cgroups.
# Each rule lists: a user or @group, the controllers and the destination cgroup.
#
# Include an explicit rule for root.
# Otherwise, commands with the setuid bit set will inherit the original user's GID
# and end up in the @everyone group.
#
root cpuset,cpu,memory /
@admin cpuset,cpu,memory /
#
# One rule per regular group (sorted by name): destination is the regular_users cgroup.
#
{% for regular_group in regular_groups | default([]) | sort %}
@{{ regular_group }} cpuset,cpu,memory regular_users/
{% endfor %}
52 changes: 52 additions & 0 deletions roles/cgroups/templates/regular_users.conf.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#
# No mounts configured here: already handled by systemd
#
#mount {
# cpuset = /cgroup/cpuset;
# cpu = /cgroup/cpu;
# #cpuacct = /cgroup/cpuacct;
# memory = /cgroup/memory;
# #devices = /cgroup/devices;
# #freezer = /cgroup/freezer;
# #net_cls = /cgroup/net_cls;
# #blkio = /cgroup/blkio;
#}

#
# We do create limits for a group of users here as systemd cannot handle groups yet:
# systemd only handles per user limits or a limit for all users,
# but that would include daemons, admins, etc. too,
# which is not what we want.
#
# cpuset.cpus: the range starts at core 1 when the highest core number is above 0 (else at core 0)
#              and ends one below the highest core number when that number is above 2 (else at the highest core).
# memory:      hard limit and memory+swap limit are 85% of total RAM, soft limit is 75% of total RAM.
#              Values are written in mebibytes (M suffix): rounding down to whole gibibytes
#              would result in a limit of 0G on machines with little RAM.
#
group regular_users {
    cpu {
        cpu.shares=100;
    }
    cpuset {
        cpuset.cpus={% if cgroups_cpuset_max_core_number | int > 0 %}1{% else %}0{% endif %}-{% if cgroups_cpuset_max_core_number | int > 2 %}{{ cgroups_cpuset_max_core_number | int - 1 }}{% else %}{{ cgroups_cpuset_max_core_number}}{% endif %};
        cpuset.mems={{ cgroups_cpuset_mems_range }};
    }
    memory {
        memory.limit_in_bytes={{ (ansible_memtotal_mb | float * 0.85) | int }}M;
        memory.soft_limit_in_bytes={{ (ansible_memtotal_mb | float * 0.75) | int }}M;
        memory.memsw.limit_in_bytes={{ (ansible_memtotal_mb | float * 0.85) | int }}M;
    }
}

#
# We do not use limits for individual users (yet).
# If required in the future, consider handling that via systemd.
#
#template regular_users/%U {
# cpu {
# cpu.shares=100;
# }
# cpuset {
# cpuset.cpus={% if cgroups_cpuset_max_core_number | int > 0 %}1{% else %}0{% endif %}-{% if cgroups_cpuset_max_core_number | int > 2 %}{{ cgroups_cpuset_max_core_number | int - 1 }}{% else %}{{ cgroups_cpuset_max_core_number}}{% endif %};
# cpuset.mems={{ cgroups_cpuset_mems_range }};
# }
# memory {
# memory.limit_in_bytes=4G;
# memory.soft_limit_in_bytes=2G;
# memory.memsw.limit_in_bytes=6G;
# }
#}
1 change: 1 addition & 0 deletions single_group_playbooks/user_interface.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
hosts:
- user_interface
roles:
- cgroups
- build_environment
- slurm_exporter
- slurm_client
Expand Down
7 changes: 7 additions & 0 deletions single_role_playbooks/cgroups.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# Single-role playbook: applies only the cgroups role to the user_interface hosts.
- name: Install cgroups role.
  hosts:
    - user_interface
  roles:
    - role: cgroups
...