Skip to content

Commit

Permalink
Merge pull request #74 from pneerincx/develop
Browse files Browse the repository at this point in the history
Updated playbooks for changed storage config and added support for jumphosts/dynamic inventories and more...
  • Loading branch information
Gerbenvandervries authored Mar 14, 2019
2 parents 17e7908 + 2d5e808 commit f82b10e
Show file tree
Hide file tree
Showing 17 changed files with 114 additions and 87 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
*.pyc
*.retry
*.swp
.vault_pass.txt
.vault_pass.txt*
documentation/.~lock.UMCG Research IT HPC cluster technical design.docx#
promtools/results/*
roles/hpc-cloud
Expand Down
17 changes: 2 additions & 15 deletions cluster.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
- node_exporter
- cluster
- resolver
- shared_storage

- name: Install ansible on admin interfaces (DAI & SAI).
hosts:
Expand Down Expand Up @@ -52,9 +53,9 @@
- hosts: slurm
become: true
roles:
- slurm
- prom_server
- cadvisor
- slurm
vars:
# These variables are needed by the mariadb role,
# which is a dependency of the slurm role.
Expand All @@ -68,8 +69,6 @@
tasks:
roles:
- compute-vm
- isilon
- datahandling
- slurm-client

- name: Install User Interface (UI)
Expand All @@ -79,19 +78,7 @@
roles:
- slurm_exporter
- user-interface
- datahandling
- isilon
- slurm-client

- name: Export /home on NFS server.
hosts: user-interface:&talos-cluster
roles:
- nfs_home_server

- name: Mount /home on NFS clients.
hosts: compute-vm&talos-cluster
roles:
- nfs_home_client

- import_playbook: local_admin_users.yml
...
7 changes: 7 additions & 0 deletions group_vars/all/vars.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,11 @@ auth_users:
uid: 1016
pub_keys: |
ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKOKgVel0GbF67zZaVR0TFo82e5XeZOP1e3Ld3gIdaER h.j.zilverberg
envsync:
comment: 'Environment Synchronizer'
uid: 1017
pub_keys: |
# No direct logins required: sudo only.
#
# Local group specs.
# Note:
Expand All @@ -109,4 +114,6 @@ auth_groups:
gid: 20001
users:
gid: 20002
depad:
gid: 20003
...
16 changes: 15 additions & 1 deletion group_vars/gearshift-cluster/vars.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,21 @@
slurm_cluster_name: 'gearshift'
slurm_cluster_domain: 'hpc.rug.nl'
stack_prefix: 'gs'
mailhub: '172.23.34.34'
rewrite_domain: "imperator.{{ slurm_cluster_domain }}"
motd: Vare, Vare, redde legiones!
additional_etc_hosts: |
172.23.40.21 gs-compute01 gs-compute01.hpc.local
172.23.40.22 gs-compute02 gs-compute02.hpc.local
172.23.40.23 gs-compute03 gs-compute03.hpc.local
172.23.40.24 gs-compute04 gs-compute04.hpc.local
172.23.40.25 gs-compute05 gs-compute05.hpc.local
172.23.40.26 gs-compute06 gs-compute06.hpc.local
172.23.40.27 gs-compute07 gs-compute07.hpc.local
172.23.40.28 gs-compute08 gs-compute08.hpc.local
172.23.40.29 gs-compute09 gs-compute09.hpc.local
172.23.40.30 gs-compute10 gs-compute10.hpc.local
172.23.40.31 gs-compute11 gs-compute11.hpc.local
vcompute_hostnames: "{{ stack_prefix }}-vcompute[01-11]"
vcompute_sockets: 2
vcompute_cores_per_socket: 14
Expand All @@ -26,7 +41,6 @@ ldap_binddn: cn=clusteradminumcg,o=asds
filter_passwd: '(|(rugpersonentitlementvalue=scz)(rugpersonentitlementvalue=umcg))'
filter_shadow: '(|(rugpersonentitlementvalue=scz)(rugpersonentitlementvalue=umcg))'
pam_authz_search: '(|(&(objectClass=posixGroup)(cn=co_bbmri_g-GRP_Gearshift)(memberUid=$username))(&(cn=$username)(rugpersonentitlementvalue=umcg)))'

nameservers: [
'172.23.40.244', # Order is important: local DNS for Isilon storage first!
'8.8.4.4', # Google DNS.
Expand Down
17 changes: 10 additions & 7 deletions group_vars/hyperchicken-cluster/vars.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
---
slurm_cluster_name: 'hyperchicken'
#slurm_cluster_domain: ''
slurm_cluster_domain: 'gcc.rug.nl'
stack_prefix: 'hc'
mailhub: '192.168.0.5'
rewrite_domain: "{{ stack_prefix }}-sai.{{ slurm_cluster_domain }}"
motd: "To solve or not to solve, that's the question."
vcompute_hostnames: "{{ stack_prefix }}-vcompute[01-05]"
vcompute_sockets: 1
vcompute_cores_per_socket: 9
Expand Down Expand Up @@ -54,16 +57,16 @@ local_regular_groups:
- 'users'
- 'depad'
local_regular_users:
- 'envsync'
- user: 'envsync'
groups: ['depad']
- 'gvdvries'
- user: 'gvdvries'
groups: ['users', 'depad']
- 'mbijlsma'
- user: 'mbijlsma'
groups: ['users', 'depad']
- 'mswerts'
- user: 'mswertz'
groups: ['users', 'depad']
- 'pneerincx'
- user: 'pneerincx'
groups: ['users', 'depad']
- 'rkanninga'
- user: 'rkanninga'
groups: ['users', 'depad']
...
3 changes: 3 additions & 0 deletions group_vars/talos-cluster/vars.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
slurm_cluster_name: 'talos'
slurm_cluster_domain: 'hpc.rug.nl'
stack_prefix: 'tl'
mailhub: '172.23.34.34'
rewrite_domain: "{{ stack_prefix }}-sai.{{ slurm_cluster_domain }}"
motd: "It's highly addictive"
vcompute_hostnames: "{{ stack_prefix }}-vcompute[01-03]"
vcompute_sockets: 2
vcompute_cores_per_socket: 2
Expand Down
44 changes: 27 additions & 17 deletions hc-cluster.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
---
- name: Sign host keys of all cluster hosts.
hosts: all
pre_tasks:
- name: Verify Ansible version meets requirements.
assert:
that: "ansible_version.full | version_compare('2.4', '>=')"
msg: 'You must update Ansible to at least 2.4.x to use this playbook.'
roles:
- ssh_host_signer
- ssh_known_hosts

- name: Install roles needed for all virtual cluster components except jumphosts.
hosts: cluster
become: true
Expand All @@ -8,15 +19,28 @@
# - ldap
- node_exporter
- cluster
- resolver
- shared_storage

- name: Install ansible on admin interfaces (DAI & SAI).
hosts:
- sys-admin-interface
- deploy-admin-interface
become: True
tasks:
- name: install Ansible
yum:
name: ansible-2.6.6-1.el7.umcg

- name: Install roles needed for jumphosts.
hosts: jumphost
become: true
roles:
- docker
- geerlingguy.repo-epel
# - ldap
- cluster
- node_exporter
# - geerlingguy.security
- geerlingguy.security
tasks:
- cron:
name: Reboot to load new kernel.
Expand All @@ -40,8 +64,6 @@
tasks:
roles:
- compute-vm
# - isilon
# - datahandling
- slurm-client

- name: Install User Interface (UI)
Expand All @@ -51,20 +73,8 @@
roles:
- slurm_exporter
- user-interface
# - datahandling
# - isilon
- slurm-client

#- name: Install ansible on admin interfaces (DAI & SAI).
# hosts:
# - imperator
# - sugarsnax
# become: True
# tasks:
# - name: install Ansible
# yum:
# name: ansible-2.6.6-1.el7.umcg

- import_playbook: local_admin_users.yml
- import_playbook: local_regular_users.yml
#- import_playbook: ssh-host-signer.yml
...
3 changes: 0 additions & 3 deletions host_vars/hc-sai

This file was deleted.

16 changes: 0 additions & 16 deletions host_vars/imperator

This file was deleted.

5 changes: 0 additions & 5 deletions host_vars/tl-sai

This file was deleted.

20 changes: 10 additions & 10 deletions local_regular_users.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,25 @@
with_items: "{{ local_regular_groups }}"
- name: "Check if required private groups for user's home dir are present."
group:
name: "{{ item }}" # Use same name as user's account name for user's private group.
gid: "{{ auth_users[item].uid }}" # Use same GID as user's UID for user's private group.
name: "{{ item.user }}" # Use same name as user's account name for user's private group.
gid: "{{ auth_users[item.user].uid }}" # Use same GID as user's UID for user's private group.
state: present
with_items: "{{ local_regular_users }}"
- name: 'Create local regular users and append them to relevant groups.'
user:
name: "{{ item }}"
uid: "{{ auth_users[item].uid }}"
comment: "{{ auth_users[item].comment }}"
group: "{{ item }}"
name: "{{ item.user }}"
uid: "{{ auth_users[item.user].uid }}"
comment: "{{ auth_users[item.user].comment }}"
group: "{{ item.user }}"
groups: "{{ item.groups }}"
home: "/home/{{ item }}"
home: "/home/{{ item.user }}"
append: no
with_items: "{{ local_regular_users }}"
- name: 'Deploy authorized keys for local regular users.'
authorized_key:
user: "{{ item }}"
key: "{{ auth_users[item].pub_keys }}"
user: "{{ item.user }}"
key: "{{ auth_users[item.user].pub_keys }}"
state: present
exclusive: yes
with_items: "{{ local_users }}"
with_items: "{{ local_regular_users }}"
...
10 changes: 9 additions & 1 deletion roles/ldap/handlers/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,26 @@
become: yes
listen: restart_nslcd

# OddJob has a dependency on DBus.
- name: Run authconfig update.
shell: "authconfig --enablemkhomedir --update"
become: yes
listen: restart_oddjobd

#
# Notes:
# * OddJob has a dependency on DBus.
# * Due to a bug, systemd-logind may enter a broken state when DBus is restarted,
# making logins via SSH and/or sudo commands very slow.
# https://bugzilla.redhat.com/show_bug.cgi?id=1532105
# Workaround for now is to always restart systemd-logind after DBus is restarted.
#
- name: Restart dbusd and oddjobd services.
service:
name: "{{item}}"
state: restarted
with_items:
- dbus
- systemd-logind
- oddjobd
become: yes
listen: restart_oddjobd
Expand Down
5 changes: 3 additions & 2 deletions roles/nfs_home_server/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
---
- name: install nfs utils
- name: 'Install NFS utils.'
yum:
name: nfs-utils

- name: Add fstab entry
- name: 'Add share entry to NFS exports.'
lineinfile:
path: /etc/exports
line: /home {{network_range}}(rw,sync,no_root_squash,no_subtree_check)
...
11 changes: 10 additions & 1 deletion roles/user-interface/handlers/main.yml
Original file line number Diff line number Diff line change
@@ -1,16 +1,25 @@
---
# OddJob has a dependency on DBus.
- name: Run authconfig update.
shell: "authconfig --enablemkhomedir --update"
become: yes
listen: authconfig_update

#
# Notes:
# * OddJob has a dependency on DBus.
# * Due to a bug, systemd-logind may enter a broken state when DBus is restarted,
# making logins via SSH and/or sudo commands very slow.
# https://bugzilla.redhat.com/show_bug.cgi?id=1532105
# Workaround for now is to always restart systemd-logind after DBus is restarted.
#
- name: Restart dbusd and oddjobd services.
service:
name: "{{item}}"
state: restarted
with_items:
- dbus
- systemd-logind
- oddjobd
become: yes
listen: restart_oddjobd
...
8 changes: 0 additions & 8 deletions single_role_playbooks/dai.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,3 @@
command: rpmbuild --rebuild --without servers /tmp/{{ lustre_src_rpm_name }}
become: true
when: remote_file.stat.exists == false

- name: Mount isilon apps
mount:
path: /apps
src: gcc-storage001.stor.hpc.local:/ifs/rekencluster/umcgst10/.envsync/tmp01
fstype: nfs
opts: defaults,_netdev,nolock,vers=4.0,noatime,nodiratime
state: present
16 changes: 16 additions & 0 deletions single_role_playbooks/nfs_home_dirs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#
# Example of how to use NFS-based home dirs exported from the UI and mounted on the compute nodes.
#
---
- name: Export /home on NFS server.
hosts: user-interface
become: true
roles:
- nfs_home_server

- name: Mount /home on NFS clients.
hosts: compute-vm
become: true
roles:
- nfs_home_client
...
1 change: 1 addition & 0 deletions single_role_playbooks/slurm-client.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@
tasks:
roles:
- slurm-client
...

0 comments on commit f82b10e

Please sign in to comment.