From 44cfaf95f081fccbf51e1d2590b082407460528c Mon Sep 17 00:00:00 2001 From: scimerman Date: Mon, 14 Feb 2022 18:12:29 +0100 Subject: [PATCH 1/9] Bug fix lua job path parsing, new cron for making missing slurm group accounts, HC fix sql passwords and sql root password change --- group_vars/hyperchicken_cluster/secrets.yml | 146 +++++++++--------- group_vars/hyperchicken_cluster/vars.yml | 2 +- roles/slurm_management/files/job_submit.lua | 2 +- .../cron_add_missing_slurm_accounts.sh | 18 +++ 4 files changed, 93 insertions(+), 75 deletions(-) create mode 100644 roles/slurm_management/templates/cron_add_missing_slurm_accounts.sh diff --git a/group_vars/hyperchicken_cluster/secrets.yml b/group_vars/hyperchicken_cluster/secrets.yml index 573cd7662..331dfa6a4 100644 --- a/group_vars/hyperchicken_cluster/secrets.yml +++ b/group_vars/hyperchicken_cluster/secrets.yml @@ -1,74 +1,74 @@ $ANSIBLE_VAULT;1.2;AES256;hyperchicken_cluster -63396437613334626662366437633363356532356339633461383864666336346264353561633230 -6439373964323066373633663039373237313666323038370a353333646334393864626265616230 -37343064363630313832343762636438356163336431363932366165316561326436393665633065 -3762346265643365380a333130336231336630333032343930636661613638363861343039623032 -31356634666530383637326666623062326338313465383937346130376537616662353239336466 -34633433313839353231356234633636366536303565653061343836626331393536636265393431 -31373263366339313034356235616665306134616165653231656337333835376539343237666338 -65333631323339616563656162633063386565613734316264343032393231303864376638353164 -65653963376262653764313366643531636436663733623333613236623664313735306336363033 -38636531616266323162366435626636393337323461306638383465306236393239346539616365 -38613264363031343465626633396632333566306562383266633735643731643334313162323130 -66643966336133396665383765393638336230316333383263313632336363363136366635656434 
-62663861396439393862623138366437376435646637333037653136633863633662396437303431 -36626134386662633932333630613163643131346135373130666131313264333131623135383362 -35356433633436613834663038393938393465653364393830353662623165356132386638393538 -31663636366138616566613435653636636364306332633735326533613263623132646235376137 -64316134633034616134343839646562643030393238316265303435316465313239373732643335 -35663838396462636665646362643436323231626236316134636535316237623433373735366163 -64356435336366626465333038336535626635623732366238323130333966363163336638373933 -35356363306161356631636533306431336532663032386531393937396331653535633435333335 -39373933613830303730656666326338393466313636363339356635666333323431346338366436 -30633934383836363631366331396662326338353139346139643366363737303165383031666431 -32636638393031323830363061343730363261633462366439336639626138366362373234323236 -61316463383130643661326364653638613265336436386630336162313062363663313333373730 -30653362376337333139633765353266663064633535363537656433333661336331386433653164 -31353937333036333736626436613733383361353634366664643037636239376136303765373735 -30613862343364333661313137323232396266353539306264366535303734363830643035616630 -39663033653565616535353465396330393337363963363962646662626138623534646331343034 -64656530373736303338613336323665396666326138343162303235653333313034656366373937 -66383762326634376362363762396334386337356430653336336438323437333534663832653935 -37366263663464393766393836373034336231653132663762396632323939393536633935636161 -64613433613965313563643336366165316465613037373962306532346562353931396634613665 -32376130343636356336323835646365663337343437383063383731396562653465616234303765 -61356134653163373931653665393732633131646634666433393830393064353937656235373833 -66643630663332303136383366396534663164346436313831396131343634613162316364643664 -30373262353637353938656531356439356539663036663361643134626532636430613938643762 
-61643039613733356362623138373532376431346131643636313738356637386432373033643639 -30333634316239326137313936333130663365323139633862376166313961343331663436663266 -30366430313033626162326637663433363434666137343030633231343561623435656339643839 -37326162313636653231313435363631393538646136613636383339623033643261326433326233 -30626534623663326163303261663964383539326437626435636362333466666666393264323031 -31333733313965336535366139343366623232326461313665666531626166353965633134666631 -66316435663934393266326231343630343836343531383733336338626135333164643861643738 -63393636623635393332346466626333316133396432643031363733356538323562326530653231 -61633734303962383032306236613034386232326630643135333831646238303336633766363066 -34353138346636616430613139363935643735366161656235353735393433643139363233643237 -39353634643663386136653530613337643136613535666133303238393065326131383532363031 -32333961643936636637373630383066363331386437376239613535383738643735343161363665 -30316662336436303334363635343330636335643334613230313061373238366365303262373436 -33623336623765396639623163346363373465666366353434376163646264366339333338663030 -39326465626136313964626234353236393338356633356239633961333033306635636666306232 -66656562656666373431643637643535343030366636333863356463346239346539313264623330 -65333466643638623564326230366366393638666234653432373637306263333731383838643063 -35306133613633663362386331326162323566393831336532343131636537623938353535343063 -33613465333835666237653637396639636137336236393136376131343237616462653462653035 -39386466653036333634326431373639306663313938626639633938386236613434663631643434 -38386166366334373266303230613839313064306337376630386233306130333939316263616537 -64366565343165363062636439316666363765343133333963396565323631646338643465306132 -61653165343138643738363632366636303163643031663331633463353763623237376533396363 -36336231663630326139343339323664396337626666346531383161316264653566336638323134 
-32333637613931616434386431626332353239373436353933376536363536653230313262613666 -65373630313034653830346133386435383661366365373465633766356664376639643331303831 -35656433346135313131346361653064323432346334353363323236613932666666353365306533 -35646432353136313633643232313830633237396263626562656561383436353862343737343934 -63653737323336373834343337373562333264666663646264353561356531656635633837383966 -32383066356333343562346537666337646330316166656532363562396636653230323462316335 -62663862363165633764353631333366363164666162346665393137376136656532653465343463 -63616536333163303036616532383266313936633766623065323030333033626364323461323239 -61623463356265626562396332616435313230623732366465653165316462313932363133343930 -62663565313263636664313535646232353762353366623134393333363434343533336132353466 -31383738313434663336646137623538313063383862323735396638356465313937376332646131 -32383665356431373464353465616630386639363039393463323631376662383632356266393730 -39663232363037306433623433396531353737383437656337346339646134346133 +66393063336431363636663761613733373530376637386564366237366366646434613538393032 +3237346439646338616561386532336664323937316133660a366335346135313834353038663861 +33666161376162316361653966343538646365616334393135636362333764353339313037383761 +6266646137343234370a636338323561353662623864373136663362616233386438613636373533 +30323364353031303332633037323634663032633561623736336262323237303266373665626237 +65613733343436386537626234613330333438616630666262373834616335346533663230623531 +36316161666230306235613431373565303665353164653535343435323335303433356430326330 +61623237336163623761303063363664346165303033373533656634653833623236383238323761 +33383732656633646639616462646362623034326462656433633263623739643363386235363937 +38323432323535623430333035363461616238313133373262333462393465653532626130363630 +66353464326632616363366431343761323936623736333334346130316263333730383432333830 
+62343333643430613633393339386361373339383538613230333965373862313963323661373665 +35303865396238623131346131383430316532656133353831303762646161303263376332626564 +34303964343833393230626664333164313363333738633935363165616431623465663161666361 +34643933346638316138613433343966633561363337306633383261306234396537333064373663 +33303738616439326530306138623065303561613166363037653362373732396336386663336538 +33643566336365376436313231383564366164623735653934666662353830636362656230333434 +30663030316566616332376466363937613431643733336331663765343638636664346231663465 +39396162643761386463356638376530396466636435323734393432386333303534393965356663 +39363462313437373162373537363966313630383931626332643938303037316333613539306464 +31333038363864636163366631666633623461303338656466323163313832303337333833396539 +37383265313330656363383262396232613762613934306131373164343338316362643630336433 +63326236333865353333396332653531663665383566353432656464666439333839613364376563 +37326134646532653164363035616663366637653461346235376362383234373735633662616564 +33643537326164343661633638303035333431386566326364666361343435366562373666346339 +36643230613137353561363932666362326334303835666337623430346538366261303462653765 +36356130343131623835653337366236653864623730353765633239333639363966653565633338 +62633934653834343164373234303663396531383537646564306461353237343930363337643533 +30333535663837666661663265363763356232333665633865656138323061633437393336383064 +37346639626339643231326464333733386234393866626233383737323663633335366334626366 +35313236336239646436363033656330353964616530353530633231623836363763353035363336 +32653035373235313831633961333737636439336265336231346230363830303737343366333765 +35333837383365353733376566326538643732326338633165393365666639386335626363636664 +39616133616232636231623637663463643038663464646332343465376439353037336136313935 +65336137613766356231656461383061323662666666396361653031316664363031353864353565 
+66643062646630373335636335303538633737393835353733353433646635303666663338643334 +31636362303637363334623038626333663838653765353564343639323637373831643037343231 +64313235663731343763363965356431313231626239663138666236393235663530303763323135 +36643733396136366235613361333732613936363531636262636331343463633938326532376163 +31356631616330666432393030383538663232623938663365393834613736353636333663333766 +32316361656637643539303635386636376132663532386339626533386137616130343466323433 +66653064343663636633303830366134333637663632663231306136656162386638643563343738 +38383234663962333335366131353436646636393037343861626336646463303163663362323135 +31633763303834373933353166303831396361326162323737353361313831643763376261646636 +35373261386632366562373466393566653835623338613235333436333962346364306136393765 +65616131353231336338633064623236373630633338633237356537333630653934376365636335 +39363964393234353130373433333735326165333331343237353136656261306534636439396465 +30636362353933323562323039626231396634336630386138353466323237623932363036636565 +65383361386539636261313861396161386337366663333838623634386334653266353765373635 +64346666626162663935346338333830366536363965643366323338626562323366346238646437 +61656535643734666637396362616633303966333862326466343530306230396637313338353534 +35333238643739643934646663376632353537306532336162656337636638363830353764346133 +32323064393966613033383932396131353964373666616630316636656366383664313838343162 +62323064313536653737373237616338306134323539336438643164393032383063363435653061 +62356563643565626233373831633661383363363236373936643633383835353134396336393833 +34623965383864343664663539626666333762363466376563313266383236353533363264626164 +62336264333236343233653766306539373333323030323138663634363362653965366332333561 +35343864633439343036356561663666313466396534643139383862333731376539623364363563 +32653266396233313766663037623235323132336630356238323338623064326330386537373731 
+34343133323164643562616335346337343439613835656435356138353736323638313437643937 +63383632383134653064373661303536343564383035353964663235623936376432623332663032 +32626465623332386361383265303034363932363630383463316566333661303466303332653866 +39373931643238386332323862363532303635366533643662623632663530333831316234376239 +65373330636262666533373531366436343438383065376334343233343238393433363738656265 +33363436353535303630356230313036363139656131393036346538363861646331386433363834 +63633863343165666230383131316430633664343161613963333832323062383538316336303234 +34393437306438373931623730616638633962626630303230363265356264656365636264303365 +39653033666435633939616634663339653963653564343036323738303439643437363061326630 +34346631653232343135313037333461366338613561386331363435363532386534333238626136 +32343666306530646137363238383739643432393536636133653537303235363130346131666461 +63336332623361633232373437393735383230306262336235333662353632643664326434633331 +36313063646338326230363565383038356663346133306264623238313366636537646361303261 +30333632653966636134646336616364366661663738653839666364323633303334 diff --git a/group_vars/hyperchicken_cluster/vars.yml b/group_vars/hyperchicken_cluster/vars.yml index af06a50ac..1fa113c69 100644 --- a/group_vars/hyperchicken_cluster/vars.yml +++ b/group_vars/hyperchicken_cluster/vars.yml @@ -28,7 +28,7 @@ vcompute_ethernet_interfaces: ui_hostnames: "{{ slurm_cluster_name }}" ui_sockets: 4 ui_cores_per_socket: 1 -ui_real_memory: 7821 +ui_real_memory: 7820 ui_local_disk: 0 ui_features: 'prm09,tmp09' ui_ethernet_interfaces: diff --git a/roles/slurm_management/files/job_submit.lua b/roles/slurm_management/files/job_submit.lua index 7fefe0349..69a409a86 100644 --- a/roles/slurm_management/files/job_submit.lua +++ b/roles/slurm_management/files/job_submit.lua @@ -197,7 +197,7 @@ function slurm_job_submit(job_desc, part_list, submit_uid) --slurm.log_debug("Path to job *.out = %s.", tostring(job_desc.std_out)) 
--slurm.log_debug("Path to job *.err = %s.", tostring(job_desc.std_err)) --slurm.log_debug("Job's working dir = %s.", tostring(job_desc.work_dir)) - local job_metadata = {job_desc.std_out, job_desc.std_err, job_desc.work_dir} + local job_metadata = {tostring(job_desc.std_out), tostring(job_desc.std_err), tostring(job_desc.work_dir)} local group = nil local lfs = nil for inx,job_metadata_value in ipairs(job_metadata) do diff --git a/roles/slurm_management/templates/cron_add_missing_slurm_accounts.sh b/roles/slurm_management/templates/cron_add_missing_slurm_accounts.sh new file mode 100644 index 000000000..e3e3c0dfa --- /dev/null +++ b/roles/slurm_management/templates/cron_add_missing_slurm_accounts.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# Search for all existing groups inside the '/groups' folder +# that are inside /mnt/*/ folder on the SAI machines +for _each_pfs in /mnt/*; do + cd ${_each_pfs}/groups + # Find should be pretty robust way, but you need to manually + # remove the leading dot slash from from the group name: "./umcg-atd" + _accounts_list="$(find . -maxdepth 1 -mindepth 1 -type d | sed "s/^\.\///g")" + + IFS=$'\n' + for _each_group in "${_accounts_list}"; do + _output="$(sacctmgr -i create account ${_each_group} descr=scientists org=various parent=users fairshare=parent 2>&1)" + if [[ "${?}" -ne "1" ]]; then # suppress the normal output of " Nothing new added." 
with exit code 1 + echo "${_output}" + fi + done +done From cb29d8ff3ed55427323ab2589c4517ff0e39fcb0 Mon Sep 17 00:00:00 2001 From: scimerman Date: Mon, 14 Feb 2022 18:15:30 +0100 Subject: [PATCH 2/9] Implemented cron: Add missing slurm group accounts --- roles/slurm_management/tasks/main.yml | 64 +++++++++++++++++---------- 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/roles/slurm_management/tasks/main.yml b/roles/slurm_management/tasks/main.yml index e63c7250b..172595c97 100644 --- a/roles/slurm_management/tasks/main.yml +++ b/roles/slurm_management/tasks/main.yml @@ -1,7 +1,7 @@ # Deploy Slurm control host. --- - name: 'Add Slurm group.' - group: + ansible.builtin.group: name: 'slurm' gid: "{{ slurm_gid }}" notify: @@ -10,7 +10,7 @@ become: true - name: 'Add Munge group.' - group: + ansible.builtin.group: name: 'munge' gid: "{{ munge_gid }}" notify: @@ -20,7 +20,7 @@ become: true - name: 'Add Slurm user.' - user: + ansible.builtin.user: name: 'slurm' uid: "{{ slurm_uid }}" group: 'slurm' @@ -30,7 +30,7 @@ become: true - name: 'Add Munge user.' - user: + ansible.builtin.user: name: 'munge' uid: "{{ munge_uid }}" group: 'munge' @@ -41,7 +41,7 @@ become: true - name: 'Install munge.' - yum: + ansible.builtin.yum: state: 'latest' update_cache: true name: @@ -49,7 +49,7 @@ become: true - name: 'Install logrotate config file for slurmctld and slurmdbd.' - copy: + ansible.builtin.copy: src: "files/logrotate_slurm" owner: 'root' group: 'root' @@ -58,7 +58,7 @@ become: true - name: 'Install munge.key file.' - copy: + ansible.builtin.copy: src: "{{ stack_name }}/munge.key" owner: 'munge' group: 'munge' @@ -71,7 +71,7 @@ become: true - name: 'Install Slurm management dependencies with yum.' - yum: + ansible.builtin.yum: state: 'latest' update_cache: true name: @@ -84,7 +84,7 @@ become: true - name: 'Install Slurm management deamons with yum.' 
- yum: + ansible.builtin.yum: state: 'installed' update_cache: true allow_downgrade: true @@ -100,20 +100,20 @@ become: true - name: 'Create a database for Slurm accounting.' - mysql_db: + community.mysql.mysql_db: login_host: 'localhost' login_user: 'root' login_password: "{{ MYSQL_ROOT_PASSWORD }}" login_unix_socket: "/var/lib/mysql/mysql.sock" name: "{{ slurm_database_name }}" state: 'present' - no_log: true + #no_log: false notify: - 'restart_slurmdbd' - 'restart_slurmctld' - name: 'Make sure the slurm database user is present and grant privileges on the Slurm accounting DB.' - mysql_user: + community.mysql.mysql_user: login_host: 'localhost' login_user: 'root' login_password: "{{ MYSQL_ROOT_PASSWORD }}" @@ -129,7 +129,7 @@ - 'restart_slurmctld' - name: 'Create folders for Slurm.' - file: + ansible.builtin.file: name: "{{ item.name }}" owner: "{{ item.owner }}" group: "{{ item.group }}" @@ -155,7 +155,7 @@ become: true - name: 'Install Slurm config file.' - template: + ansible.builtin.template: src: 'templates/slurm.conf' dest: '/etc/slurm/slurm.conf' owner: 'root' @@ -166,7 +166,7 @@ become: true - name: 'Install Slurm DBD config file.' - template: + ansible.builtin.template: src: 'templates/slurmdbd.conf' dest: '/etc/slurm/slurmdbd.conf' owner: 'slurm' @@ -176,7 +176,7 @@ become: true - name: 'Install Slurm scripts.' - copy: + ansible.builtin.copy: src: "files/{{ item }}" dest: "/etc/slurm/{{ item }}" owner: 'root' @@ -187,7 +187,7 @@ become: true - name: 'Make sure slurmdbd and munge services are enabled and started.' - systemd: + ansible.builtin.systemd: name: "{{ item }}" state: 'started' enabled: 'yes' @@ -198,7 +198,7 @@ become: true - name: 'Copy Slurm DB initialization script to host running slurmdbd.' 
- template: + ansible.builtin.template: src: 'templates/configure_slurm_accounting_db.bash' dest: '/etc/slurm/configure_slurm_accounting_db.bash' owner: 'root' @@ -214,7 +214,7 @@ meta: flush_handlers - name: 'Execute Slurm DB initialization script on host running slurmdbd.' # noqa no-changed-when - command: + ansible.builtin.command: cmd: '/etc/slurm/configure_slurm_accounting_db.bash' register: command_result retries: 3 @@ -231,7 +231,7 @@ become: true - name: 'Make sure slurmctld service is enabled and started now that the cluster DB is present.' - systemd: + ansible.builtin.systemd: name: 'slurmctld.service' state: 'started' enabled: 'yes' @@ -239,7 +239,7 @@ become: true - name: 'Make backup dir.' - file: + ansible.builtin.file: path: '/root/slurm/backup' state: 'directory' owner: 'root' @@ -259,13 +259,13 @@ become: true - name: 'Deploy cron job to backup the Slurm accounting DB every night. Keep 7 backups.' - cron: + ansible.builtin.cron: name: 'Slurm database backup' minute: '11' hour: '3' job: > /bin/cp --backup=numbered /root/slurm/backup/slurm.sql /root/slurm/backup/slurm_bak.sql - && mysqldump --all-databases -uroot -p{{ MYSQL_ROOT_PASSWORD }} -h localhost --protocol=socket + && mysqldump --all-databases -uroot -p{{ MYSQL_ROOT_PASSWORD | quote }} -h localhost --protocol=socket > /root/slurm/backup/slurm.sql && /bin/find /root/slurm/backup/slurm_bak.sql.* -mtime 7 -delete tags: 'backup' @@ -273,7 +273,7 @@ become: true - name: 'Allow passwordless sudo to slurm user for users in the functional_admins_group.' - template: + ansible.builtin.template: src: 'templates/91-slurm' dest: "/etc/sudoers.d/91-slurm" owner: 'root' @@ -282,4 +282,20 @@ when: functional_admin_group is defined and functional_admin_group | length >= 1 become: true +- name: 'Deploy script to verify and if necessary fix prmissions of the environment managed by the envsync account.' 
+ template: + src: 'templates/cron_add_missing_slurm_accounts.sh' + dest: '/root/cron_add_missing_slurm_accounts.sh' + owner: 'root' + group: 'root' + mode: '0750' + become: true + +- name: 'Deploy cron job to update/create account in the slurm database for each group, every half an hour.' + ansible.builtin.cron: + name: 'Add missing slurm group accounts' + minute: '24,54' + hour: '*' + job: '/root/cron_add_missing_slurm_accounts.sh' + become: true ... From e9e4f054d1067863ea40dbe624c178c50a86285a Mon Sep 17 00:00:00 2001 From: scimerman Date: Mon, 14 Feb 2022 18:16:29 +0100 Subject: [PATCH 3/9] Fix: typo --- roles/slurm_management/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/slurm_management/tasks/main.yml b/roles/slurm_management/tasks/main.yml index 172595c97..a0a4f2793 100644 --- a/roles/slurm_management/tasks/main.yml +++ b/roles/slurm_management/tasks/main.yml @@ -107,7 +107,7 @@ login_unix_socket: "/var/lib/mysql/mysql.sock" name: "{{ slurm_database_name }}" state: 'present' - #no_log: false + no_log: false notify: - 'restart_slurmdbd' - 'restart_slurmctld' From ed30102dd0d56d1a49c666ff5d119f4988d623c7 Mon Sep 17 00:00:00 2001 From: scimerman Date: Mon, 14 Feb 2022 18:31:24 +0100 Subject: [PATCH 4/9] Fix bugfix slurm backup --- roles/slurm_management/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/slurm_management/tasks/main.yml b/roles/slurm_management/tasks/main.yml index a0a4f2793..0f3385afb 100644 --- a/roles/slurm_management/tasks/main.yml +++ b/roles/slurm_management/tasks/main.yml @@ -267,7 +267,7 @@ /bin/cp --backup=numbered /root/slurm/backup/slurm.sql /root/slurm/backup/slurm_bak.sql && mysqldump --all-databases -uroot -p{{ MYSQL_ROOT_PASSWORD | quote }} -h localhost --protocol=socket > /root/slurm/backup/slurm.sql - && /bin/find /root/slurm/backup/slurm_bak.sql.* -mtime 7 -delete + && /bin/find /root/slurm/backup/slurm_bak.sql.* -mtime +7 -delete tags: 'backup' 
no_log: true become: true From 74cc78cf8f11e6db7e862337e2fd2713c04f79ea Mon Sep 17 00:00:00 2001 From: scimerman Date: Mon, 14 Feb 2022 19:02:58 +0100 Subject: [PATCH 5/9] fix ansible-lint --- .../subgroup_directories/tasks/create_subgroup_directories.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/roles/subgroup_directories/tasks/create_subgroup_directories.yml b/roles/subgroup_directories/tasks/create_subgroup_directories.yml index 60db7bde1..f93f3acda 100644 --- a/roles/subgroup_directories/tasks/create_subgroup_directories.yml +++ b/roles/subgroup_directories/tasks/create_subgroup_directories.yml @@ -5,6 +5,7 @@ getent group \ | grep -o "^{{ group }}-[^:]*-v[0-9][0-9]*" \ || true + changed_when: false register: versioned_subgroups - set_fact: # noqa unnamed-task versioned_subgroups_list: "{% if versioned_subgroups.stdout | length %}{{ versioned_subgroups.stdout.split('\n') | list }}{% endif %}" @@ -17,6 +18,7 @@ echo "${group}"; \ fi; \ done + changed_when: false register: unversioned_subgroups - set_fact: # noqa unnamed-task unversioned_subgroups_list: "{% if unversioned_subgroups.stdout | length %}{{ unversioned_subgroups.stdout.split('\n') | list }}{% endif %}" From 9f93b5771e202e87fd7628a029596ede43c8760f Mon Sep 17 00:00:00 2001 From: scimerman Date: Tue, 15 Feb 2022 10:13:58 +0100 Subject: [PATCH 6/9] Wingedhelix changed tmp02 > tmp07 --- group_vars/wingedhelix_cluster/vars.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/group_vars/wingedhelix_cluster/vars.yml b/group_vars/wingedhelix_cluster/vars.yml index 3595da953..15b3e94d6 100644 --- a/group_vars/wingedhelix_cluster/vars.yml +++ b/group_vars/wingedhelix_cluster/vars.yml @@ -24,7 +24,7 @@ vcompute_real_memory: 64264 vcompute_max_cpus_per_node: "{{ vcompute_sockets * vcompute_cores_per_socket - 2 }}" vcompute_max_mem_per_node: "{{ vcompute_real_memory - vcompute_sockets * vcompute_cores_per_socket * 512 }}" vcompute_local_disk: 0 # [MB] the slurm setting, how it sees the 
disk local_volume_size_vcompute -vcompute_features: 'tmp02' +vcompute_features: 'tmp07' vcompute_ethernet_interfaces: - 'eth0' - 'eth1' From 87d4209f484f194a79aee013f90c2525cd3b2eb4 Mon Sep 17 00:00:00 2001 From: scimerman <80223690+scimerman@users.noreply.github.com> Date: Tue, 15 Feb 2022 10:50:11 +0100 Subject: [PATCH 7/9] Update roles/slurm_management/templates/cron_add_missing_slurm_accounts.sh Co-authored-by: Pieter Neerincx --- .../templates/cron_add_missing_slurm_accounts.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/slurm_management/templates/cron_add_missing_slurm_accounts.sh b/roles/slurm_management/templates/cron_add_missing_slurm_accounts.sh index e3e3c0dfa..66a9e2aad 100644 --- a/roles/slurm_management/templates/cron_add_missing_slurm_accounts.sh +++ b/roles/slurm_management/templates/cron_add_missing_slurm_accounts.sh @@ -3,7 +3,7 @@ # Search for all existing groups inside the '/groups' folder # that are inside /mnt/*/ folder on the SAI machines for _each_pfs in /mnt/*; do - cd ${_each_pfs}/groups + cd "${_each_pfs}/groups" # Find should be pretty robust way, but you need to manually # remove the leading dot slash from from the group name: "./umcg-atd" _accounts_list="$(find . 
-maxdepth 1 -mindepth 1 -type d | sed "s/^\.\///g")" From f5d56f5d911c3330879262488c62977f194a16d5 Mon Sep 17 00:00:00 2001 From: scimerman Date: Tue, 15 Feb 2022 11:44:33 +0100 Subject: [PATCH 8/9] Cron add missing accounts: improvement and bugfixes --- .../files/cron_add_missing_slurm_accounts.sh | 32 +++++++++++++++++++ roles/slurm_management/tasks/main.yml | 10 +++--- 2 files changed, 37 insertions(+), 5 deletions(-) create mode 100644 roles/slurm_management/files/cron_add_missing_slurm_accounts.sh diff --git a/roles/slurm_management/files/cron_add_missing_slurm_accounts.sh b/roles/slurm_management/files/cron_add_missing_slurm_accounts.sh new file mode 100644 index 000000000..809def8df --- /dev/null +++ b/roles/slurm_management/files/cron_add_missing_slurm_accounts.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# The script creates a list of the accounts that already exist in sacctmgr, then it +# checks for available groups in the /mnt/*/groups/ and it creates in the +# database all the missing ones. + +# sacctmgr: -n removes header, -P makes it parsable (works also for long group names) +readarray -t _existing_accounts < <(sacctmgr -n -P list account format=Account) + +# Search for all existing groups inside the '/groups' folder +# that are inside /mnt/*/ folder on the SAI machines +for _each_pfs in /mnt/*; do + if [[ -e "${_each_pfs}/groups" ]]; then + cd "${_each_pfs}/groups" + # Find should be a pretty robust way, but you need to manually + # remove the leading dot slash from the group name: "./umcg-atd" + readarray -t _groups < <(find .
-maxdepth 1 -mindepth 1 -type d | sed "s/^\.\///g") + for _each_group in "${_groups[@]}"; do + # check if group already added + _group_missing=true + for _each_existing_account in "${_existing_accounts[@]}"; do + if [[ "${_each_existing_account}" == "${_each_group}" ]]; then + _group_missing=false + fi + done + if ${_group_missing}; then + printf "${0}: missing account for group '%s', adding it now\n" "${_each_group}" + sacctmgr -i create account "${_each_group}" descr=scientists org=various parent=users fairshare=parent + fi + done + fi +done diff --git a/roles/slurm_management/tasks/main.yml b/roles/slurm_management/tasks/main.yml index 0f3385afb..470a913f8 100644 --- a/roles/slurm_management/tasks/main.yml +++ b/roles/slurm_management/tasks/main.yml @@ -107,7 +107,7 @@ login_unix_socket: "/var/lib/mysql/mysql.sock" name: "{{ slurm_database_name }}" state: 'present' - no_log: false + no_log: true notify: - 'restart_slurmdbd' - 'restart_slurmctld' @@ -282,16 +282,16 @@ when: functional_admin_group is defined and functional_admin_group | length >= 1 become: true -- name: 'Deploy script to verify and if necessary fix prmissions of the environment managed by the envsync account.' - template: - src: 'templates/cron_add_missing_slurm_accounts.sh' +- name: 'Deploy cron script for creating missing slurm group accounts in the database.' + ansible.builtin.copy: + src: 'files/cron_add_missing_slurm_accounts.sh' dest: '/root/cron_add_missing_slurm_accounts.sh' owner: 'root' group: 'root' mode: '0750' become: true -- name: 'Deploy cron job to update/create account in the slurm database for each group, every half an hour.' +- name: 'Deploy cron job to create missing slurm group accounts in the slurm database, run every half an hour.' 
ansible.builtin.cron: name: 'Add missing slurm group accounts' minute: '24,54' From 859b94ec826633bdae12431c15aaf8d6c114025d Mon Sep 17 00:00:00 2001 From: scimerman Date: Tue, 15 Feb 2022 12:47:06 +0100 Subject: [PATCH 9/9] Removed cron from the templates, as the (static) file should be in files --- .../cron_add_missing_slurm_accounts.sh | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 roles/slurm_management/templates/cron_add_missing_slurm_accounts.sh diff --git a/roles/slurm_management/templates/cron_add_missing_slurm_accounts.sh b/roles/slurm_management/templates/cron_add_missing_slurm_accounts.sh deleted file mode 100644 index 66a9e2aad..000000000 --- a/roles/slurm_management/templates/cron_add_missing_slurm_accounts.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash - -# Search for all existing groups inside the '/groups' folder -# that are inside /mnt/*/ folder on the SAI machines -for _each_pfs in /mnt/*; do - cd "${_each_pfs}/groups" - # Find should be pretty robust way, but you need to manually - # remove the leading dot slash from from the group name: "./umcg-atd" - _accounts_list="$(find . -maxdepth 1 -mindepth 1 -type d | sed "s/^\.\///g")" - - IFS=$'\n' - for _each_group in "${_accounts_list}"; do - _output="$(sacctmgr -i create account ${_each_group} descr=scientists org=various parent=users fairshare=parent 2>&1)" - if [[ "${?}" -ne "1" ]]; then # suppress the normal output of " Nothing new added." with exit code 1 - echo "${_output}" - fi - done -done