-
Notifications
You must be signed in to change notification settings - Fork 102
/
site.yml
215 lines (198 loc) · 8.35 KB
/
site.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
---
## Preparations
## Making sure python exists on all nodes, so Ansible will be able to run; make sure min vars are defined
- hosts: all
  ## Bootstrap play: automatic fact gathering is switched off because python may
  ## still be missing on the targets; facts are collected explicitly by the
  ## `setup` task further down, once the raw python installation has been attempted.
  gather_facts: false
  become: true
  become_method: sudo
  pre_tasks:
    ## It would be best to have ansible already installed on all machines.
    ## But if it is not, we'll try to do it:
    ## Alternatively, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3
    - name: when no python2, install python2 for Ansible<2.8 (usually required on ubuntu, which defaults to python3)
      raw: test -e /usr/bin/python || (apt -y update && apt install -y python-minimal) || (yum install -y python2 python-simplejson)
      register: output
      ## `test -e` prints nothing, so any stdout means a package manager actually ran.
      changed_when: output.stdout != ""
      tags: always
      when:
        - ansible_version.full is version_compare('2.8', '<')
        - ( ansible_python_interpreter is not defined or ansible_python_interpreter == "/usr/bin/python" )
      ## ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required)
      ignore_errors: true
      ## reason for ignore_errors: true
      ## "version_compare" was replaced with "version" starting ansible 2.5;
      ## CentOS/RHEL 7.x use ansible 2.4, so not able to grasp what "version" is.
      ## Ansible 2.9 removes the version_compare and does not recognize it any longer.
      ## As our need is to add python2 only on versions before 2.8, if this fails
      ## (due to missing version_compare command), we are fine.
      ## We do not cover cases where it fails due to other reasons, but that is a reasonable risk,
      ## and that issue will be captured later in the flow.

    ## Alternatively, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3
    - name: when no python(2/3), install python3(Debian) python2(RedHat) for Ansible>=2.8
      raw: test -e /usr/bin/python3 || (apt -y update && apt install -y python3-minimal) || (yum install -y python3 python-simplejson)
      register: output
      changed_when: output.stdout != ""
      tags: always
      when:
        - ansible_version.full is version('2.8', '>=') or ( ansible_python_interpreter is defined and ansible_python_interpreter == "/usr/bin/python3" )
      ## ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required)
      ignore_errors: true
      ## reason for ignore_errors: true
      ## is similar to the one explained above (complements it)

    ## Only runs when proxy_env is undefined, so the default() always yields the empty map here.
    - name: Make sure proxy_env map is defined, even if not required; must be a map, e.g. empty map
      set_fact:
        proxy_env: '{{ proxy_env |default({}) }}'
        cacheable: true
      tags: always
      when: proxy_env is not defined

    - setup:  # aka gather_facts
      tags: always  # required for tags, see ansible issue: #14228

    ## Both lists below are AND-ed: the task runs (and then fails the host) only when
    ## neither ClusterConfiguration nor JoinConfiguration is defined.
    - name: test min. vars (group_vars/all) are set, like ClusterConfiguration (and k8s_network_addons_urls if netw is not via helm chart)
      debug:
        msg: 'Make sure min. vars (group_vars/all) are set, like ClusterConfiguration (and k8s_network_addons_urls if netw is not via helm chart)'
      when:
        - ClusterConfiguration is not defined
        - JoinConfiguration is not defined
      failed_when:
        - ClusterConfiguration is not defined
        - JoinConfiguration is not defined
      tags: always  # always check if we have vars in place
## proper reset of any previous cluster (if any)
- hosts: primary-master
  become: true
  become_method: sudo
  tags:
    - reset
    - master
  ## Each role entry passes a `task` parameter to the role
  ## (presumably selecting which task file of the role is run -- confirm in the role).
  roles:
    # - { role: helm, task: helm_reset, tags: [ 'reset', 'helm_reset' ] }  # in helm3 is no longer required
    - role: storage
      task: remove_pvs
      tags: ['reset', 'storage_reset', 'pvs_reset']
    - role: storage
      task: nfs_reset
      tags: ['reset', 'storage_reset', 'nfs_reset']
    - role: storage
      task: rook_reset
      tags: ['reset', 'storage_reset', 'rook_reset']
    ## done on master, affecting nodes
    - role: tools
      task: reset_drain
      tags: ['reset', 'node_reset', 'drain', 'node_drain']
## nodes -> reset and install common part (for all nodes)
- hosts: nodes
  become: true
  become_method: sudo
  tags:
    - node
  ## Every role below is skipped on hosts that are also members of the `masters`
  ## group (see the `when` condition on each entry).
  roles:
    - role: tools
      task: reset
      tags: ['reset', 'node_reset']
      when: "inventory_hostname not in groups['masters']"
    - role: tools
      task: weave_reset
      tags: ['reset', 'node_reset', 'network_reset', 'weave_reset', 'weave']
      when: "inventory_hostname not in groups['masters']"
    - role: common
      task: all
      tags: ['common', 'install', 'common_install', 'node_install', 'node']
      when: "inventory_hostname not in groups['masters']"
## master -> reset and install common part (for all masters - and sometimes etcd when colocated with masters)
- hosts: masters
  become: true
  become_method: sudo
  tags:
    - master
  ## Same reset / weave_reset / common sequence as applied to the nodes,
  ## here with the master-specific tags and no host filter.
  roles:
    - role: tools
      task: reset
      tags: ['reset', 'master_reset']
    - role: tools
      task: weave_reset
      tags: ['reset', 'master_reset', 'network_reset', 'weave', 'weave_reset']
    - role: common
      task: all
      tags: ['common', 'install', 'common_install', 'master_install']
## master -> install keepalived on masters (relevant if HA)
- hosts: masters
  become: true
  become_method: sudo
  any_errors_fatal: true
  tags:
    - master
    - install
    - ha
    - master_install
  roles:
    ## keepalived is applied only when there is more than one master and the
    ## configured masterha_type is 'vip' ('vip' is also the default when unset).
    - role: keepalived
      tags: ['master', 'install', 'master_install', 'ha', 'keepalived']
      when:
        - ( groups['masters'] | length ) > 1
        - ( custom.networking.masterha_type | default('vip') ) == 'vip'
- name: primary-master (or master in general) - it applies to both ha and non-ha
  hosts: primary-master
  become: true
  become_method: sudo
  any_errors_fatal: true
  tags:
    - master
    - install
    - master_install
    - ha
  roles:
    ## Runs the primary-master role with its `task` parameter set to `primary`.
    - role: primary-master
      task: primary
      tags: ['primary-master', 'master', 'install', 'master_install']
- hosts: secondary-masters
  become: true
  become_method: sudo
  any_errors_fatal: true
  tags:
    - master
    - install
    - ha
    - master_install
  roles:
    ## The same non-primary-master role is also used for the plain nodes in the `nodes` install play.
    - role: non-primary-master
      tags: ['secondary-masters', 'master', 'install', 'master_install', 'secondary_masters']
## node -> install nodes (kubeadm join, etc)
- hosts: nodes
  become: true
  become_method: sudo
  any_errors_fatal: true
  tags:
    - node
    - install
    - node_install
  roles:
    ## Skipped on hosts that are also in the `masters` group.
    - role: non-primary-master
      tags: ['node', 'install', 'node_install']
      when: "inventory_hostname not in groups['masters']"
## node -> label nodes (even when master is also a node)
- hosts: nodes
  become: true
  become_method: sudo
  any_errors_fatal: true
  tags:
    - node
    - install
    - node_install
    - label
  roles:
    ## No host filter here: labels are applied to every host in `nodes`, masters included.
    - role: tools
      task: labels
      tags: ['label']
## Post deploy (network, storage, taints, helm installation, helm charts deploy, any other addons)
- hosts: primary-master
  become: true
  become_method: sudo
  tags:
    - post_deploy
  ## Roles are listed in the order they run: labels, post_deploy, storage, helm, charts.
  roles:
    - role: tools
      task: labels
      tags: ['label']
    - role: post_deploy
      task: all
      tags: ['post_deploy_no_helm']
    - role: storage
      task: create_all
      tags: ['storage', 'rook', 'nfs', 'vsphere']
    - role: helm
      task: helm
      tags: ['helm']
    - role: helm
      task: charts_deploy
      tags: ['helm', 'charts_deploy']
### For fixes like vsphere's bug, we have to reboot after some more fixes...
#https://github.com/vmware/kubernetes/issues/495
- hosts: mustrebootlist
  gather_facts: false
  become: true
  become_method: sudo
  tags:
    - mustrebootlist
    - vsphere_bug_fix
    - vsphere
  roles:
    ## Reboot runs only when all of these hold: cloudProvider is 'vsphere',
    ## allow_restart is truthy (defaults to False) and vsphere_bug_fix is set and truthy.
    - role: tools
      task: reboot
      tags: ['reboot_minimal']
      when: "ClusterConfiguration.cloudProvider is defined and ClusterConfiguration.cloudProvider == 'vsphere' and allow_restart | default(False) and vsphere_bug_fix is defined and vsphere_bug_fix"
## Generic Sanity
- hosts: masters
  become: true
  become_method: sudo
  tags:
    - master
  pre_tasks:
    ## NOTE(review): the `group` module manages OS groups on the target host, not
    ## Ansible inventory groups -- confirm that removing an OS group named
    ## `mustrebootlist` is really what is intended here.
    - name: remove temporary mustreboot temporary group
      group:
        name: mustrebootlist
        state: absent
  roles:
    - role: tools
      task: cluster_sanity
      tags: ['cluster_sanity', 'sanity']
    - role: tools
      task: postinstall_messages
      tags: ['cluster_sanity', 'sanity']
## to reset/add only some (more) nodes:
## 1. keep in hosts only:
## - the master
## - the affected node (all other nodes should not be there)
## 2. Have the token defined in the group_vars/all
## 3. Run using only this/these tag(s):
## ansible-playbook -i hosts -v site.yml --tags "node" # same as: ansible-playbook -i hosts -v site.yml --tags "node_reset,node_install,cluster_sanity,cluster_info"
## To get cluster info/sanity:
## ansible-playbook -i hosts -v site.yml --tags "cluster_sanity,cluster_info"