diff --git a/experiments/kubernetes/bin/create-all.sh b/experiments/kubernetes/bin/create-all.sh
index 1ceaac73..e8e8274d 100755
--- a/experiments/kubernetes/bin/create-all.sh
+++ b/experiments/kubernetes/bin/create-all.sh
@@ -67,7 +67,7 @@
yq write \
--inplace \
"${statusyml:?}" \
- 'aglais.spec.openstack.cloudname' \
+ 'aglais.status.openstack.cloudname' \
"${cloudname}"
echo "---- ---- ----"
@@ -99,7 +99,7 @@
yq write \
--inplace \
"${statusyml:?}" \
- 'aglais.status.openstack.cluster.id' \
+ 'aglais.status.openstack.magnum.uuid' \
"${clusterid}"
# -----------------------------------------------------
@@ -192,13 +192,22 @@ EOF
#TODO Patch the k8s metrics
+ # We can't capture the external IP address here because it won't be ready yet.
+
+ yq write \
+ --inplace \
+ "${statusyml:?}" \
+ 'aglais.status.kubernetes.ingress.dashboard.hostname' \
+ "${dashhost}"
+
# -----------------------------------------------------
# Mount the data shares.
# Using a hard coded cloud name to make it portable.
+# Share mode hard coded to 'rw' due to problems with the ReadOnlyMany access mode.
sharelist='/common/manila/datashares.yaml'
- sharemode='ro'
+ sharemode='rw'
for shareid in $(
yq read \
@@ -283,6 +292,14 @@ EOF
"/kubernetes/helm/tools/zeppelin" \
--values "/tmp/zeppelin-values.yaml"
+ # We can't capture the IP address here because it won't be ready yet.
+
+ yq write \
+ --inplace \
+ "${statusyml:?}" \
+ 'aglais.status.kubernetes.ingress.zeppelin.hostname' \
+ "${zepphost}"
+
# -----------------------------------------------------
# Install our Drupal chart.
@@ -307,3 +324,57 @@ EOF
cat > "/tmp/drupal-values.yaml" << EOF
drupal_server_hostname: "${drupalhost:?}"
EOF
+
+
+
+
+# -----------------------------------------------------
+# Capture our Dashboard ingress IP address.
+# ** This has to be done after a delay to allow Kubernetes time to allocate the IP address.
+
+# sleep 30
+
+ daship=$(
+ kubectl \
+ --namespace "${namespace:?}" \
+ get Ingress \
+ --output json \
+ | jq -r '
+ .items[]
+ | select(.metadata.name == "aglais-dashboard-kubernetes-dashboard")
+ | .status.loadBalancer.ingress[0].ip
+ '
+ )
+
+ yq write \
+ --inplace \
+ '/tmp/aglais-status.yml' \
+ 'aglais.status.kubernetes.ingress.dashboard.ipv4' \
+ "${daship}"
+
+
+# -----------------------------------------------------
+# Capture our Zeppelin ingress IP address.
+# ** This has to be done after a delay to allow Kubernetes time to allocate the IP address.
+
+# sleep 30
+
+ zeppip=$(
+ kubectl \
+ --namespace "${namespace:?}" \
+ get Ingress \
+ --output json \
+ | jq -r '
+ .items[]
+ | select(.metadata.name == "zeppelin-server-ingress")
+ | .status.loadBalancer.ingress[0].ip
+ '
+ )
+
+ yq write \
+ --inplace \
+ "${statusyml:?}" \
+ 'aglais.status.kubernetes.ingress.zeppelin.ipv4' \
+ "${zeppip}"
+
+
diff --git a/notes/zrq/20210125-01-kubernetes-deploy.txt b/notes/zrq/20210125-01-kubernetes-deploy.txt
new file mode 100644
index 00000000..fecbaaea
--- /dev/null
+++ b/notes/zrq/20210125-01-kubernetes-deploy.txt
@@ -0,0 +1,992 @@
+#
+#
+#
+# Copyright (c) 2021, ROE (http://www.roe.ac.uk/)
+#
+# This information is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This information is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#
+#
+#zrq-notes-time
+#zrq-notes-indent
+#zrq-notes-crypto
+#zrq-notes-ansible
+#zrq-notes-osformat
+#zrq-notes-zeppelin
+#
+
+ Target:
+
+ Try to get the Kubernetes deployment to work.
+
+ Results:
+
+ Failed.
+ Failing to mount the PV claims, intermittent .. different results, different reasons.
+
+
+# -----------------------------------------------------
+# Update the Openstack cloud name.
+#[user@desktop]
+
+ cloudname=gaia-dev
+
+ sed -i '
+ s/^\(AGLAIS_CLOUD\)=.*$/\1='${cloudname:?}'/
+ ' "${HOME}/aglais.env"
+
+# -----------------------------------------------------
+# Create a container to work with.
+# (*) extra volume mount for /common
+# (*) mount kubernetes directory as read/write
+#[user@desktop]
+
+ source "${HOME:?}/aglais.env"
+
+ podman run \
+ --rm \
+ --tty \
+ --interactive \
+ --name kubernator \
+ --hostname kubernator \
+ --env "SSH_AUTH_SOCK=/mnt/ssh_auth_sock" \
+ --volume "${SSH_AUTH_SOCK}:/mnt/ssh_auth_sock:rw,z" \
+ --env "cloudname=${AGLAIS_CLOUD:?}" \
+ --volume "${HOME:?}/clouds.yaml:/etc/openstack/clouds.yaml:ro,z" \
+ --volume "${AGLAIS_CODE:?}/experiments/common:/common:ro,z" \
+ --volume "${AGLAIS_CODE:?}/experiments/openstack:/openstack:ro,z" \
+ --volume "${AGLAIS_CODE:?}/experiments/kubernetes:/kubernetes:rw,z" \
+ atolmis/ansible-client:latest \
+ bash
+
+
+# -----------------------------------------------------
+# Delete everything.
+#[root@kubernator]
+
+ /openstack/bin/delete-all.sh \
+ "${cloudname:?}"
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Create our Aglais configuration.
+#[root@kubernator]
+
+cat > '/tmp/aglais-config.yml' << EOF
+aglais:
+ version: 1.0
+ spec:
+ openstack:
+ cloudname: ${cloudname:?}
+ dashboard:
+ hostname: dashboard.metagrid.xyz
+ zeppelin:
+ hostname: zeppelin.metagrid.xyz
+ drupal:
+ hostname: drupal.metagrid.xyz
+EOF
+
+
+# -----------------------------------------------------
+# Create everything.
+#[root@kubernator]
+
+ /kubernetes/bin/create-all.sh
+
+
+ > ....
+ > ....
+ > Installing dashboard Helm chart
+ > Namespace [aglais-20210125]
+ > Dash host [dashboard.metagrid.xyz]
+ > Getting updates for unmanaged Helm repositories...
+ > ...Successfully got an update from the "https://kubernetes.github.io/dashboard" chart repository
+ > Saving 1 charts
+ > Downloading kubernetes-dashboard from repo https://kubernetes.github.io/dashboard
+ > Deleting outdated charts
+ > Release "aglais-dashboard" does not exist. Installing it now.
+ > Error: Internal error occurred: failed calling webhook "validate.nginx.ingress.kubernetes.io": Post https://aglais-ingress-nginx-controller-admission.aglais-20210125.svc:443/networking/v1beta1/ingresses?timeout=10s: dial tcp 10.254.121.232:443: connect: connection refused
+ > ....
+ > ....
+
+ #
+ # Dashboard error is back ...
+ #
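+
+    #
+    # The webhook error suggests the nginx admission service was not ready
+    # when the dashboard chart was installed. A possible mitigation (sketch,
+    # not tried here; deployment name taken from the pod list further down,
+    # namespace as captured below) would be to wait for the ingress controller
+    # Deployment before installing the dashboard chart:
+    #
+
+    kubectl \
+        --namespace "${namespace:?}" \
+        wait \
+        --for=condition=Available \
+        --timeout=120s \
+        "deployment/aglais-ingress-nginx-controller"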
+
+# -----------------------------------------------------
+# Check the results.
+#[root@kubernator]
+
+ cat '/tmp/aglais-status.yml'
+
+ > aglais:
+ > status:
+ > deployment:
+ > type: kubernetes
+ > name: aglais-20210125
+ > date: 20210125:024859
+ > openstack:
+ > cluster:
+ > id: 789e1e50-735a-4705-ade3-b409e6f62fc5
+ > kubernetes:
+ > namespace: aglais-20210125
+ > spec:
+ > openstack:
+ > cloudname: gaia-dev
+
+
+# -----------------------------------------------------
+# Get the cluster ID and K8s namespace.
+#[root@kubernator]
+
+ clusterid=$(
+ yq read '/tmp/aglais-status.yml' 'aglais.status.openstack.cluster.id'
+ )
+
+ namespace=$(
+ yq read '/tmp/aglais-status.yml' 'aglais.status.kubernetes.namespace'
+ )
+
+cat << EOF
+Cluster ID [${clusterid}]
+Name space [${namespace}]
+EOF
+
+
+ > Cluster ID [789e1e50-735a-4705-ade3-b409e6f62fc5]
+ > Name space [aglais-20210125]
+
+
+# -----------------------------------------------------
+# Get the Dashboard ServiceAccount token.
+#[root@kubernator]
+
+ secretname=$(
+ kubectl \
+ --output json \
+ --namespace "${namespace:?}" \
+ get ServiceAccount \
+ "aglais-dashboard-kubernetes-dashboard" \
+ | jq -r '.secrets[0].name'
+ )
+
+ kubectl \
+ --output json \
+ --namespace "${namespace:?}" \
+ get Secret \
+ "${secretname:?}" \
+ | jq -r '.data.token | @base64d'
+
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Get the Ingress address.
+#[root@kubernator]
+
+ kubectl \
+ --namespace "${namespace:?}" \
+ get Ingress
+
+ > NAME HOSTS ADDRESS PORTS AGE
+ > zeppelin-server-ingress zeppelin.metagrid.xyz 128.232.227.215 80, 443 5m28s
+
+
+ zeppelinip=$(
+ kubectl \
+ --namespace "${namespace:?}" \
+ get Ingress \
+ --output json \
+ | jq -r '
+ .items[]
+ | select(.metadata.name == "zeppelin-server-ingress")
+ | .status.loadBalancer.ingress[0].ip
+ '
+ )
+
+ echo "Zeppelin IP [${zeppelinip:?}]"
+
+ > Zeppelin IP [128.232.227.215]
+
+ yq write \
+ --inplace \
+ '/tmp/aglais-status.yml' \
+ 'aglais.status.kubernetes.ingress.zeppelin.ipv4' \
+ "${zeppelinip:?}"
+
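+    #
+    # The create-all.sh script can't record this address at deploy time because
+    # Kubernetes hasn't allocated it yet. A polling loop along these lines could
+    # replace the fixed delay (sketch, not part of the script yet):
+    #
+
+    zeppelinip=''
+    for attempt in $(seq 1 30)
+    do
+        zeppelinip=$(
+            kubectl \
+                --namespace "${namespace:?}" \
+                get Ingress 'zeppelin-server-ingress' \
+                --output json \
+            | jq -r '.status.loadBalancer.ingress[0].ip // empty'
+            )
+        if [ -n "${zeppelinip}" ]
+        then
+            break
+        fi
+        sleep 10
+    done
+
+    echo "Zeppelin IP [${zeppelinip:-unknown}]"
+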
+
+# -----------------------------------------------------
+# -----------------------------------------------------
+
+ #
+ # Update our DNS ..
+ #
+
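+    #
+    # Quick check that the DNS records now point at the new ingress address
+    # (sketch; assumes the 'dig' tool is available in the client container).
+    #
+
+    dig +short 'dashboard.metagrid.xyz'
+    dig +short 'zeppelin.metagrid.xyz'
+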
+
+# -----------------------------------------------------
+# Check the Dashboard page.
+#[root@kubernator]
+
+ curl --head --insecure "https://dashboard.metagrid.xyz/"
+
+ > HTTP/2 404
+ > date: Mon, 25 Jan 2021 03:30:30 GMT
+ > content-type: text/html
+ > content-length: 146
+ > strict-transport-security: max-age=15724800; includeSubDomains
+
+ #
+ # As expected ..
+ #
+
+
+# -----------------------------------------------------
+# Check the Zeppelin page.
+#[root@kubernator]
+
+ curl --head --insecure "https://zeppelin.metagrid.xyz/"
+
+ > HTTP/2 200
+ > date: Mon, 25 Jan 2021 03:31:08 GMT
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# -----------------------------------------------------
+# Login to Zeppelin and test ...
+#[user@desktop]
+
+ firefox --new-window "https://zeppelin.metagrid.xyz/" &
+
+ > Looks good.
+ > Login works :-)
+
+
+# -----------------------------------------------------
+# -----------------------------------------------------
+# Mount each of the external catalogs in Spark.
+#[user@zeppelin]
+
+# --------------------------------
+%spark.conf
+
+spark.kubernetes.executor.volumes.persistentVolumeClaim.aglais-gaia-dr2.mount.path /data/gaia/dr2
+spark.kubernetes.executor.volumes.persistentVolumeClaim.aglais-gaia-dr2.mount.readOnly true
+spark.kubernetes.executor.volumes.persistentVolumeClaim.aglais-gaia-dr2.options.claimName aglais-gaia-dr2-claim
+
+spark.kubernetes.executor.volumes.persistentVolumeClaim.aglais-gaia-edr3.mount.path /data/gaia/edr3
+spark.kubernetes.executor.volumes.persistentVolumeClaim.aglais-gaia-edr3.mount.readOnly true
+spark.kubernetes.executor.volumes.persistentVolumeClaim.aglais-gaia-edr3.options.claimName aglais-gaia-edr3-claim
+
+spark.kubernetes.executor.volumes.persistentVolumeClaim.aglais-wise-allwise.mount.path /data/wise/allwise
+spark.kubernetes.executor.volumes.persistentVolumeClaim.aglais-wise-allwise.mount.readOnly true
+spark.kubernetes.executor.volumes.persistentVolumeClaim.aglais-wise-allwise.options.claimName aglais-wise-allwise-claim
+
+spark.kubernetes.executor.volumes.persistentVolumeClaim.aglais-panstarrs-dr1.mount.path /data/panstarrs/dr1
+spark.kubernetes.executor.volumes.persistentVolumeClaim.aglais-panstarrs-dr1.mount.readOnly true
+spark.kubernetes.executor.volumes.persistentVolumeClaim.aglais-panstarrs-dr1.options.claimName aglais-panstarrs-dr1-claim
+
+spark.kubernetes.executor.volumes.persistentVolumeClaim.aglais-twomass-allsky.mount.path /data/twomass/allsky
+spark.kubernetes.executor.volumes.persistentVolumeClaim.aglais-twomass-allsky.mount.readOnly true
+spark.kubernetes.executor.volumes.persistentVolumeClaim.aglais-twomass-allsky.options.claimName aglais-twomass-allsky-claim
+
+
+# --------------------------------
+%spark.pyspark
+
+gaia_dr2 = sqlContext.read.parquet(
+ "/data/gaia/dr2"
+ )
+
+print("gaia-dr2 count: ", gaia_dr2.count())
+print("gaia-dr2 partitions: ", gaia_dr2.rdd.getNumPartitions())
+
+ > org.apache.zeppelin.interpreter.InterpreterException: java.io.IOException: Launching zeppelin interpreter on kubernetes is time out, kill it now
+ > at org.apache.zeppelin.interpreter.remote.RemoteInterpreter.open(RemoteInterpreter.java:132)
+ > at org.apache.zeppelin.interpreter.remote.RemoteInterpreter.getFormType(RemoteInterpreter.java:279)
+ > at org.apache.zeppelin.notebook.Paragraph.jobRun(Paragraph.java:465)
+ > at org.apache.zeppelin.notebook.Paragraph.jobRun(Paragraph.java:73)
+ > at org.apache.zeppelin.scheduler.Job.run(Job.java:172)
+ > at org.apache.zeppelin.scheduler.AbstractScheduler.runJob(AbstractScheduler.java:130)
+ > at org.apache.zeppelin.scheduler.RemoteScheduler$JobRunner.run(RemoteScheduler.java:180)....
+ > ....
+
+ #
+ # Failed to load the interpreter.
+ #
+
+
+# -----------------------------------------------------
+# List the active Pods.
+#[root@kubernator]
+
+ kubectl --namespace ${namespace} get pod
+
+ > NAME READY STATUS RESTARTS AGE
+ > aglais-ceph-csi-cephfs-nodeplugin-j672m 3/3 Running 0 60m
+ > aglais-ceph-csi-cephfs-nodeplugin-vdswl 3/3 Running 0 60m
+ > aglais-ceph-csi-cephfs-nodeplugin-wdf9h 3/3 Running 0 60m
+ > aglais-ceph-csi-cephfs-nodeplugin-xplzq 3/3 Running 0 60m
+ > aglais-ceph-csi-cephfs-provisioner-f9ff8cd4c-2hzwq 6/6 Running 0 60m
+ > aglais-ceph-csi-cephfs-provisioner-f9ff8cd4c-fltvf 6/6 Running 0 60m
+ > aglais-ceph-csi-cephfs-provisioner-f9ff8cd4c-hn6xn 6/6 Running 0 60m
+ > aglais-dashboard-kubernetes-dashboard-b5f955c8f-7sjvf 2/2 Running 0 60m
+ > aglais-gaia-dr2-testpod 0/1 ContainerCreating 0 60m
+ > aglais-gaia-edr3-testpod 0/1 ContainerCreating 0 60m
+ > aglais-ingress-nginx-controller-54f444477b-65hj2 1/1 Running 0 60m
+ > aglais-openstack-manila-csi-controllerplugin-0 3/3 Running 0 60m
+ > aglais-openstack-manila-csi-nodeplugin-jbnqb 2/2 Running 0 60m
+ > aglais-openstack-manila-csi-nodeplugin-jwwgj 2/2 Running 0 60m
+ > aglais-openstack-manila-csi-nodeplugin-ssd5j 2/2 Running 0 60m
+ > aglais-openstack-manila-csi-nodeplugin-vprhp 2/2 Running 0 60m
+ > aglais-panstarrs-dr1-testpod 0/1 ContainerCreating 0 59m
+ > aglais-twomass-allsky-testpod 0/1 ContainerCreating 0 59m
+ > aglais-user-nch-testpod 1/1 Running 0 59m
+ > aglais-user-stv-testpod 0/1 ContainerCreating 0 58m
+ > aglais-user-zrq-testpod 0/1 ContainerCreating 0 59m
+ > aglais-wise-allwise-testpod 0/1 ContainerCreating 0 59m
+ > spark-sukuqz 0/1 Init:0/1 0 2s
+ > zeppelin-server-deploy-7cb7f54d5c-srs95 3/3 Running 0 58m
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Check the Spark interpreter Pod.
+#[root@kubernator]
+
+ kubectl --namespace ${namespace} get pod spark-sukuqz
+
+ > NAME READY STATUS RESTARTS AGE
+ > spark-sukuqz 0/1 Init:0/1 0 8s
+
+
+ kubectl --namespace ${namespace} describe pod spark-sukuqz
+
+ > Name: spark-sukuqz
+ > Namespace: aglais-20210125
+ > Node: aglais-20210125-cluster-rqiklyztkmq6-node-0/10.0.0.74
+ > Start Time: Mon, 25 Jan 2021 03:57:36 +0000
+ > Labels: app=spark-sukuqz
+ > interpreterGroupId=spark-shared_process
+ > interpreterSettingName=spark
+ > Annotations:
+ > Status: Pending
+ > ....
+ > ....
+ > Events:
+ > Type Reason Age From Message
+ > ---- ------ ---- ---- -------
+ > Normal Scheduled default-scheduler Successfully assigned aglais-20210125/spark-sukuqz to aglais-20210125-cluster-rqiklyztkmq6-node-0
+ > Warning FailedMount 18s (x4 over 23s) kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-0 MountVolume.MountDevice failed for volume "aglais-gaia-edr3-volume" : rpc error: code = InvalidArgument desc = stage secrets cannot be nil or empty
+ > Warning FailedMount 18s (x4 over 23s) kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-0 MountVolume.MountDevice failed for volume "aglais-wise-allwise-volume" : rpc error: code = InvalidArgument desc = stage secrets cannot be nil or empty
+ > Warning FailedMount 17s (x4 over 23s) kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-0 MountVolume.MountDevice failed for volume "aglais-gaia-dr2-volume" : rpc error: code = Internal desc = chmod /var/lib/kubelet/plugins/kubernetes.io/csi/pv/aglais-gaia-dr2-volume/globalmount: permission denied
+ > Warning FailedMount 15s (x5 over 24s) kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-0 MountVolume.MountDevice failed for volume "aglais-panstarrs-dr1-volume" : rpc error: code = InvalidArgument desc = stage secrets cannot be nil or empty
+ > Warning FailedMount 15s (x4 over 20s) kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-0 MountVolume.MountDevice failed for volume "aglais-user-stv-volume" : rpc error: code = InvalidArgument desc = stage secrets cannot be nil or empty
+ > Warning FailedMount 15s (x4 over 19s) kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-0 MountVolume.MountDevice failed for volume "aglais-user-zrq-volume" : rpc error: code = InvalidArgument desc = stage secrets cannot be nil or empty
+
+
+# -----------------------------------------------------
+# Check the DR2 test Pod.
+#[root@kubernator]
+
+ kubectl --namespace ${namespace} describe pod aglais-gaia-dr2-testpod
+
+ > Name: aglais-gaia-dr2-testpod
+ > Namespace: aglais-20210125
+ > Node: aglais-20210125-cluster-rqiklyztkmq6-node-2/10.0.0.81
+ > Start Time: Mon, 25 Jan 2021 02:57:14 +0000
+ > Labels: aglais.dataset=aglais-gaia-dr2
+ > aglais.name=aglais-gaia-dr2-testpod
+ > app.kubernetes.io/instance=aglais-gaia-dr2
+ > app.kubernetes.io/managed-by=Helm
+ > app.kubernetes.io/name=manila-share
+ > app.kubernetes.io/version=0.0.1
+ > helm.sh/chart=manila-share-0.0.1
+ > Annotations: meta.helm.sh/release-name: aglais-gaia-dr2
+ > meta.helm.sh/release-namespace: aglais-20210125
+ > Status: Pending
+ > ....
+ > ....
+ > Events:
+ > Type Reason Age From Message
+ > ---- ------ ---- ---- -------
+ > Warning FailedMount 45m (x4 over 65m) kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-2 Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[local-data default-token-swjxp test-data]: timed out waiting for the condition
+ > Warning FailedMount 22m (x3 over 54m) kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-2 Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[default-token-swjxp test-data local-data]: timed out waiting for the condition
+ > Warning FailedMount 6m26s (x17 over 63m) kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-2 Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[test-data local-data default-token-swjxp]: timed out waiting for the condition
+ > Warning FailedMount 2m7s (x34 over 66m) kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-2 MountVolume.MountDevice failed for volume "aglais-gaia-dr2-volume" : rpc error: code = Internal desc = chmod /var/lib/kubelet/plugins/kubernetes.io/csi/pv/aglais-gaia-dr2-volume/globalmount: permission denied
+
+# -----------------------------------------------------
+# Check the eDR3 test Pod.
+#[root@kubernator]
+
+ kubectl --namespace ${namespace} describe pod aglais-gaia-edr3-testpod
+
+ > Name: aglais-gaia-edr3-testpod
+ > Namespace: aglais-20210125
+ > Node: aglais-20210125-cluster-rqiklyztkmq6-node-1/10.0.0.172
+ > Start Time: Mon, 25 Jan 2021 02:57:27 +0000
+ > Labels: aglais.dataset=aglais-gaia-edr3
+ > aglais.name=aglais-gaia-edr3-testpod
+ > app.kubernetes.io/instance=aglais-gaia-edr3
+ > app.kubernetes.io/managed-by=Helm
+ > app.kubernetes.io/name=manila-share
+ > app.kubernetes.io/version=0.0.1
+ > helm.sh/chart=manila-share-0.0.1
+ > Annotations: meta.helm.sh/release-name: aglais-gaia-edr3
+ > meta.helm.sh/release-namespace: aglais-20210125
+ > Status: Pending
+ > ....
+ > ....
+ > Events:
+ > Type Reason Age From Message
+ > ---- ------ ---- ---- -------
+ > Warning FailedMount 22m (x16 over 63m) kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-1 Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[test-data local-data default-token-swjxp]: timed out waiting for the condition
+ > Warning FailedMount 7m5s (x5 over 65m) kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-1 Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[default-token-swjxp test-data local-data]: timed out waiting for the condition
+ > Warning FailedMount 2m47s (x39 over 68m) kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-1 MountVolume.MountDevice failed for volume "aglais-gaia-edr3-volume" : rpc error: code = InvalidArgument desc = stage secrets cannot be nil or empty
+
+
+# -----------------------------------------------------
+# Uninstall the DR2 Helm chart.
+#[root@kubernator]
+
+ helm --namespace ${namespace} list
+
+ > NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION
+ > aglais aglais-20210125 1 2021-01-25 02:56:33.71216801 +0000 UTC deployed aglais-0.0.1 0.0.1
+ > aglais-dashboard aglais-20210125 1 2021-01-25 02:56:57.237206389 +0000 UTC failed aglais-dashboard-0.0.1 0.0.1
+ > aglais-gaia-dr2 aglais-20210125 1 2021-01-25 02:57:13.386058418 +0000 UTC deployed manila-share-0.0.1 0.0.1
+ > aglais-gaia-edr3 aglais-20210125 1 2021-01-25 02:57:26.35641445 +0000 UTC deployed manila-share-0.0.1 0.0.1
+ > aglais-panstarrs-dr1 aglais-20210125 1 2021-01-25 02:57:50.90011394 +0000 UTC deployed manila-share-0.0.1 0.0.1
+ > aglais-twomass-allsky aglais-20210125 1 2021-01-25 02:58:02.568336858 +0000 UTC deployed manila-share-0.0.1 0.0.1
+ > aglais-user-nch aglais-20210125 1 2021-01-25 02:58:15.395168148 +0000 UTC deployed manila-share-0.0.1 0.0.1
+ > aglais-user-stv aglais-20210125 1 2021-01-25 02:58:40.132723076 +0000 UTC deployed manila-share-0.0.1 0.0.1
+ > aglais-user-zrq aglais-20210125 1 2021-01-25 02:58:27.136220405 +0000 UTC deployed manila-share-0.0.1 0.0.1
+ > aglais-wise-allwise aglais-20210125 1 2021-01-25 02:57:38.726474125 +0000 UTC deployed manila-share-0.0.1 0.0.1
+ > aglais-zeppelin aglais-20210125 1 2021-01-25 02:58:42.919251268 +0000 UTC deployed aglais-zeppelin-0.0.1 0.0.1
+
+
+ helm --namespace ${namespace} uninstall aglais-gaia-dr2
+
+ > release "aglais-gaia-dr2" uninstalled
+
+
+# -----------------------------------------------------
+# Install the DR2 Helm chart.
+#[root@kubernator]
+
+ sharename=aglais-gaia-dr2
+ mountpath=/data/gaia/dr2
+ sharemode='rw'
+
+ '/kubernetes/bin/cephfs-mount.sh' \
+ 'gaia-prod' \
+ "${namespace:?}" \
+ "${sharename:?}" \
+ "${mountpath:?}" \
+ "${sharemode:?}"
+
+ > ---- ---- ----
+ > File [cephfs-mount.sh]
+ > Path [/kubernetes/bin]
+ > ---- ---- ----
+ > Cloud name [gaia-prod]
+ > Namespace [aglais-20210125]
+ > Share name [aglais-gaia-dr2]
+ > Mount path [/data/gaia/dr2]
+ > Share mode [rw]
+ > ---- ---- ----
+ >
+ > ----
+ > Share uuid [2e46b5a5-c5d9-44c0-b11c-310c222f4818]
+ > ----
+ > Share size [512]
+ > ----
+ > Access rule [50ad6086-491d-4056-9092-c57ac49d4d3d]
+ > Release "aglais-gaia-dr2" does not exist. Installing it now.
+ > NAME: aglais-gaia-dr2
+ > LAST DEPLOYED: Mon Jan 25 04:18:06 2021
+ > NAMESPACE: aglais-20210125
+ > STATUS: deployed
+ > REVISION: 1
+ > TEST SUITE: None
+ > NOTES:
+ > Use the testpod to check access to the mounted volume.
+
+
+ kubectl --namespace ${namespace} describe pod aglais-gaia-dr2-testpod
+
+ > Name: aglais-gaia-dr2-testpod
+ > Namespace: aglais-20210125
+ > Node: aglais-20210125-cluster-rqiklyztkmq6-node-3/10.0.0.107
+ > Start Time: Mon, 25 Jan 2021 04:18:06 +0000
+ > Labels: aglais.dataset=aglais-gaia-dr2
+ > aglais.name=aglais-gaia-dr2-testpod
+ > app.kubernetes.io/instance=aglais-gaia-dr2
+ > app.kubernetes.io/managed-by=Helm
+ > app.kubernetes.io/name=manila-share
+ > app.kubernetes.io/version=0.0.1
+ > helm.sh/chart=manila-share-0.0.1
+ > Annotations: meta.helm.sh/release-name: aglais-gaia-dr2
+ > meta.helm.sh/release-namespace: aglais-20210125
+ > Status: Running
+ > ....
+ > ....
+ > Events:
+ > Type Reason Age From Message
+ > ---- ------ ---- ---- -------
+ > Normal Scheduled default-scheduler Successfully assigned aglais-20210125/aglais-gaia-dr2-testpod to aglais-20210125-cluster-rqiklyztkmq6-node-3
+ > Normal Pulling 14s kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-3 Pulling image "fedora:latest"
+ > Normal Pulled 7s kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-3 Successfully pulled image "fedora:latest"
+ > Normal Created 6s kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-3 Created container aglais-gaia-dr2-container
+ > Normal Started 6s kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-3 Started container aglais-gaia-dr2-container
+
+
+# -----------------------------------------------------
+# Uninstall the eDR3 Helm chart.
+#[root@kubernator]
+
+ helm --namespace ${namespace} uninstall aglais-gaia-edr3
+
+ > release "aglais-gaia-edr3" uninstalled
+
+
+# -----------------------------------------------------
+# Install the eDR3 Helm chart.
+#[root@kubernator]
+
+ sharename=aglais-gaia-edr3
+ mountpath=/data/gaia/edr3
+ sharemode='rw'
+
+ '/kubernetes/bin/cephfs-mount.sh' \
+ 'gaia-prod' \
+ "${namespace:?}" \
+ "${sharename:?}" \
+ "${mountpath:?}" \
+ "${sharemode:?}"
+
+ >
+ > ---- ---- ----
+ > File [cephfs-mount.sh]
+ > Path [/kubernetes/bin]
+ > ---- ---- ----
+ > Cloud name [gaia-prod]
+ > Namespace [aglais-20210125]
+ > Share name [aglais-gaia-edr3]
+ > Mount path [/data/gaia/edr3]
+ > Share mode [rw]
+ > ---- ---- ----
+ >
+ > ----
+ > Share uuid [ca8231c3-1f5c-4ebf-8ec0-d3cfe2629976]
+ > ----
+ > Share size [540]
+ > ----
+ > Access rule [0a4b37bc-e07e-4763-a8af-4d9cf3ae9620]
+ > Release "aglais-gaia-edr3" does not exist. Installing it now.
+ > NAME: aglais-gaia-edr3
+ > LAST DEPLOYED: Mon Jan 25 04:39:15 2021
+ > NAMESPACE: aglais-20210125
+ > STATUS: deployed
+ > REVISION: 1
+ > TEST SUITE: None
+ > NOTES:
+ > Use the testpod to check access to the mounted volume.
+
+
+ kubectl --namespace ${namespace} describe pod aglais-gaia-edr3-testpod
+
+ > Name: aglais-gaia-edr3-testpod
+ > Namespace: aglais-20210125
+ > Node: aglais-20210125-cluster-rqiklyztkmq6-node-1/10.0.0.172
+ > Start Time: Mon, 25 Jan 2021 04:39:16 +0000
+ > Labels: aglais.dataset=aglais-gaia-edr3
+ > aglais.name=aglais-gaia-edr3-testpod
+ > app.kubernetes.io/instance=aglais-gaia-edr3
+ > app.kubernetes.io/managed-by=Helm
+ > app.kubernetes.io/name=manila-share
+ > app.kubernetes.io/version=0.0.1
+ > helm.sh/chart=manila-share-0.0.1
+ > Annotations: meta.helm.sh/release-name: aglais-gaia-edr3
+ > meta.helm.sh/release-namespace: aglais-20210125
+ > Status: Pending
+ > ....
+ > ....
+ > Events:
+ > Type Reason Age From Message
+ > ---- ------ ---- ---- -------
+ > Normal Scheduled default-scheduler Successfully assigned aglais-20210125/aglais-gaia-edr3-testpod to aglais-20210125-cluster-rqiklyztkmq6-node-1
+ > Warning FailedMount 5s (x7 over 37s) kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-1 MountVolume.MountDevice failed for volume "aglais-gaia-edr3-volume" : rpc error: code = InvalidArgument desc = stage secrets cannot be nil or empty
+
+
+ #
+ # Issue is intermittent ..
+ # Repeat the uninstall/install for DR2 and we get a different result.
+ #
+
+
+# -----------------------------------------------------
+# Re-install the DR2 Helm chart.
+#[root@kubernator]
+
+ helm --namespace ${namespace} uninstall aglais-gaia-dr2
+
+ > release "aglais-gaia-dr2" uninstalled
+
+ #
+ # Wait until the pod, pv and pvc have all gone.
+ #
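+
+    #
+    # A sketch of how that wait could be scripted, using the resource names
+    # created by the manila-share chart in this deployment:
+    #
+
+    kubectl --namespace "${namespace:?}" wait --for=delete 'pod/aglais-gaia-dr2-testpod' --timeout=120s
+    kubectl --namespace "${namespace:?}" wait --for=delete 'pvc/aglais-gaia-dr2-claim'   --timeout=120s
+    kubectl wait --for=delete 'pv/aglais-gaia-dr2-volume' --timeout=120s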
+
+ sharename=aglais-gaia-dr2
+ mountpath=/data/gaia/dr2
+ sharemode='rw'
+
+ '/kubernetes/bin/cephfs-mount.sh' \
+ 'gaia-prod' \
+ "${namespace:?}" \
+ "${sharename:?}" \
+ "${mountpath:?}" \
+ "${sharemode:?}"
+
+ > ---- ---- ----
+ > File [cephfs-mount.sh]
+ > Path [/kubernetes/bin]
+ > ---- ---- ----
+ > Cloud name [gaia-prod]
+ > Namespace [aglais-20210125]
+ > Share name [aglais-gaia-dr2]
+ > Mount path [/data/gaia/dr2]
+ > Share mode [rw]
+ > ---- ---- ----
+ >
+ > ----
+ > Share uuid [2e46b5a5-c5d9-44c0-b11c-310c222f4818]
+ > ----
+ > Share size [512]
+ > ----
+ > Access rule [50ad6086-491d-4056-9092-c57ac49d4d3d]
+ > Release "aglais-gaia-dr2" does not exist. Installing it now.
+ > NAME: aglais-gaia-dr2
+ > LAST DEPLOYED: Mon Jan 25 04:43:46 2021
+ > NAMESPACE: aglais-20210125
+ > STATUS: deployed
+ > REVISION: 1
+ > TEST SUITE: None
+ > NOTES:
+ > Use the testpod to check access to the mounted volume.
+
+
+ kubectl --namespace ${namespace} get pv
+
+ > NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE
+ > aglais-gaia-dr2-volume 512 RWX Retain Bound aglais-20210125/aglais-gaia-dr2-claim 18s
+ > ....
+ > ....
+
+
+ kubectl --namespace ${namespace} get pvc
+
+ > NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
+ > aglais-gaia-dr2-claim Bound aglais-gaia-dr2-volume 512 RWX 13s
+ > ....
+ > ....
+
+
+ kubectl --namespace ${namespace} describe pod aglais-gaia-dr2-testpod
+
+ > Name: aglais-gaia-dr2-testpod
+ > Namespace: aglais-20210125
+ > Node: aglais-20210125-cluster-rqiklyztkmq6-node-0/10.0.0.74
+ > Start Time: Mon, 25 Jan 2021 04:43:47 +0000
+ > Labels: aglais.dataset=aglais-gaia-dr2
+ > aglais.name=aglais-gaia-dr2-testpod
+ > app.kubernetes.io/instance=aglais-gaia-dr2
+ > app.kubernetes.io/managed-by=Helm
+ > app.kubernetes.io/name=manila-share
+ > app.kubernetes.io/version=0.0.1
+ > helm.sh/chart=manila-share-0.0.1
+ > Annotations: meta.helm.sh/release-name: aglais-gaia-dr2
+ > meta.helm.sh/release-namespace: aglais-20210125
+ > Status: Pending
+ > ....
+ > ....
+ > Events:
+ > Type Reason Age From Message
+ > ---- ------ ---- ---- -------
+ > Normal Scheduled default-scheduler Successfully assigned aglais-20210125/aglais-gaia-dr2-testpod to aglais-20210125-cluster-rqiklyztkmq6-node-0
+ > Warning FailedMount 9s (x6 over 25s) kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-0 MountVolume.MountDevice failed for volume "aglais-gaia-dr2-volume" : rpc error: code = Internal desc = chmod /var/lib/kubelet/plugins/kubernetes.io/csi/pv/aglais-gaia-dr2-volume/globalmount: permission denied
+
+ #
+ # Issue is intermittent ..
+ # Repeat the uninstall/install for DR2 and we get a different result.
+ #
+
+# -----------------------------------------------------
+# Try again .....
+#[root@kubernator]
+
+ helm --namespace ${namespace} uninstall aglais-gaia-dr2
+
+ > release "aglais-gaia-dr2" uninstalled
+
+ #
+ # Wait until the pod, pv and pvc have all gone.
+ #
+
+ sharename=aglais-gaia-dr2
+ mountpath=/data/gaia/dr2
+ sharemode='rw'
+
+ '/kubernetes/bin/cephfs-mount.sh' \
+ 'gaia-prod' \
+ "${namespace:?}" \
+ "${sharename:?}" \
+ "${mountpath:?}" \
+ "${sharemode:?}"
+
+ > ---- ---- ----
+ > File [cephfs-mount.sh]
+ > Path [/kubernetes/bin]
+ > ---- ---- ----
+ > Cloud name [gaia-prod]
+ > Namespace [aglais-20210125]
+ > Share name [aglais-gaia-dr2]
+ > Mount path [/data/gaia/dr2]
+ > Share mode [rw]
+ > ---- ---- ----
+ >
+ > ----
+ > Share uuid [2e46b5a5-c5d9-44c0-b11c-310c222f4818]
+ > ----
+ > Share size [512]
+ > ----
+ > Access rule [50ad6086-491d-4056-9092-c57ac49d4d3d]
+ > Release "aglais-gaia-dr2" does not exist. Installing it now.
+ > NAME: aglais-gaia-dr2
+ > LAST DEPLOYED: Mon Jan 25 04:47:22 2021
+ > NAMESPACE: aglais-20210125
+ > STATUS: deployed
+ > REVISION: 1
+ > TEST SUITE: None
+ > NOTES:
+ > Use the testpod to check access to the mounted volume.
+
+
+ kubectl --namespace ${namespace} describe pod aglais-gaia-dr2-testpod
+
+
+ > Name: aglais-gaia-dr2-testpod
+ > Namespace: aglais-20210125
+ > Node: aglais-20210125-cluster-rqiklyztkmq6-node-3/10.0.0.107
+ > Start Time: Mon, 25 Jan 2021 04:47:23 +0000
+ > Labels: aglais.dataset=aglais-gaia-dr2
+ > aglais.name=aglais-gaia-dr2-testpod
+ > app.kubernetes.io/instance=aglais-gaia-dr2
+ > app.kubernetes.io/managed-by=Helm
+ > app.kubernetes.io/name=manila-share
+ > app.kubernetes.io/version=0.0.1
+ > helm.sh/chart=manila-share-0.0.1
+ > Annotations: meta.helm.sh/release-name: aglais-gaia-dr2
+ > meta.helm.sh/release-namespace: aglais-20210125
+ > Status: Running
+ > ....
+ > ....
+ > Events:
+ > Type Reason Age From Message
+ > ---- ------ ---- ---- -------
+ > Normal Scheduled default-scheduler Successfully assigned aglais-20210125/aglais-gaia-dr2-testpod to aglais-20210125-cluster-rqiklyztkmq6-node-3
+ > Normal Pulling 33s kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-3 Pulling image "fedora:latest"
+ > Normal Pulled 30s kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-3 Successfully pulled image "fedora:latest"
+ > Normal Created 30s kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-3 Created container aglais-gaia-dr2-container
+ > Normal Started 30s kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-3 Started container aglais-gaia-dr2-container
+
+
+ #
+ # Issue is intermittent ..
+ # Repeat the uninstall/install for eDR3 and we get a different result.
+ #
+
+# -----------------------------------------------------
+# Try again .....
+#[root@kubernator]
+
+ helm --namespace ${namespace} uninstall aglais-gaia-edr3
+
+ > release "aglais-gaia-edr3" uninstalled
+
+ #
+ # Wait until the pod, pv and pvc have all gone.
+ #
+
+ sharename=aglais-gaia-edr3
+ mountpath=/data/gaia/edr3
+ sharemode='rw'
+
+ '/kubernetes/bin/cephfs-mount.sh' \
+ 'gaia-prod' \
+ "${namespace:?}" \
+ "${sharename:?}" \
+ "${mountpath:?}" \
+ "${sharemode:?}"
+
+ > ---- ---- ----
+ > File [cephfs-mount.sh]
+ > Path [/kubernetes/bin]
+ > ---- ---- ----
+ > Cloud name [gaia-prod]
+ > Namespace [aglais-20210125]
+ > Share name [aglais-gaia-edr3]
+ > Mount path [/data/gaia/edr3]
+ > Share mode [rw]
+ > ---- ---- ----
+ >
+ > ----
+ > Share uuid [ca8231c3-1f5c-4ebf-8ec0-d3cfe2629976]
+ > ----
+ > Share size [540]
+ > ----
+ > Access rule [0a4b37bc-e07e-4763-a8af-4d9cf3ae9620]
+ > Release "aglais-gaia-edr3" does not exist. Installing it now.
+ > NAME: aglais-gaia-edr3
+ > LAST DEPLOYED: Mon Jan 25 04:55:26 2021
+ > NAMESPACE: aglais-20210125
+ > STATUS: deployed
+ > REVISION: 1
+ > TEST SUITE: None
+ > NOTES:
+ > Use the testpod to check access to the mounted volume.
+
+
+ kubectl --namespace ${namespace} describe pod aglais-gaia-edr3-testpod
+
+ > Name: aglais-gaia-edr3-testpod
+ > Namespace: aglais-20210125
+ > Node: aglais-20210125-cluster-rqiklyztkmq6-node-1/10.0.0.172
+ > Start Time: Mon, 25 Jan 2021 04:55:27 +0000
+ > Labels: aglais.dataset=aglais-gaia-edr3
+ > aglais.name=aglais-gaia-edr3-testpod
+ > app.kubernetes.io/instance=aglais-gaia-edr3
+ > app.kubernetes.io/managed-by=Helm
+ > app.kubernetes.io/name=manila-share
+ > app.kubernetes.io/version=0.0.1
+ > helm.sh/chart=manila-share-0.0.1
+ > Annotations: meta.helm.sh/release-name: aglais-gaia-edr3
+ > meta.helm.sh/release-namespace: aglais-20210125
+ > Status: Pending
+ > ....
+ > ....
+ > Events:
+ > Type Reason Age From Message
+ > ---- ------ ---- ---- -------
+ > Normal Scheduled default-scheduler Successfully assigned aglais-20210125/aglais-gaia-edr3-testpod to aglais-20210125-cluster-rqiklyztkmq6-node-1
+ > Warning FailedMount 2s (x7 over 34s) kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-1 MountVolume.MountDevice failed for volume "aglais-gaia-edr3-volume" : rpc error: code = InvalidArgument desc = stage secrets cannot be nil or empty
+
+
+ #
+ # DR2 is intermittent.
+ # Haven't seen eDR3 work at all.
+ # Try 2mass ...
+ #
+
+
+# -----------------------------------------------------
+# Try twomass
+#[root@kubernator]
+
+ helm --namespace ${namespace} uninstall aglais-twomass-allsky
+
+ > release "aglais-twomass-allsky" uninstalled
+
+ #
+ # Wait until the pod, pv and pvc have all gone.
+ #
+
+ sharename=aglais-twomass-allsky
+ mountpath=/data/twomass/allsky
+ sharemode='rw'
+
+ '/kubernetes/bin/cephfs-mount.sh' \
+ 'gaia-prod' \
+ "${namespace:?}" \
+ "${sharename:?}" \
+ "${mountpath:?}" \
+ "${sharemode:?}"
+
+ > ---- ---- ----
+ > File [cephfs-mount.sh]
+ > Path [/kubernetes/bin]
+ > ---- ---- ----
+ > Cloud name [gaia-prod]
+ > Namespace [aglais-20210125]
+ > Share name [aglais-twomass-allsky]
+ > Mount path [/data/twomass/allsky]
+ > Share mode [rw]
+ > ---- ---- ----
+ >
+ > ----
+ > Share uuid [9dc3016a-f010-48bc-89fc-a9cbd688b7cc]
+ > ----
+ > Share size [40]
+ > ----
+ > Access rule [5647d075-83fb-4a60-b562-a5248da54ec7]
+ > Release "aglais-twomass-allsky" does not exist. Installing it now.
+ > NAME: aglais-twomass-allsky
+ > LAST DEPLOYED: Mon Jan 25 04:59:54 2021
+ > NAMESPACE: aglais-20210125
+ > STATUS: deployed
+ > REVISION: 1
+ > TEST SUITE: None
+ > NOTES:
+ > Use the testpod to check access to the mounted volume.
+
+
+ kubectl --namespace ${namespace} describe pod aglais-twomass-allsky-testpod
+
+ > Name: aglais-twomass-allsky-testpod
+ > Namespace: aglais-20210125
+ > Node: aglais-20210125-cluster-rqiklyztkmq6-node-0/10.0.0.74
+ > Start Time: Mon, 25 Jan 2021 04:59:55 +0000
+ > Labels: aglais.dataset=aglais-twomass-allsky
+ > aglais.name=aglais-twomass-allsky-testpod
+ > app.kubernetes.io/instance=aglais-twomass-allsky
+ > app.kubernetes.io/managed-by=Helm
+ > app.kubernetes.io/name=manila-share
+ > app.kubernetes.io/version=0.0.1
+ > helm.sh/chart=manila-share-0.0.1
+ > Annotations: meta.helm.sh/release-name: aglais-twomass-allsky
+ > meta.helm.sh/release-namespace: aglais-20210125
+ > Status: Pending
+ > ....
+ > ....
+ > Events:
+ > Type Reason Age From Message
+ > ---- ------ ---- ---- -------
+ > Normal Scheduled default-scheduler Successfully assigned aglais-20210125/aglais-twomass-allsky-testpod to aglais-20210125-cluster-rqiklyztkmq6-node-0
+ > Warning FailedMount 11s (x7 over 43s) kubelet, aglais-20210125-cluster-rqiklyztkmq6-node-0 MountVolume.MountDevice failed for volume "aglais-twomass-allsky-volume" : rpc error: code = Internal desc = chmod /var/lib/kubelet/plugins/kubernetes.io/csi/pv/aglais-twomass-allsky-volume/globalmount: permission denied
+
+
+ #
+ # DR2 works, some of the time.
+ # eDR3 fails with 'stage secrets cannot be nil'.
+ # twomass fails with 'permission denied'
+ #
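+
+    #
+    # One way to narrow this down (sketch, not tried here): list each
+    # PersistentVolume with its CSI node-stage secret reference, since the
+    # eDR3 error says the stage secrets are nil or empty.
+    #
+
+    kubectl get pv --output json \
+        | jq -r '
+            .items[]
+            | [ .metadata.name, (.spec.csi.nodeStageSecretRef.name // "none") ]
+            | @tsv
+            '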
+
+
diff --git a/notes/zrq/20210125-02-kubernetes-deploy.txt b/notes/zrq/20210125-02-kubernetes-deploy.txt
new file mode 100644
index 00000000..6925dc32
--- /dev/null
+++ b/notes/zrq/20210125-02-kubernetes-deploy.txt
@@ -0,0 +1,288 @@
+#
+#
+#
+# Copyright (c) 2021, ROE (http://www.roe.ac.uk/)
+#
+# This information is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This information is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#
+#
+#zrq-notes-time
+#zrq-notes-indent
+#zrq-notes-crypto
+#zrq-notes-ansible
+#zrq-notes-osformat
+#zrq-notes-zeppelin
+#
+
+ Target:
+
+ Try to get the Kubernetes deployment to work.
+ Starting from clean ... again.
+
+ Results:
+
+ Failed.
+ Failing to mount the PV claims, intermittent .. different results, different reasons.
+
+# -----------------------------------------------------
+# Update the Openstack cloud name.
+#[user@desktop]
+
+ cloudname=gaia-dev
+
+ sed -i '
+ s/^\(AGLAIS_CLOUD\)=.*$/\1='${cloudname:?}'/
+ ' "${HOME}/aglais.env"
+
+# -----------------------------------------------------
+# Create a container to work with.
+# (*) extra volume mount for /common
+# (*) mount kubernetes directory as read/write
+#[user@desktop]
+
+ source "${HOME:?}/aglais.env"
+
+ podman run \
+ --rm \
+ --tty \
+ --interactive \
+ --name kubernator \
+ --hostname kubernator \
+ --env "SSH_AUTH_SOCK=/mnt/ssh_auth_sock" \
+ --volume "${SSH_AUTH_SOCK}:/mnt/ssh_auth_sock:rw,z" \
+ --env "cloudname=${AGLAIS_CLOUD:?}" \
+ --volume "${HOME:?}/clouds.yaml:/etc/openstack/clouds.yaml:ro,z" \
+ --volume "${AGLAIS_CODE:?}/experiments/common:/common:ro,z" \
+ --volume "${AGLAIS_CODE:?}/experiments/openstack:/openstack:ro,z" \
+ --volume "${AGLAIS_CODE:?}/experiments/kubernetes:/kubernetes:rw,z" \
+ atolmis/ansible-client:2020.12.02 \
+ bash
+
+
+# -----------------------------------------------------
+# Delete everything.
+#[root@kubernator]
+
+ /openstack/bin/delete-all.sh \
+ "${cloudname:?}"
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Create our Aglais configuration.
+#[root@kubernator]
+
+cat > '/tmp/aglais-config.yml' << EOF
+aglais:
+ version: 1.0
+ spec:
+ openstack:
+ cloudname: ${cloudname:?}
+ dashboard:
+ hostname: dashboard.metagrid.xyz
+ zeppelin:
+ hostname: zeppelin.metagrid.xyz
+ drupal:
+ hostname: drupal.metagrid.xyz
+EOF
+
+
+# -----------------------------------------------------
+# Create everything.
+#[root@kubernator]
+
+ /kubernetes/bin/create-all.sh
+
+ > ....
+ > ....
+ > Installing dashboard Helm chart
+ > Namespace [aglais-20210125]
+ > Dash host [dashboard.metagrid.xyz]
+ > Getting updates for unmanaged Helm repositories...
+ > ...Successfully got an update from the "https://kubernetes.github.io/dashboard" chart repository
+ > Saving 1 charts
+ > Downloading kubernetes-dashboard from repo https://kubernetes.github.io/dashboard
+ > Deleting outdated charts
+ > Release "aglais-dashboard" does not exist. Installing it now.
+ > NAME: aglais-dashboard
+ > LAST DEPLOYED: Mon Jan 25 05:27:58 2021
+ > NAMESPACE: aglais-20210125
+ > STATUS: deployed
+ > REVISION: 1
+ > TEST SUITE: None
+ > ....
+ > ....
+
+ #
+ # Dashboard worked this time, no errors.
+ #
+
+# -----------------------------------------------------
+# Check the results.
+#[root@kubernator]
+
+ cat '/tmp/aglais-status.yml'
+
+ > aglais:
+ > status:
+ > deployment:
+ > type: kubernetes
+ > name: aglais-20210125
+ > date: 20210125:051944
+ > openstack:
+ > cluster:
+ > id: 958b10a3-aa60-4762-8405-5101eaaf6e1f
+ > kubernetes:
+ > namespace: aglais-20210125
+ > spec:
+ > openstack:
+ > cloudname: gaia-dev
+
+
+# -----------------------------------------------------
+# Get the cluster ID and K8s namespace.
+#[root@kubernator]
+
+ clusterid=$(
+ yq read '/tmp/aglais-status.yml' 'aglais.status.openstack.cluster.id'
+ )
+
+ namespace=$(
+ yq read '/tmp/aglais-status.yml' 'aglais.status.kubernetes.namespace'
+ )
+
+cat << EOF
+Cluster ID [${clusterid}]
+Name space [${namespace}]
+EOF
+
+
+ > Cluster ID [958b10a3-aa60-4762-8405-5101eaaf6e1f]
+ > Name space [aglais-20210125]
+
+
+# -----------------------------------------------------
+# Get the Dashboard ServiceAccount token.
+#[root@kubernator]
+
+ secretname=$(
+ kubectl \
+ --output json \
+ --namespace "${namespace:?}" \
+ get ServiceAccount \
+ "aglais-dashboard-kubernetes-dashboard" \
+ | jq -r '.secrets[0].name'
+ )
+
+ kubectl \
+ --output json \
+ --namespace "${namespace:?}" \
+ get Secret \
+ "${secretname:?}" \
+ | jq -r '.data.token | @base64d'
+
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Get the Ingress address.
+#[root@kubernator]
+
+ kubectl \
+ --namespace "${namespace:?}" \
+ get Ingress
+
+ > NAME HOSTS ADDRESS PORTS AGE
+ > aglais-dashboard-kubernetes-dashboard dashboard.metagrid.xyz 128.232.227.177 80 5m6s
+ > zeppelin-server-ingress zeppelin.metagrid.xyz 128.232.227.177 80, 443 3m20s
+
+
+ zeppelinip=$(
+ kubectl \
+ --namespace "${namespace:?}" \
+ get Ingress \
+ --output json \
+ | jq -r '
+ .items[]
+ | select(.metadata.name == "zeppelin-server-ingress")
+ | .status.loadBalancer.ingress[0].ip
+ '
+ )
+
+ echo "Zeppelin IP [${zeppelinip:?}]"
+
+ > Zeppelin IP [128.232.227.177]
+
+ yq write \
+ --inplace \
+ '/tmp/aglais-status.yml' \
+ 'aglais.status.kubernetes.ingress.zeppelin.ipv4' \
+ "${zeppelinip:?}"
+
+
+# -----------------------------------------------------
+# -----------------------------------------------------
+
+ #
+ # Update our DNS ..
+ #
+
+
+# -----------------------------------------------------
+# Check the Dashboard page.
+#[root@kubernator]
+
+ curl --head --insecure "https://dashboard.metagrid.xyz/"
+
+ > HTTP/2 200
+ > date: Mon, 25 Jan 2021 05:34:47 GMT
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Check the Zeppelin page.
+#[root@kubernator]
+
+ curl --head --insecure "https://zeppelin.metagrid.xyz/"
+
+ > HTTP/2 200
+ > date: Mon, 25 Jan 2021 05:35:12 GMT
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# -----------------------------------------------------
+# Login to Dashboard and test ...
+#[user@desktop]
+
+ firefox --new-window "https://dashboard.metagrid.xyz/" &
+
+ > Dashboard looks good.
+ > Token works :-)
+
+
+ #
+ # Checking in dashboard, none of the share mounts worked :-(
+ # Failing to mount the PV claims .. different results, different reasons.
+ #
+
+
+
diff --git a/notes/zrq/20210125-03-ansible-deploy.txt b/notes/zrq/20210125-03-ansible-deploy.txt
new file mode 100644
index 00000000..d1c9c818
--- /dev/null
+++ b/notes/zrq/20210125-03-ansible-deploy.txt
@@ -0,0 +1,319 @@
+#
+#
+#
+# Copyright (c) 2021, ROE (http://www.roe.ac.uk/)
+#
+# This information is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This information is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#
+#
+#zrq-notes-time
+#zrq-notes-indent
+#zrq-notes-crypto
+#zrq-notes-ansible
+#zrq-notes-osformat
+#zrq-notes-zeppelin
+#
+
+ Target:
+
+ Test if the Manila shares work on the Ansible deploy ...
+
+ Results:
+
+ Success.
+ All the nodes deployed correctly.
+ Data shares appear to be mounted correctly.
+
+ TODO:
+
+ We still need some tools to verify the contents.
+ - https://github.com/wfau/aglais/issues/82
+ - https://github.com/wfau/aglais/issues/323
+ - https://github.com/wfau/aglais/issues/32
+
+
+
+# -----------------------------------------------------
+# Update the Openstack cloud name.
+#[user@desktop]
+
+ cloudname=gaia-dev
+
+ sed -i '
+ s/^\(AGLAIS_CLOUD\)=.*$/\1='${cloudname:?}'/
+ ' "${HOME}/aglais.env"
+
+
+# -----------------------------------------------------
+# Create a container to work with.
+# (*) extra volume mount for /common
+#[user@desktop]
+
+ source "${HOME:?}/aglais.env"
+
+ podman run \
+ --rm \
+ --tty \
+ --interactive \
+ --name ansibler \
+ --hostname ansibler \
+ --env "SSH_AUTH_SOCK=/mnt/ssh_auth_sock" \
+ --volume "${SSH_AUTH_SOCK}:/mnt/ssh_auth_sock:rw,z" \
+ --env "cloudname=${AGLAIS_CLOUD:?}" \
+ --volume "${HOME:?}/clouds.yaml:/etc/openstack/clouds.yaml:ro,z" \
+ --volume "${AGLAIS_CODE:?}/experiments/common:/common:ro,z" \
+ --volume "${AGLAIS_CODE:?}/experiments/openstack:/openstack:ro,z" \
+ --volume "${AGLAIS_CODE:?}/experiments/hadoop-yarn:/hadoop-yarn:ro,z" \
+ atolmis/ansible-client:2020.12.02 \
+ bash
+
+
+# -----------------------------------------------------
+# Create our Aglais configuration.
+#[root@ansibler]
+
+cat > /tmp/aglais-config.yml << EOF
+aglais:
+ version: 1.0
+ spec:
+ openstack:
+ cloudname: ${cloudname:?}
+EOF
+
+
+# -----------------------------------------------------
+# Delete everything.
+#[root@ansibler]
+
+ /openstack/bin/delete-all.sh \
+ "${cloudname:?}"
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Create everything.
+#[root@ansibler]
+
+ /hadoop-yarn/bin/create-all.sh
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Check the results.
+#[root@ansibler]
+
+ cat '/tmp/aglais-status.yml'
+
+ > aglais:
+ > status:
+ > deployment:
+ > type: hadoop-yarn
+ > name: aglais-20210125
+ > date: 20210125:054847
+ > spec:
+ > openstack:
+ > cloudname: gaia-dev
+
+ buildtag=$(
+ yq read \
+ '/tmp/aglais-status.yml' \
+ 'aglais.status.deployment.name'
+ )
+
+
+# -----------------------------------------------------
+# Get the public IP address of our Zeppelin node.
+#[root@ansibler]
+
+ zeppelinid=$(
+ openstack \
+ --os-cloud "${cloudname:?}" \
+ server list \
+ --format json \
+ | jq -r '.[] | select(.Name == "'${buildtag:?}'-zeppelin") | .ID'
+ )
+
+ zeppelinip=$(
+ openstack \
+ --os-cloud "${cloudname:?}" \
+ server show \
+ --format json \
+ "${zeppelinid:?}" \
+ | jq -r '.addresses' \
+ | sed '
+ s/[[:space:]]//
+ s/.*=\(.*\)/\1/
+ s/.*,\(.*\)/\1/
+ '
+ )
+
+cat << EOF
+Zeppelin ID [${zeppelinid:?}]
+Zeppelin IP [${zeppelinip:?}]
+EOF
+
+ > Zeppelin ID [ba030ca2-cce1-47b7-b8df-249691c92fa7]
+ > Zeppelin IP [128.232.227.242]
+
+
+# -----------------------------------------------------
+# Login to the Zeppelin node and check the data shares.
+#[root@ansibler]
+
+ sharelist='/common/manila/datashares.yaml'
+
+ for shareid in $(
+ yq read "${sharelist:?}" 'shares.[*].id'
+ )
+ do
+ echo ""
+ echo "---- ----"
+ echo "Share [${shareid:?}]"
+ echo "----"
+
+ sharename=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).sharename")
+ mountpath=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).mountpath")
+
+ ssh "fedora@${zeppelinip:?}" \
+ "
+ date
+ hostname
+ echo '----'
+ df -h '${mountpath:?}'
+ echo '----'
+ ls -al '${mountpath:?}' | tail
+ "
+ done
+
+
+ > ---- ----
+ > Share [GDR2]
+ > ----
+ > Mon Jan 25 12:01:46 UTC 2021
+ > aglais-20210125-zeppelin.novalocal
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 512G 473G 40G 93% /data/gaia/dr2
+ > ----
+ > -rw-r--r--. 1 fedora fedora 30825240 Oct 24 17:59 part-06504-70392076-8b82-4457-8828-22069e7626e9-c000.snappy.parquet
+ > -rw-r--r--. 1 fedora fedora 31802127 Oct 24 17:59 part-06505-70392076-8b82-4457-8828-22069e7626e9-c000.snappy.parquet
+ > -rw-r--r--. 1 fedora fedora 31538538 Oct 24 17:59 part-06506-70392076-8b82-4457-8828-22069e7626e9-c000.snappy.parquet
+ > -rw-r--r--. 1 fedora fedora 31218434 Oct 24 17:59 part-06507-70392076-8b82-4457-8828-22069e7626e9-c000.snappy.parquet
+ > -rw-r--r--. 1 fedora fedora 30815074 Oct 24 17:59 part-06508-70392076-8b82-4457-8828-22069e7626e9-c000.snappy.parquet
+ > -rw-r--r--. 1 fedora fedora 30406730 Oct 24 17:59 part-06509-70392076-8b82-4457-8828-22069e7626e9-c000.snappy.parquet
+ > -rw-r--r--. 1 fedora fedora 29995058 Oct 24 17:59 part-06510-70392076-8b82-4457-8828-22069e7626e9-c000.snappy.parquet
+ > -rw-r--r--. 1 fedora fedora 29447614 Oct 24 17:59 part-06511-70392076-8b82-4457-8828-22069e7626e9-c000.snappy.parquet
+ > -rw-r--r--. 1 fedora fedora 28448646 Oct 24 17:59 part-06512-70392076-8b82-4457-8828-22069e7626e9-c000.snappy.parquet
+ > -rw-r--r--. 1 fedora fedora 6317774 Oct 24 17:59 part-06513-70392076-8b82-4457-8828-22069e7626e9-c000.snappy.parquet
+ >
+ > ---- ----
+ > Share [GEDR3]
+ > ----
+ > Mon Jan 25 12:01:47 UTC 2021
+ > aglais-20210125-zeppelin.novalocal
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 540G 533G 7.9G 99% /data/gaia/edr3
+ > ----
+ > -rw-r--r--. 1 root root 36858229 Jan 11 22:27 part-11922-59b9273a-2ef1-4988-8778-e00f67e65264-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 35391788 Jan 11 22:27 part-11923-59b9273a-2ef1-4988-8778-e00f67e65264-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 39969879 Jan 11 22:27 part-11924-59b9273a-2ef1-4988-8778-e00f67e65264-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 38923149 Jan 11 22:27 part-11925-59b9273a-2ef1-4988-8778-e00f67e65264-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 36280019 Jan 11 22:27 part-11926-59b9273a-2ef1-4988-8778-e00f67e65264-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 39559908 Jan 11 22:27 part-11927-59b9273a-2ef1-4988-8778-e00f67e65264-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 34715127 Jan 11 22:27 part-11928-59b9273a-2ef1-4988-8778-e00f67e65264-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 35453747 Jan 11 22:27 part-11929-59b9273a-2ef1-4988-8778-e00f67e65264-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 30599245 Jan 11 22:27 part-11930-59b9273a-2ef1-4988-8778-e00f67e65264-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 10852913 Jan 11 22:27 part-11931-59b9273a-2ef1-4988-8778-e00f67e65264-c000.snappy.parquet
+ >
+ > ---- ----
+ > Share [ALLWISE]
+ > ----
+ > Mon Jan 25 12:01:49 UTC 2021
+ > aglais-20210125-zeppelin.novalocal
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 350G 341G 9.9G 98% /data/wise/allwise
+ > ----
+ > -rw-r--r--. 1 root root 21195981 Jan 11 21:26 part-09124-6f95fee1-90c7-4207-911a-ebcc0ef05615-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 20760761 Jan 11 21:26 part-09125-6f95fee1-90c7-4207-911a-ebcc0ef05615-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 37549253 Jan 11 21:26 part-09126-6f95fee1-90c7-4207-911a-ebcc0ef05615-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 32687920 Jan 11 21:26 part-09127-6f95fee1-90c7-4207-911a-ebcc0ef05615-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 30215740 Jan 11 21:26 part-09128-6f95fee1-90c7-4207-911a-ebcc0ef05615-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 26528776 Jan 11 21:26 part-09129-6f95fee1-90c7-4207-911a-ebcc0ef05615-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 36999673 Jan 11 21:26 part-09130-6f95fee1-90c7-4207-911a-ebcc0ef05615-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 30382801 Jan 11 21:26 part-09131-6f95fee1-90c7-4207-911a-ebcc0ef05615-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 31622359 Jan 11 21:26 part-09132-6f95fee1-90c7-4207-911a-ebcc0ef05615-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 9956618 Jan 11 21:26 part-09133-6f95fee1-90c7-4207-911a-ebcc0ef05615-c000.snappy.parquet
+ >
+ > ---- ----
+ > Share [PS1]
+ > ----
+ > Mon Jan 25 12:01:52 UTC 2021
+ > aglais-20210125-zeppelin.novalocal
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 300G 270G 31G 90% /data/panstarrs/dr1
+ > ----
+ > -rw-r--r--. 1 root root 27803868 Jan 11 19:43 part-07723-22b55fbd-2678-4993-8e3a-3f384b1854bc-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 22025506 Jan 11 19:43 part-07724-22b55fbd-2678-4993-8e3a-3f384b1854bc-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 25756891 Jan 11 19:43 part-07725-22b55fbd-2678-4993-8e3a-3f384b1854bc-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 31396660 Jan 11 19:43 part-07726-22b55fbd-2678-4993-8e3a-3f384b1854bc-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 26859792 Jan 11 19:44 part-07727-22b55fbd-2678-4993-8e3a-3f384b1854bc-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 24735889 Jan 11 19:44 part-07728-22b55fbd-2678-4993-8e3a-3f384b1854bc-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 25470955 Jan 11 19:44 part-07729-22b55fbd-2678-4993-8e3a-3f384b1854bc-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 25640631 Jan 11 19:44 part-07730-22b55fbd-2678-4993-8e3a-3f384b1854bc-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 22504695 Jan 11 19:44 part-07731-22b55fbd-2678-4993-8e3a-3f384b1854bc-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 13200198 Jan 11 19:44 part-07732-22b55fbd-2678-4993-8e3a-3f384b1854bc-c000.snappy.parquet
+ >
+ > ---- ----
+ > Share [2MASS]
+ > ----
+ > Mon Jan 25 12:01:53 UTC 2021
+ > aglais-20210125-zeppelin.novalocal
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 40G 37G 3.5G 92% /data/twomass/allsky
+ > ----
+ > -rw-r--r--. 1 root root 16875933 Jan 11 17:44 part-01176-ce75a128-1cde-4ce1-90fc-4a36208209b2-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 31847987 Jan 11 17:44 part-01177-ce75a128-1cde-4ce1-90fc-4a36208209b2-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 33978033 Jan 11 17:45 part-01178-ce75a128-1cde-4ce1-90fc-4a36208209b2-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 33170642 Jan 11 17:45 part-01179-ce75a128-1cde-4ce1-90fc-4a36208209b2-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 33115257 Jan 11 17:45 part-01180-ce75a128-1cde-4ce1-90fc-4a36208209b2-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 33854964 Jan 11 17:45 part-01181-ce75a128-1cde-4ce1-90fc-4a36208209b2-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 31874821 Jan 11 17:45 part-01182-ce75a128-1cde-4ce1-90fc-4a36208209b2-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 33091386 Jan 11 17:45 part-01183-ce75a128-1cde-4ce1-90fc-4a36208209b2-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 31078087 Jan 11 17:45 part-01184-ce75a128-1cde-4ce1-90fc-4a36208209b2-c000.snappy.parquet
+ > -rw-r--r--. 1 root root 14460710 Jan 11 17:45 part-01185-ce75a128-1cde-4ce1-90fc-4a36208209b2-c000.snappy.parquet
+
+
+ #
+ # Looks OK.
+ #
+ # We still need something to verify the contents.
+ # https://github.com/wfau/aglais/issues/82
+ # https://github.com/wfau/aglais/issues/323
+ # https://github.com/wfau/aglais/issues/32
+ #
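+
+    #
+    # A possible first step towards verifying the contents (sketch, run from
+    # this client over ssh): count the Parquet files and report the total size
+    # of each mounted share, so the numbers can be compared between deployments.
+    #
+
+    ssh "fedora@${zeppelinip:?}" \
+        '
+        for sharepath in /data/gaia/dr2 /data/gaia/edr3 /data/wise/allwise /data/panstarrs/dr1 /data/twomass/allsky
+        do
+            echo "---- ----"
+            echo "Share path [${sharepath}]"
+            find "${sharepath}" -type f -name "*.parquet" | wc -l
+            du -sh "${sharepath}"
+        done
+        '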
+
+
+
+
+
+
diff --git a/notes/zrq/20210125-04-kubernetes-deploy.txt b/notes/zrq/20210125-04-kubernetes-deploy.txt
new file mode 100644
index 00000000..f5b535da
--- /dev/null
+++ b/notes/zrq/20210125-04-kubernetes-deploy.txt
@@ -0,0 +1,1284 @@
+#
+#
+#
+# Copyright (c) 2021, ROE (http://www.roe.ac.uk/)
+#
+# This information is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This information is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#
+#
+#zrq-notes-time
+#zrq-notes-indent
+#zrq-notes-crypto
+#zrq-notes-ansible
+#zrq-notes-osformat
+#zrq-notes-zeppelin
+#
+
+ Target:
+
+ Try to get the Kubernetes deployment to work.
+ Starting from clean ... again.
+
+ Results:
+
+ Success.
+ Figured out what was causing the problems.
+ All the csi-manila shares mounted and tested.
+
+ TODO:
+
+ We still need some tools to verify the contents.
+ - https://github.com/wfau/aglais/issues/82
+ - https://github.com/wfau/aglais/issues/323
+ - https://github.com/wfau/aglais/issues/32
+
+
+
+# -----------------------------------------------------
+# Update the Openstack cloud name.
+#[user@desktop]
+
+ cloudname=gaia-dev
+
+ sed -i '
+ s/^\(AGLAIS_CLOUD\)=.*$/\1='${cloudname:?}'/
+ ' "${HOME}/aglais.env"
+
+# -----------------------------------------------------
+# Create a container to work with.
+# (*) extra volume mount for /common
+# (*) mount kubernetes directory as read/write
+#[user@desktop]
+
+ source "${HOME:?}/aglais.env"
+
+ podman run \
+ --rm \
+ --tty \
+ --interactive \
+ --name kubernator \
+ --hostname kubernator \
+ --env "SSH_AUTH_SOCK=/mnt/ssh_auth_sock" \
+ --volume "${SSH_AUTH_SOCK}:/mnt/ssh_auth_sock:rw,z" \
+ --env "cloudname=${AGLAIS_CLOUD:?}" \
+ --volume "${HOME:?}/clouds.yaml:/etc/openstack/clouds.yaml:ro,z" \
+ --volume "${AGLAIS_CODE:?}/experiments/common:/common:ro,z" \
+ --volume "${AGLAIS_CODE:?}/experiments/openstack:/openstack:ro,z" \
+ --volume "${AGLAIS_CODE:?}/experiments/kubernetes:/kubernetes:rw,z" \
+ atolmis/ansible-client:2020.12.02 \
+ bash
+
+
+# -----------------------------------------------------
+# Delete everything.
+#[root@kubernator]
+
+ /openstack/bin/delete-all.sh \
+ "${cloudname:?}"
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Create our Aglais configuration.
+#[root@kubernator]
+
+cat > '/tmp/aglais-config.yml' << EOF
+aglais:
+ version: 1.0
+ spec:
+ openstack:
+ cloudname: ${cloudname:?}
+ dashboard:
+ hostname: dashboard.metagrid.xyz
+ zeppelin:
+ hostname: zeppelin.metagrid.xyz
+ drupal:
+ hostname: drupal.metagrid.xyz
+EOF
+
+
+# -----------------------------------------------------
+# Create everything.
+#[root@kubernator]
+
+ /kubernetes/bin/create-all.sh
+
+ > ....
+ > ....
+
+ #
+ # Dashboard installed OK this time ...
+ #
+
+
+# -----------------------------------------------------
+# Check the results.
+#[root@kubernator]
+
+ cat '/tmp/aglais-status.yml'
+
+ > aglais:
+ > status:
+ > deployment:
+ > type: kubernetes
+ > name: aglais-20210125
+ > date: 20210125:123446
+ > openstack:
+ > cloudname: gaia-dev
+ > magnum:
+ > uuid: fb90b7c3-49d8-48e8-8b7b-60976ba3f187
+ > kubernetes:
+ > namespace: aglais-20210125
+ > ingress:
+ > dashboard:
+ > hostname: dashboard.metagrid.xyz
+ > ipv4: null
+ > zeppelin:
+ > hostname: zeppelin.metagrid.xyz
+ > ipv4: null
+
+
+# -----------------------------------------------------
+# Get the cluster ID and K8s namespace.
+#[root@kubernator]
+
+ magnumid=$(
+ yq read '/tmp/aglais-status.yml' 'aglais.status.openstack.magnum.uuid'
+ )
+
+ namespace=$(
+ yq read '/tmp/aglais-status.yml' 'aglais.status.kubernetes.namespace'
+ )
+
+cat << EOF
+Magnum uuid [${magnumid}]
+Name space [${namespace}]
+EOF
+
+ > Magnum uuid [fb90b7c3-49d8-48e8-8b7b-60976ba3f187]
+ > Name space [aglais-20210125]
+
+
+# -----------------------------------------------------
+# Get the Dashboard ServiceAccount token.
+#[root@kubernator]
+
+ secretname=$(
+ kubectl \
+ --output json \
+ --namespace "${namespace:?}" \
+ get ServiceAccount \
+ "aglais-dashboard-kubernetes-dashboard" \
+ | jq -r '.secrets[0].name'
+ )
+
+ kubectl \
+ --output json \
+ --namespace "${namespace:?}" \
+ get Secret \
+ "${secretname:?}" \
+ | jq -r '.data.token | @base64d'
+
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Check our ingress status.
+# ** Kubernetes needs time to allocate the IP address.
+#[root@kubernator]
+
+ kubectl \
+ --namespace "${namespace:?}" \
+ get Ingress
+
+ > NAME HOSTS ADDRESS PORTS AGE
+ > aglais-dashboard-kubernetes-dashboard dashboard.metagrid.xyz 128.232.227.177 80 5m6s
+ > zeppelin-server-ingress zeppelin.metagrid.xyz 128.232.227.177 80, 443 3m20s
+
+
+# -----------------------------------------------------
+# Capture our Dashboard ingress IP address.
+# ** Kubernetes needs time to allocate the IP address.
+
+ daship=$(
+ kubectl \
+ --namespace "${namespace:?}" \
+ get Ingress \
+ --output json \
+ | jq -r '
+ .items[]
+ | select(.metadata.name == "aglais-dashboard-kubernetes-dashboard")
+ | .status.loadBalancer.ingress[0].ip
+ '
+ )
+
+ yq write \
+ --inplace \
+ '/tmp/aglais-status.yml' \
+ 'aglais.status.kubernetes.ingress.dashboard.ipv4' \
+ "${daship}"
+
+
+# -----------------------------------------------------
+# Capture our Zeppelin ingress IP address.
+# ** Kubernetes needs time to allocate the IP address.
+
+ zeppip=$(
+ kubectl \
+ --namespace "${namespace:?}" \
+ get Ingress \
+ --output json \
+ | jq -r '
+ .items[]
+ | select(.metadata.name == "zeppelin-server-ingress")
+ | .status.loadBalancer.ingress[0].ip
+ '
+ )
+
+ yq write \
+ --inplace \
+ '/tmp/aglais-status.yml' \
+ 'aglais.status.kubernetes.ingress.zeppelin.ipv4' \
+ "${zeppip}"
+
+
+
+# -----------------------------------------------------
+# -----------------------------------------------------
+
+ #
+ # Update our DNS ..
+ #
+
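+# -----------------------------------------------------
+# Check the DNS records point at the new ingress address.
+# A minimal sketch - assumes 'dig' is available in the container.
+#[root@kubernator]
+
+    for dnsname in \
+        'dashboard.metagrid.xyz' \
+        'zeppelin.metagrid.xyz'
+    do
+        echo "---- ${dnsname}"
+        # The published A record should match the captured ingress address.
+        dig +short "${dnsname}"
+    done
+
+    echo "Ingress address [${daship}]"
+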
+# -----------------------------------------------------
+# -----------------------------------------------------
+# Check the Dashboard page.
+#[root@kubernator]
+
+ curl --head --insecure "https://dashboard.metagrid.xyz/"
+
+ > HTTP/2 200
+ > date: Mon, 25 Jan 2021 13:05:50 GMT
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Check the Zeppelin page.
+#[root@kubernator]
+
+ curl --head --insecure "https://zeppelin.metagrid.xyz/"
+
+ > HTTP/2 200
+ > date: Mon, 25 Jan 2021 13:06:05 GMT
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Check the test pod events for the data shares.
+#[root@kubernator]
+
+ sharelist='/common/manila/datashares.yaml'
+
+ for shareid in $(
+ yq read "${sharelist:?}" 'shares.[*].id'
+ )
+ do
+ echo ""
+ echo "---- ----"
+ echo "Share [${shareid:?}]"
+ echo "----"
+
+ sharename=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).sharename")
+ mountpath=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).mountpath")
+
+ kubectl \
+ --namespace "${namespace:?}" \
+ get event \
+ --field-selector "involvedObject.name=${sharename:?}-testpod"
+
+ done
+
+ > ---- ----
+ > Share [GDR2]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-gaia-dr2-testpod Successfully assigned aglais-20210125/aglais-gaia-dr2-testpod to aglais-20210125-cluster-shr4k5gaja5a-node-2
+ > 18m Warning FailedMount pod/aglais-gaia-dr2-testpod MountVolume.MountDevice failed for volume "aglais-gaia-dr2-volume" : rpc error: code = Internal desc = chmod /var/lib/kubelet/plugins/kubernetes.io/csi/pv/aglais-gaia-dr2-volume/globalmount: permission denied
+ > 8m33s Warning FailedMount pod/aglais-gaia-dr2-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[test-data local-data default-token-fwhqc]: timed out waiting for the condition
+ > 3m59s Warning FailedMount pod/aglais-gaia-dr2-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[local-data default-token-fwhqc test-data]: timed out waiting for the condition
+ > 33m Warning FailedMount pod/aglais-gaia-dr2-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[default-token-fwhqc test-data local-data]: timed out waiting for the condition
+ >
+ > ---- ----
+ > Share [GEDR3]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-gaia-edr3-testpod Successfully assigned aglais-20210125/aglais-gaia-edr3-testpod to aglais-20210125-cluster-shr4k5gaja5a-node-3
+ > 18m Warning FailedMount pod/aglais-gaia-edr3-testpod MountVolume.MountDevice failed for volume "aglais-gaia-edr3-volume" : rpc error: code = Internal desc = chmod /var/lib/kubelet/plugins/kubernetes.io/csi/pv/aglais-gaia-edr3-volume/globalmount: permission denied
+ > 8m17s Warning FailedMount pod/aglais-gaia-edr3-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[test-data local-data default-token-fwhqc]: timed out waiting for the condition
+ > 3m46s Warning FailedMount pod/aglais-gaia-edr3-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[default-token-fwhqc test-data local-data]: timed out waiting for the condition
+ >
+ > ---- ----
+ > Share [ALLWISE]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-wise-allwise-testpod Successfully assigned aglais-20210125/aglais-wise-allwise-testpod to aglais-20210125-cluster-shr4k5gaja5a-node-0
+ > 17m Warning FailedMount pod/aglais-wise-allwise-testpod MountVolume.MountDevice failed for volume "aglais-wise-allwise-volume" : rpc error: code = Internal desc = chmod /var/lib/kubelet/plugins/kubernetes.io/csi/pv/aglais-wise-allwise-volume/globalmount: permission denied
+ > 3m30s Warning FailedMount pod/aglais-wise-allwise-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[test-data local-data default-token-fwhqc]: timed out waiting for the condition
+ > 8m4s Warning FailedMount pod/aglais-wise-allwise-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[default-token-fwhqc test-data local-data]: timed out waiting for the condition
+ >
+ > ---- ----
+ > Share [PS1]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-panstarrs-dr1-testpod Successfully assigned aglais-20210125/aglais-panstarrs-dr1-testpod to aglais-20210125-cluster-shr4k5gaja5a-node-0
+ > 17m Warning FailedMount pod/aglais-panstarrs-dr1-testpod MountVolume.MountDevice failed for volume "aglais-panstarrs-dr1-volume" : rpc error: code = Internal desc = chmod /var/lib/kubelet/plugins/kubernetes.io/csi/pv/aglais-panstarrs-dr1-volume/globalmount: permission denied
+ > 30m Warning FailedMount pod/aglais-panstarrs-dr1-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[default-token-fwhqc test-data local-data]: timed out waiting for the condition
+ > 3m17s Warning FailedMount pod/aglais-panstarrs-dr1-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[test-data local-data default-token-fwhqc]: timed out waiting for the condition
+ > 37m Warning FailedMount pod/aglais-panstarrs-dr1-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[local-data default-token-fwhqc test-data]: timed out waiting for the condition
+ >
+ > ---- ----
+ > Share [2MASS]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-twomass-allsky-testpod Successfully assigned aglais-20210125/aglais-twomass-allsky-testpod to aglais-20210125-cluster-shr4k5gaja5a-node-2
+ > 76s Warning FailedMount pod/aglais-twomass-allsky-testpod MountVolume.MountDevice failed for volume "aglais-twomass-allsky-volume" : rpc error: code = Internal desc = chmod /var/lib/kubelet/plugins/kubernetes.io/csi/pv/aglais-twomass-allsky-volume/globalmount: permission denied
+ > 27m Warning FailedMount pod/aglais-twomass-allsky-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[local-data default-token-fwhqc test-data]: timed out waiting for the condition
+ > 7m44s Warning FailedMount pod/aglais-twomass-allsky-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[test-data local-data default-token-fwhqc]: timed out waiting for the condition
+ > 30m Warning FailedMount pod/aglais-twomass-allsky-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[default-token-fwhqc test-data local-data]: timed out waiting for the condition
+
+ #
+ # All of them failed this time.
+ # All of them reported the same error this time.
+ #
+
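+# -----------------------------------------------------
+# Collect just the FailedMount messages so the errors can be compared.
+# A minimal sketch using the same event fields as above.
+#[root@kubernator]
+
+    kubectl \
+        --namespace "${namespace:?}" \
+        get event \
+        --output json \
+        | jq -r '
+            .items[]
+            | select(.reason == "FailedMount")
+            | .involvedObject.name + " : " + .message
+            '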
+
+# -----------------------------------------------------
+# -----------------------------------------------------
+# Delete everything.
+#[root@kubernator]
+
+ /openstack/bin/delete-all.sh \
+ "${cloudname:?}"
+
+ > ....
+ > ....
+
+# -----------------------------------------------------
+# Create everything.
+#[root@kubernator]
+
+ /kubernetes/bin/create-all.sh
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Check the results.
+#[root@kubernator]
+
+ cat '/tmp/aglais-status.yml'
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Get the cluster ID and K8s namespace.
+#[root@kubernator]
+
+ magnumid=$(
+ yq read '/tmp/aglais-status.yml' 'aglais.status.openstack.magnum.uuid'
+ )
+
+ namespace=$(
+ yq read '/tmp/aglais-status.yml' 'aglais.status.kubernetes.namespace'
+ )
+
+cat << EOF
+Magnum uuid [${magnumid}]
+Name space [${namespace}]
+EOF
+
+ > Magnum uuid [cc17f847-fb02-427b-909a-6750dbad2060]
+ > Name space [aglais-20210125]
+
+
+# -----------------------------------------------------
+# Check the test pod events for the data shares.
+#[root@kubernator]
+
+ sharelist='/common/manila/datashares.yaml'
+
+ for shareid in $(
+ yq read "${sharelist:?}" 'shares.[*].id'
+ )
+ do
+ echo ""
+ echo "---- ----"
+ echo "Share [${shareid:?}]"
+ echo "----"
+
+ sharename=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).sharename")
+ mountpath=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).mountpath")
+
+ kubectl \
+ --namespace "${namespace:?}" \
+ get event \
+ --field-selector "involvedObject.name=${sharename:?}-testpod"
+
+ done
+
+ > ---- ----
+ > Share [GDR2]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-gaia-dr2-testpod Successfully assigned aglais-20210125/aglais-gaia-dr2-testpod to aglais-20210125-cluster-bf3en5lv3e6a-node-0
+ > 90s Warning FailedMount pod/aglais-gaia-dr2-testpod MountVolume.MountDevice failed for volume "aglais-gaia-dr2-volume" : rpc error: code = Internal desc = chmod /var/lib/kubelet/plugins/kubernetes.io/csi/pv/aglais-gaia-dr2-volume/globalmount: permission denied
+ > 97s Warning FailedMount pod/aglais-gaia-dr2-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[test-data local-data default-token-4zmrq]: timed out waiting for the condition
+ >
+ > ---- ----
+ > Share [GEDR3]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-gaia-edr3-testpod Successfully assigned aglais-20210125/aglais-gaia-edr3-testpod to aglais-20210125-cluster-bf3en5lv3e6a-node-1
+ > 78s Warning FailedMount pod/aglais-gaia-edr3-testpod MountVolume.MountDevice failed for volume "aglais-gaia-edr3-volume" : rpc error: code = Internal desc = chmod /var/lib/kubelet/plugins/kubernetes.io/csi/pv/aglais-gaia-edr3-volume/globalmount: permission denied
+ > 86s Warning FailedMount pod/aglais-gaia-edr3-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[local-data default-token-4zmrq test-data]: timed out waiting for the condition
+ >
+ > ---- ----
+ > Share [ALLWISE]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-wise-allwise-testpod Successfully assigned aglais-20210125/aglais-wise-allwise-testpod to aglais-20210125-cluster-bf3en5lv3e6a-node-2
+ > 66s Warning FailedMount pod/aglais-wise-allwise-testpod MountVolume.MountDevice failed for volume "aglais-wise-allwise-volume" : rpc error: code = Internal desc = chmod /var/lib/kubelet/plugins/kubernetes.io/csi/pv/aglais-wise-allwise-volume/globalmount: permission denied
+ > 73s Warning FailedMount pod/aglais-wise-allwise-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[test-data local-data default-token-4zmrq]: timed out waiting for the condition
+ >
+ > ---- ----
+ > Share [PS1]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-panstarrs-dr1-testpod Successfully assigned aglais-20210125/aglais-panstarrs-dr1-testpod to aglais-20210125-cluster-bf3en5lv3e6a-node-2
+ > 53s Warning FailedMount pod/aglais-panstarrs-dr1-testpod MountVolume.MountDevice failed for volume "aglais-panstarrs-dr1-volume" : rpc error: code = Internal desc = chmod /var/lib/kubelet/plugins/kubernetes.io/csi/pv/aglais-panstarrs-dr1-volume/globalmount: permission denied
+ > 60s Warning FailedMount pod/aglais-panstarrs-dr1-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[test-data local-data default-token-4zmrq]: timed out waiting for the condition
+ >
+ > ---- ----
+ > Share [2MASS]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-twomass-allsky-testpod Successfully assigned aglais-20210125/aglais-twomass-allsky-testpod to aglais-20210125-cluster-bf3en5lv3e6a-node-3
+ > 40s Warning FailedMount pod/aglais-twomass-allsky-testpod MountVolume.MountDevice failed for volume "aglais-twomass-allsky-volume" : rpc error: code = Internal desc = chmod /var/lib/kubelet/plugins/kubernetes.io/csi/pv/aglais-twomass-allsky-volume/globalmount: permission denied
+ > 47s Warning FailedMount pod/aglais-twomass-allsky-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[default-token-4zmrq test-data local-data]: timed out waiting for the condition
+
+
+ #
+ # All of them failed this time.
+ # All of them reported the same error this time.
+ #
+
+
+# -----------------------------------------------------
+# Check the test pod events for the user shares.
+#[root@kubernator]
+
+ sharelist='/common/manila/usershares.yaml'
+
+ for shareid in $(
+ yq read "${sharelist:?}" 'shares.[*].id'
+ )
+ do
+ echo ""
+ echo "---- ----"
+ echo "Share [${shareid:?}]"
+ echo "----"
+
+ sharename=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).sharename")
+ mountpath=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).mountpath")
+
+ kubectl \
+ --namespace "${namespace:?}" \
+ get event \
+ --field-selector "involvedObject.name=${sharename:?}-testpod"
+
+ done
+
+ > ---- ----
+ > Share [nch]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-user-nch-testpod Successfully assigned aglais-20210125/aglais-user-nch-testpod to aglais-20210125-cluster-bf3en5lv3e6a-node-0
+ > 6m1s Normal Pulling pod/aglais-user-nch-testpod Pulling image "fedora:latest"
+ > 5m54s Normal Pulled pod/aglais-user-nch-testpod Successfully pulled image "fedora:latest"
+ > 5m53s Normal Created pod/aglais-user-nch-testpod Created container aglais-user-nch-container
+ > 5m53s Normal Started pod/aglais-user-nch-testpod Started container aglais-user-nch-container
+ >
+ > ---- ----
+ > Share [zrq]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-user-zrq-testpod Successfully assigned aglais-20210125/aglais-user-zrq-testpod to aglais-20210125-cluster-bf3en5lv3e6a-node-2
+ > 5m50s Warning FailedMount pod/aglais-user-zrq-testpod MountVolume.MountDevice failed for volume "aglais-user-zrq-volume" : rpc error: code = Internal desc = failed to retrieve share ff351afd-1f06-4d02-9f53-cbe20b0676cc: Request forbidden: [GET https://cumulus.openstack.hpc.cam.ac.uk:8786/v2/08e24c6d87f94740aa59c172462ed927/shares/ff351afd-1f06-4d02-9f53-cbe20b0676cc], error message: {"forbidden": {"message": "Policy doesn't allow share:get to be performed.", "code": 403}}
+ > 100s Warning FailedMount pod/aglais-user-zrq-testpod MountVolume.MountDevice failed for volume "aglais-user-zrq-volume" : rpc error: code = InvalidArgument desc = stage secrets cannot be nil or empty
+ > 3m48s Warning FailedMount pod/aglais-user-zrq-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[test-data local-data default-token-4zmrq]: timed out waiting for the condition
+ > 90s Warning FailedMount pod/aglais-user-zrq-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[default-token-4zmrq test-data local-data]: timed out waiting for the condition
+ >
+ > ---- ----
+ > Share [stv]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-user-stv-testpod Successfully assigned aglais-20210125/aglais-user-stv-testpod to aglais-20210125-cluster-bf3en5lv3e6a-node-0
+ > 5m38s Warning FailedMount pod/aglais-user-stv-testpod MountVolume.MountDevice failed for volume "aglais-user-stv-volume" : rpc error: code = Internal desc = failed to retrieve share fe63568a-d90c-4fb0-8979-07504328809d: Request forbidden: [GET https://cumulus.openstack.hpc.cam.ac.uk:8786/v2/08e24c6d87f94740aa59c172462ed927/shares/fe63568a-d90c-4fb0-8979-07504328809d], error message: {"forbidden": {"message": "Policy doesn't allow share:get to be performed.", "code": 403}}
+ > 88s Warning FailedMount pod/aglais-user-stv-testpod MountVolume.MountDevice failed for volume "aglais-user-stv-volume" : rpc error: code = InvalidArgument desc = stage secrets cannot be nil or empty
+ > 3m36s Warning FailedMount pod/aglais-user-stv-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[default-token-4zmrq test-data local-data]: timed out waiting for the condition
+ > 81s Warning FailedMount pod/aglais-user-stv-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[local-data default-token-4zmrq test-data]: timed out waiting for the condition
+
+ #
+ # One worked, the rest failed.
+    # Same error message, but different from the data shares.
+ #
+
+# -----------------------------------------------------
+# -----------------------------------------------------
+
+ Looking at the Horizon GUI, the failed user shares were not public.
+ Updated the share properties, making all of them public.
+
+# -----------------------------------------------------
+# -----------------------------------------------------
+# Delete everything.
+#[root@kubernator]
+
+ /openstack/bin/delete-all.sh \
+ "${cloudname:?}"
+
+ > ....
+ > ....
+
+# -----------------------------------------------------
+# Create everything.
+#[root@kubernator]
+
+ /kubernetes/bin/create-all.sh
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Check the results.
+#[root@kubernator]
+
+ cat '/tmp/aglais-status.yml'
+
+ > aglais:
+ > status:
+ > deployment:
+ > type: kubernetes
+ > name: aglais-20210125
+ > date: 20210125:140610
+ > openstack:
+ > cloudname: gaia-dev
+ > magnum:
+ > uuid: befc7a6f-57fd-4a8f-94a6-3694d20229b9
+ > kubernetes:
+ > namespace: aglais-20210125
+ > ingress:
+ > dashboard:
+ > hostname: dashboard.metagrid.xyz
+ > ipv4:
+ > zeppelin:
+ > hostname: zeppelin.metagrid.xyz
+ > ipv4: null
+
+
+# -----------------------------------------------------
+# Get the cluster ID and K8s namespace.
+#[root@kubernator]
+
+ magnumid=$(
+ yq read '/tmp/aglais-status.yml' 'aglais.status.openstack.magnum.uuid'
+ )
+
+ namespace=$(
+ yq read '/tmp/aglais-status.yml' 'aglais.status.kubernetes.namespace'
+ )
+
+cat << EOF
+Magnum uuid [${magnumid}]
+Name space [${namespace}]
+EOF
+
+ > Magnum uuid [befc7a6f-57fd-4a8f-94a6-3694d20229b9]
+ > Name space [aglais-20210125]
+
+
+# -----------------------------------------------------
+# Check the test pod events for the user shares.
+#[root@kubernator]
+
+ sharelist='/common/manila/usershares.yaml'
+
+ for shareid in $(
+ yq read "${sharelist:?}" 'shares.[*].id'
+ )
+ do
+ echo ""
+ echo "---- ----"
+ echo "Share [${shareid:?}]"
+ echo "----"
+
+ sharename=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).sharename")
+ mountpath=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).mountpath")
+
+ kubectl \
+ --namespace "${namespace:?}" \
+ get event \
+ --field-selector "involvedObject.name=${sharename:?}-testpod"
+
+ done
+
+ > ---- ----
+ > Share [nch]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-user-nch-testpod Successfully assigned aglais-20210125/aglais-user-nch-testpod to aglais-20210125-cluster-cmauzltjts5o-node-0
+ > 2m22s Normal Pulling pod/aglais-user-nch-testpod Pulling image "fedora:latest"
+ > 2m15s Normal Pulled pod/aglais-user-nch-testpod Successfully pulled image "fedora:latest"
+ > 2m14s Normal Created pod/aglais-user-nch-testpod Created container aglais-user-nch-container
+ > 2m14s Normal Started pod/aglais-user-nch-testpod Started container aglais-user-nch-container
+ >
+ > ---- ----
+ > Share [zrq]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-user-zrq-testpod Successfully assigned aglais-20210125/aglais-user-zrq-testpod to aglais-20210125-cluster-cmauzltjts5o-node-2
+ > 2m9s Normal Pulling pod/aglais-user-zrq-testpod Pulling image "fedora:latest"
+ > 2m1s Normal Pulled pod/aglais-user-zrq-testpod Successfully pulled image "fedora:latest"
+ > 2m1s Normal Created pod/aglais-user-zrq-testpod Created container aglais-user-zrq-container
+ > 2m1s Normal Started pod/aglais-user-zrq-testpod Started container aglais-user-zrq-container
+ >
+ > ---- ----
+ > Share [stv]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-user-stv-testpod Successfully assigned aglais-20210125/aglais-user-stv-testpod to aglais-20210125-cluster-cmauzltjts5o-node-2
+ > 116s Normal Pulling pod/aglais-user-stv-testpod Pulling image "fedora:latest"
+ > 112s Normal Pulled pod/aglais-user-stv-testpod Successfully pulled image "fedora:latest"
+ > 112s Normal Created pod/aglais-user-stv-testpod Created container aglais-user-stv-container
+ > 112s Normal Started pod/aglais-user-stv-testpod Started container aglais-user-stv-container
+
+ #
+ # OK - so they all worked.
+ # Needed to make them all public.
+ #
+
+
+# -----------------------------------------------------
+# Check the test pod events for the data shares.
+#[root@kubernator]
+
+ sharelist='/common/manila/datashares.yaml'
+
+ for shareid in $(
+ yq read "${sharelist:?}" 'shares.[*].id'
+ )
+ do
+ echo ""
+ echo "---- ----"
+ echo "Share [${shareid:?}]"
+ echo "----"
+
+ sharename=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).sharename")
+ mountpath=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).mountpath")
+
+ kubectl \
+ --namespace "${namespace:?}" \
+ get event \
+ --field-selector "involvedObject.name=${sharename:?}-testpod"
+
+ done
+
+
+ > ---- ----
+ > Share [GDR2]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-gaia-dr2-testpod Successfully assigned aglais-20210125/aglais-gaia-dr2-testpod to aglais-20210125-cluster-cmauzltjts5o-node-0
+ > 2m7s Warning FailedMount pod/aglais-gaia-dr2-testpod MountVolume.MountDevice failed for volume "aglais-gaia-dr2-volume" : kubernetes.io/csi: attacher.MountDevice failed to create newCsiDriverClient: driver name cephfs.manila.csi.openstack.org not found in the list of registered CSI drivers
+ > 6s Warning FailedMount pod/aglais-gaia-dr2-testpod MountVolume.MountDevice failed for volume "aglais-gaia-dr2-volume" : rpc error: code = Internal desc = chmod /var/lib/kubelet/plugins/kubernetes.io/csi/pv/aglais-gaia-dr2-volume/globalmount: permission denied
+ > 12s Warning FailedMount pod/aglais-gaia-dr2-testpod Unable to attach or mount volumes: unmounted volumes=[test-data], unattached volumes=[local-data default-token-vc7cp test-data]: timed out waiting for the condition
+ >
+ > ---- ----
+ > Share [GEDR3]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-gaia-edr3-testpod Successfully assigned aglais-20210125/aglais-gaia-edr3-testpod to aglais-20210125-cluster-cmauzltjts5o-node-0
+ > 57s Warning FailedMount pod/aglais-gaia-edr3-testpod MountVolume.MountDevice failed for volume "aglais-gaia-edr3-volume" : rpc error: code = Internal desc = chmod /var/lib/kubelet/plugins/kubernetes.io/csi/pv/aglais-gaia-edr3-volume/globalmount: permission denied
+ >
+ > ---- ----
+ > Share [ALLWISE]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-wise-allwise-testpod Successfully assigned aglais-20210125/aglais-wise-allwise-testpod to aglais-20210125-cluster-cmauzltjts5o-node-1
+ > 44s Warning FailedMount pod/aglais-wise-allwise-testpod MountVolume.MountDevice failed for volume "aglais-wise-allwise-volume" : rpc error: code = Internal desc = chmod /var/lib/kubelet/plugins/kubernetes.io/csi/pv/aglais-wise-allwise-volume/globalmount: permission denied
+ >
+ > ---- ----
+ > Share [PS1]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-panstarrs-dr1-testpod Successfully assigned aglais-20210125/aglais-panstarrs-dr1-testpod to aglais-20210125-cluster-cmauzltjts5o-node-1
+ > 32s Warning FailedMount pod/aglais-panstarrs-dr1-testpod MountVolume.MountDevice failed for volume "aglais-panstarrs-dr1-volume" : rpc error: code = Internal desc = chmod /var/lib/kubelet/plugins/kubernetes.io/csi/pv/aglais-panstarrs-dr1-volume/globalmount: permission denied
+ >
+ > ---- ----
+ > Share [2MASS]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-twomass-allsky-testpod Successfully assigned aglais-20210125/aglais-twomass-allsky-testpod to aglais-20210125-cluster-cmauzltjts5o-node-3
+ > 20s Warning FailedMount pod/aglais-twomass-allsky-testpod MountVolume.MountDevice failed for volume "aglais-twomass-allsky-volume" : rpc error: code = Internal desc = chmod /var/lib/kubelet/plugins/kubernetes.io/csi/pv/aglais-twomass-allsky-volume/globalmount: permission denied
+
+
+ #
+ # First one is a glitch - the rest are as before.
+ #
+
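+# -----------------------------------------------------
+# Check which CSI drivers are registered on each node.
+# A minimal sketch - the 'not found in the list of registered CSI drivers'
+# error above suggests the node plugin had not finished registering yet.
+#[root@kubernator]
+
+    kubectl \
+        get CSINode \
+        --output json \
+        | jq -r '
+            .items[]
+            | .metadata.name + " : " + ([.spec.drivers[]?.name] | join(", "))
+            '
+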
+ #
+ # Difference between user shares and data shares ?
+ # We create the data shares as ro and user shares as rw.
+    # The cephfs-mount script uses the read/write mode to select the access rule.
+ # ... but due to earlier issues with cephfs-csi, we always mount using ReadWriteMany.
+ # Hence the 'permission denied' errors ?
+ #
+ #
+
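+# -----------------------------------------------------
+# Check the access mode requested on one of the CSI data volumes.
+# A minimal sketch - given the note above, this is expected to report
+# ReadWriteMany even though the Manila access rule for the data share is 'ro'.
+#[root@kubernator]
+
+    kubectl \
+        get PersistentVolume \
+        'aglais-gaia-dr2-volume' \
+        --output json \
+        | jq -r '.spec.accessModes[]'
+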
+ #
+ # Edit the create all script to mount them as rw.
+ #
+
+
+# -----------------------------------------------------
+# -----------------------------------------------------
+# Delete everything.
+#[root@kubernator]
+
+ /openstack/bin/delete-all.sh \
+ "${cloudname:?}"
+
+ > ....
+ > ....
+
+# -----------------------------------------------------
+# Create everything.
+#[root@kubernator]
+
+ /kubernetes/bin/create-all.sh
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Check the results.
+#[root@kubernator]
+
+ cat '/tmp/aglais-status.yml'
+
+ > aglais:
+ > status:
+ > deployment:
+ > type: kubernetes
+ > name: aglais-20210125
+ > date: 20210125:145052
+ > openstack:
+ > cloudname: gaia-dev
+ > magnum:
+ > uuid: 96fc649b-f5be-4ac0-8293-59a4ffdf4e97
+ > kubernetes:
+ > namespace: aglais-20210125
+ > ingress:
+ > dashboard:
+ > hostname: dashboard.metagrid.xyz
+ > ipv4:
+ > zeppelin:
+ > hostname: zeppelin.metagrid.xyz
+ > ipv4: null
+
+# -----------------------------------------------------
+# Get the cluster ID and K8s namespace.
+#[root@kubernator]
+
+ magnumid=$(
+ yq read '/tmp/aglais-status.yml' 'aglais.status.openstack.magnum.uuid'
+ )
+
+ namespace=$(
+ yq read '/tmp/aglais-status.yml' 'aglais.status.kubernetes.namespace'
+ )
+
+cat << EOF
+Magnum uuid [${magnumid}]
+Name space [${namespace}]
+EOF
+
+ > Magnum uuid [96fc649b-f5be-4ac0-8293-59a4ffdf4e97]
+ > Name space [aglais-20210125]
+
+
+# -----------------------------------------------------
+# Check the test pod events for the user shares.
+#[root@kubernator]
+
+ sharelist='/common/manila/usershares.yaml'
+
+ for shareid in $(
+ yq read "${sharelist:?}" 'shares.[*].id'
+ )
+ do
+ echo ""
+ echo "---- ----"
+ echo "Share [${shareid:?}]"
+ echo "----"
+
+ sharename=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).sharename")
+ mountpath=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).mountpath")
+
+ kubectl \
+ --namespace "${namespace:?}" \
+ get event \
+ --field-selector "involvedObject.name=${sharename:?}-testpod"
+
+ done
+
+ > ---- ----
+ > Share [nch]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-user-nch-testpod Successfully assigned aglais-20210125/aglais-user-nch-testpod to aglais-20210125-cluster-jbncmdarhg4l-node-2
+ > 9m5s Normal Pulling pod/aglais-user-nch-testpod Pulling image "fedora:latest"
+ > 9m2s Normal Pulled pod/aglais-user-nch-testpod Successfully pulled image "fedora:latest"
+ > 9m2s Normal Created pod/aglais-user-nch-testpod Created container aglais-user-nch-container
+ > 9m2s Normal Started pod/aglais-user-nch-testpod Started container aglais-user-nch-container
+ >
+ > ---- ----
+ > Share [zrq]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-user-zrq-testpod Successfully assigned aglais-20210125/aglais-user-zrq-testpod to aglais-20210125-cluster-jbncmdarhg4l-node-3
+ > 8m53s Normal Pulling pod/aglais-user-zrq-testpod Pulling image "fedora:latest"
+ > 8m49s Normal Pulled pod/aglais-user-zrq-testpod Successfully pulled image "fedora:latest"
+ > 8m49s Normal Created pod/aglais-user-zrq-testpod Created container aglais-user-zrq-container
+ > 8m49s Normal Started pod/aglais-user-zrq-testpod Started container aglais-user-zrq-container
+ >
+ > ---- ----
+ > Share [stv]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-user-stv-testpod Successfully assigned aglais-20210125/aglais-user-stv-testpod to aglais-20210125-cluster-jbncmdarhg4l-node-1
+ > 8m39s Normal Pulling pod/aglais-user-stv-testpod Pulling image "fedora:latest"
+ > 8m36s Normal Pulled pod/aglais-user-stv-testpod Successfully pulled image "fedora:latest"
+ > 8m36s Normal Created pod/aglais-user-stv-testpod Created container aglais-user-stv-container
+ > 8m36s Normal Started pod/aglais-user-stv-testpod Started container aglais-user-stv-container
+
+
+# -----------------------------------------------------
+# Check the test pod events for the data shares.
+#[root@kubernator]
+
+ sharelist='/common/manila/datashares.yaml'
+
+ for shareid in $(
+ yq read "${sharelist:?}" 'shares.[*].id'
+ )
+ do
+ echo ""
+ echo "---- ----"
+ echo "Share [${shareid:?}]"
+ echo "----"
+
+ sharename=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).sharename")
+ mountpath=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).mountpath")
+
+ kubectl \
+ --namespace "${namespace:?}" \
+ get event \
+ --field-selector "involvedObject.name=${sharename:?}-testpod"
+
+ done
+
+ > ---- ----
+ > Share [GDR2]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-gaia-dr2-testpod Successfully assigned aglais-20210125/aglais-gaia-dr2-testpod to aglais-20210125-cluster-jbncmdarhg4l-node-1
+ > 10m Warning FailedMount pod/aglais-gaia-dr2-testpod MountVolume.MountDevice failed for volume "aglais-gaia-dr2-volume" : kubernetes.io/csi: attacher.MountDevice failed to create newCsiDriverClient: driver name cephfs.manila.csi.openstack.org not found in the list of registered CSI drivers
+ > 10m Normal Pulling pod/aglais-gaia-dr2-testpod Pulling image "fedora:latest"
+ > 10m Normal Pulled pod/aglais-gaia-dr2-testpod Successfully pulled image "fedora:latest"
+ > 10m Normal Created pod/aglais-gaia-dr2-testpod Created container aglais-gaia-dr2-container
+ > 10m Normal Started pod/aglais-gaia-dr2-testpod Started container aglais-gaia-dr2-container
+ >
+ > ---- ----
+ > Share [GEDR3]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-gaia-edr3-testpod Successfully assigned aglais-20210125/aglais-gaia-edr3-testpod to aglais-20210125-cluster-jbncmdarhg4l-node-2
+ > 10m Normal Pulling pod/aglais-gaia-edr3-testpod Pulling image "fedora:latest"
+ > 10m Normal Pulled pod/aglais-gaia-edr3-testpod Successfully pulled image "fedora:latest"
+ > 10m Normal Created pod/aglais-gaia-edr3-testpod Created container aglais-gaia-edr3-container
+ > 10m Normal Started pod/aglais-gaia-edr3-testpod Started container aglais-gaia-edr3-container
+ >
+ > ---- ----
+ > Share [ALLWISE]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-wise-allwise-testpod Successfully assigned aglais-20210125/aglais-wise-allwise-testpod to aglais-20210125-cluster-jbncmdarhg4l-node-0
+ > 10m Normal Pulling pod/aglais-wise-allwise-testpod Pulling image "fedora:latest"
+ > 9m53s Normal Pulled pod/aglais-wise-allwise-testpod Successfully pulled image "fedora:latest"
+ > 9m52s Normal Created pod/aglais-wise-allwise-testpod Created container aglais-wise-allwise-container
+ > 9m52s Normal Started pod/aglais-wise-allwise-testpod Started container aglais-wise-allwise-container
+ >
+ > ---- ----
+ > Share [PS1]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-panstarrs-dr1-testpod Successfully assigned aglais-20210125/aglais-panstarrs-dr1-testpod to aglais-20210125-cluster-jbncmdarhg4l-node-0
+ > 9m48s Normal Pulling pod/aglais-panstarrs-dr1-testpod Pulling image "fedora:latest"
+ > 9m45s Normal Pulled pod/aglais-panstarrs-dr1-testpod Successfully pulled image "fedora:latest"
+ > 9m45s Normal Created pod/aglais-panstarrs-dr1-testpod Created container aglais-panstarrs-dr1-container
+ > 9m45s Normal Started pod/aglais-panstarrs-dr1-testpod Started container aglais-panstarrs-dr1-container
+ >
+ > ---- ----
+ > Share [2MASS]
+ > ----
+ > LAST SEEN TYPE REASON OBJECT MESSAGE
+ > Normal Scheduled pod/aglais-twomass-allsky-testpod Successfully assigned aglais-20210125/aglais-twomass-allsky-testpod to aglais-20210125-cluster-jbncmdarhg4l-node-3
+ > 9m37s Normal Pulling pod/aglais-twomass-allsky-testpod Pulling image "fedora:latest"
+ > 9m29s Normal Pulled pod/aglais-twomass-allsky-testpod Successfully pulled image "fedora:latest"
+ > 9m28s Normal Created pod/aglais-twomass-allsky-testpod Created container aglais-twomass-allsky-container
+
+ #
+ # So the failed mounts were due to a combination of two things.
+    # Due to a known issue with the Cephfs CSI plugin, credentials don't work for anything other than ReadWriteMany.
+ # We were trying to use an Openstack 'ro' access rule to mount a CSI volume as 'ReadWriteMany'.
+ # => permission error
+ #
+    # We were trying to access an Openstack Manila share that wasn't public.
+ # => stage secrets cannot be nil or empty
+ #
+
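+# -----------------------------------------------------
+# Check the access rules granted on a share.
+# A minimal sketch - assumes the python-manilaclient CLI is available and that
+# this name matches the Manila share name; the 'access_level' column shows
+# whether each rule is 'ro' or 'rw'.
+#[root@kubernator]
+
+    manila access-list 'aglais-gaia-dr2'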
+
+# -----------------------------------------------------
+# Check the CSI volumes, claims and testpods for the data volumes.
+#[root@kubernator]
+
+ sharelist='/common/manila/datashares.yaml'
+
+ for shareid in $(
+ yq read "${sharelist:?}" 'shares.[*].id'
+ )
+ do
+ echo ""
+ echo "---- ----"
+ echo "Share [${shareid:?}]"
+
+ sharename=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).sharename")
+ mountpath=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).mountpath")
+
+ podphase=$(
+ kubectl \
+ --namespace "${namespace:?}" \
+ get pod \
+ --output json \
+ "${sharename:?}-testpod" \
+ | jq -r '.status.phase'
+ )
+
+ volphase=$(
+ kubectl \
+ --namespace "${namespace:?}" \
+ get PersistentVolume \
+ --output json \
+ "${sharename:?}-volume" \
+ | jq -r '.status.phase'
+ )
+
+ claimphase=$(
+ kubectl \
+ --namespace "${namespace:?}" \
+ get PersistentVolumeClaim \
+ --output json \
+ "${sharename:?}-claim" \
+ | jq -r '.status.phase'
+ )
+
+ echo "Testpod [${podphase}]"
+ echo "Volume [${volphase}]"
+ echo "Claim [${claimphase}]"
+
+ yq write \
+ --inplace \
+ '/tmp/aglais-status.yml' \
+ "aglais.status.kubernetes.csi-manila.${sharename:?}.testpod" \
+ "${podphase}"
+
+ yq write \
+ --inplace \
+ '/tmp/aglais-status.yml' \
+ "aglais.status.kubernetes.csi-manila.${sharename:?}.volume" \
+ "${volphase}"
+
+ yq write \
+ --inplace \
+ '/tmp/aglais-status.yml' \
+ "aglais.status.kubernetes.csi-manila.${sharename:?}.claim" \
+ "${claimphase}"
+
+ echo "----"
+ kubectl \
+ --namespace "${namespace:?}" \
+ exec \
+ --tty \
+ --stdin \
+ "${sharename:?}-testpod" \
+ -- \
+ /usr/bin/df -h "${mountpath:?}"
+ echo "----"
+
+ done
+
+ > ---- ----
+ > Share [GDR2]
+ > Testpod [Running]
+ > Volume [Bound]
+ > Claim [Bound]
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 512G 473G 40G 93% /data/gaia/dr2
+ > ----
+ >
+ > ---- ----
+ > Share [GEDR3]
+ > Testpod [Running]
+ > Volume [Bound]
+ > Claim [Bound]
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 540G 533G 7.9G 99% /data/gaia/edr3
+ > ----
+ >
+ > ---- ----
+ > Share [ALLWISE]
+ > Testpod [Running]
+ > Volume [Bound]
+ > Claim [Bound]
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 350G 341G 9.9G 98% /data/wise/allwise
+ > ----
+ >
+ > ---- ----
+ > Share [PS1]
+ > Testpod [Running]
+ > Volume [Bound]
+ > Claim [Bound]
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 300G 270G 31G 90% /data/panstarrs/dr1
+ > ----
+ >
+ > ---- ----
+ > Share [2MASS]
+ > Testpod [Running]
+ > Volume [Bound]
+ > Claim [Bound]
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 40G 37G 3.5G 92% /data/twomass/allsky
+ > ----
+
+
+# -----------------------------------------------------
+# Check the CSI volumes, claims and testpods for the user volumes.
+#[root@kubernator]
+
+ sharelist='/common/manila/usershares.yaml'
+
+ for shareid in $(
+ yq read "${sharelist:?}" 'shares.[*].id'
+ )
+ do
+ echo ""
+ echo "---- ----"
+ echo "Share [${shareid:?}]"
+
+ sharename=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).sharename")
+ mountpath=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).mountpath")
+
+ podphase=$(
+ kubectl \
+ --namespace "${namespace:?}" \
+ get pod \
+ --output json \
+ "${sharename:?}-testpod" \
+ | jq -r '.status.phase'
+ )
+
+ volphase=$(
+ kubectl \
+ --namespace "${namespace:?}" \
+ get PersistentVolume \
+ --output json \
+ "${sharename:?}-volume" \
+ | jq -r '.status.phase'
+ )
+
+ claimphase=$(
+ kubectl \
+ --namespace "${namespace:?}" \
+ get PersistentVolumeClaim \
+ --output json \
+ "${sharename:?}-claim" \
+ | jq -r '.status.phase'
+ )
+
+ echo "Testpod [${podphase}]"
+ echo "Volume [${volphase}]"
+ echo "Claim [${claimphase}]"
+
+ yq write \
+ --inplace \
+ '/tmp/aglais-status.yml' \
+ "aglais.status.kubernetes.csi-manila.${sharename:?}.testpod" \
+ "${podphase}"
+
+ yq write \
+ --inplace \
+ '/tmp/aglais-status.yml' \
+ "aglais.status.kubernetes.csi-manila.${sharename:?}.volume" \
+ "${volphase}"
+
+ yq write \
+ --inplace \
+ '/tmp/aglais-status.yml' \
+ "aglais.status.kubernetes.csi-manila.${sharename:?}.claim" \
+ "${claimphase}"
+
+ echo "----"
+ kubectl \
+ --namespace "${namespace:?}" \
+ exec \
+ --tty \
+ --stdin \
+ "${sharename:?}-testpod" \
+ -- \
+ /usr/bin/df -h "${mountpath:?}"
+ echo "----"
+
+ done
+
+ > ---- ----
+ > Share [nch]
+ > Testpod [Running]
+ > Volume [Bound]
+ > Claim [Bound]
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 10T 4.9T 5.2T 49% /user/nch
+ > ----
+ >
+ > ---- ----
+ > Share [zrq]
+ > Testpod [Running]
+ > Volume [Bound]
+ > Claim [Bound]
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 1.0T 30G 995G 3% /user/zrq
+ > ----
+ >
+ > ---- ----
+ > Share [stv]
+ > Testpod [Running]
+ > Volume [Bound]
+ > Claim [Bound]
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 1.0T 0 1.0T 0% /user/stv
+ > ----
+
+
+# -----------------------------------------------------
+# Check our results
+#[root@kubernator]
+
+ cat /tmp/aglais-status.yml
+
+ > aglais:
+ > status:
+ > deployment:
+ > type: kubernetes
+ > name: aglais-20210125
+ > date: 20210125:145052
+ > openstack:
+ > cloudname: gaia-dev
+ > magnum:
+ > uuid: 96fc649b-f5be-4ac0-8293-59a4ffdf4e97
+ > kubernetes:
+ > namespace: aglais-20210125
+ > ingress:
+ > dashboard:
+ > hostname: dashboard.metagrid.xyz
+ > ipv4:
+ > zeppelin:
+ > hostname: zeppelin.metagrid.xyz
+ > ipv4: null
+ > csi-manila:
+ > aglais-gaia-dr2:
+ > testpod: Running
+ > volume: Bound
+ > claim: Bound
+ > aglais-gaia-edr3:
+ > testpod: Running
+ > volume: Bound
+ > claim: Bound
+ > aglais-wise-allwise:
+ > testpod: Running
+ > volume: Bound
+ > claim: Bound
+ > aglais-panstarrs-dr1:
+ > testpod: Running
+ > volume: Bound
+ > claim: Bound
+ > aglais-twomass-allsky:
+ > testpod: Running
+ > volume: Bound
+ > claim: Bound
+ > aglais-user-nch:
+ > testpod: Running
+ > volume: Bound
+ > claim: Bound
+ > aglais-user-zrq:
+ > testpod: Running
+ > volume: Bound
+ > claim: Bound
+ > aglais-user-stv:
+ > testpod: Running
+ > volume: Bound
+
+
diff --git a/notes/zrq/20210127-01-kubernetes-deploy.txt b/notes/zrq/20210127-01-kubernetes-deploy.txt
new file mode 100644
index 00000000..648ee825
--- /dev/null
+++ b/notes/zrq/20210127-01-kubernetes-deploy.txt
@@ -0,0 +1,465 @@
+#
+#
+#
+# Copyright (c) 2021, ROE (http://www.roe.ac.uk/)
+#
+# This information is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This information is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#
+#
+#zrq-notes-time
+#zrq-notes-indent
+#zrq-notes-crypto
+#zrq-notes-ansible
+#zrq-notes-osformat
+#zrq-notes-zeppelin
+#
+
+ Target:
+
+ Run the Kubernetes deployment.
+
+ Results:
+
+ Success.
+
+ TODO:
+
+ Store aglais-status in a persistent volume so it can be re-mounted.
+ Update our DNS records.
+
+
+# -----------------------------------------------------
+# Update the Openstack cloud name.
+#[user@desktop]
+
+ cloudname=gaia-dev
+
+ sed -i '
+ s/^\(AGLAIS_CLOUD\)=.*$/\1='${cloudname:?}'/
+ ' "${HOME}/aglais.env"
+
+# -----------------------------------------------------
+# Create a container to work with.
+# (*) extra volume mount for /common
+# (*) mount kubernetes directory as read/write
+#[user@desktop]
+
+ source "${HOME:?}/aglais.env"
+
+ podman run \
+ --rm \
+ --tty \
+ --interactive \
+ --name kubernator \
+ --hostname kubernator \
+ --env "SSH_AUTH_SOCK=/mnt/ssh_auth_sock" \
+ --volume "${SSH_AUTH_SOCK}:/mnt/ssh_auth_sock:rw,z" \
+ --env "cloudname=${AGLAIS_CLOUD:?}" \
+ --volume "${HOME:?}/clouds.yaml:/etc/openstack/clouds.yaml:ro,z" \
+ --volume "${AGLAIS_CODE:?}/experiments/common:/common:ro,z" \
+ --volume "${AGLAIS_CODE:?}/experiments/openstack:/openstack:ro,z" \
+ --volume "${AGLAIS_CODE:?}/experiments/kubernetes:/kubernetes:rw,z" \
+ atolmis/ansible-client:2020.12.02 \
+ bash
+
+
+# -----------------------------------------------------
+# Delete everything.
+#[root@kubernator]
+
+ /openstack/bin/delete-all.sh \
+ "${cloudname:?}"
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Create our Aglais configuration.
+#[root@kubernator]
+
+cat > '/tmp/aglais-config.yml' << EOF
+aglais:
+ version: 1.0
+ spec:
+ openstack:
+ cloudname: ${cloudname:?}
+ dashboard:
+ hostname: dashboard.metagrid.xyz
+ zeppelin:
+ hostname: zeppelin.metagrid.xyz
+ drupal:
+ hostname: drupal.metagrid.xyz
+EOF
+
+
+# -----------------------------------------------------
+# Create everything.
+#[root@kubernator]
+
+ /kubernetes/bin/create-all.sh
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Check the results.
+#[root@kubernator]
+
+ cat '/tmp/aglais-status.yml'
+
+ > aglais:
+ > status:
+ > deployment:
+ > type: kubernetes
+ > name: aglais-20210127
+ > date: 20210127:050320
+ > openstack:
+ > cloudname: gaia-dev
+ > magnum:
+ > uuid: 350c310b-f343-439f-b265-3b5ac7f9d903
+ > kubernetes:
+ > namespace: aglais-20210127
+ > ingress:
+ > dashboard:
+ > hostname: dashboard.metagrid.xyz
+ > ipv4: 128.232.227.236
+ > zeppelin:
+ > hostname: zeppelin.metagrid.xyz
+ > ipv4: null
+
+
+# -----------------------------------------------------
+# Get the cluster ID and K8s namespace.
+#[root@kubernator]
+
+ magnumid=$(
+ yq read '/tmp/aglais-status.yml' 'aglais.status.openstack.magnum.uuid'
+ )
+
+ namespace=$(
+ yq read '/tmp/aglais-status.yml' 'aglais.status.kubernetes.namespace'
+ )
+
+cat << EOF
+Magnum uuid [${magnumid}]
+Name space [${namespace}]
+EOF
+
+ > Magnum uuid [350c310b-f343-439f-b265-3b5ac7f9d903]
+ > Name space [aglais-20210127]
+
+
+# -----------------------------------------------------
+# Check the CSI volumes, claims and testpods for the data volumes.
+#[root@kubernator]
+
+ sharelist='/common/manila/datashares.yaml'
+
+ for shareid in $(
+ yq read "${sharelist:?}" 'shares.[*].id'
+ )
+ do
+ echo ""
+ echo "---- ----"
+ echo "Share [${shareid:?}]"
+
+ sharename=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).sharename")
+ mountpath=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).mountpath")
+
+ podphase=$(
+ kubectl \
+ --namespace "${namespace:?}" \
+ get pod \
+ --output json \
+ "${sharename:?}-testpod" \
+ | jq -r '.status.phase'
+ )
+
+ volphase=$(
+ kubectl \
+ --namespace "${namespace:?}" \
+ get PersistentVolume \
+ --output json \
+ "${sharename:?}-volume" \
+ | jq -r '.status.phase'
+ )
+
+ claimphase=$(
+ kubectl \
+ --namespace "${namespace:?}" \
+ get PersistentVolumeClaim \
+ --output json \
+ "${sharename:?}-claim" \
+ | jq -r '.status.phase'
+ )
+
+ echo "Testpod [${podphase}]"
+ echo "Volume [${volphase}]"
+ echo "Claim [${claimphase}]"
+
+ yq write \
+ --inplace \
+ '/tmp/aglais-status.yml' \
+ "aglais.status.kubernetes.csi-manila.${sharename:?}.testpod" \
+ "${podphase}"
+
+ yq write \
+ --inplace \
+ '/tmp/aglais-status.yml' \
+ "aglais.status.kubernetes.csi-manila.${sharename:?}.volume" \
+ "${volphase}"
+
+ yq write \
+ --inplace \
+ '/tmp/aglais-status.yml' \
+ "aglais.status.kubernetes.csi-manila.${sharename:?}.claim" \
+ "${claimphase}"
+
+ echo "----"
+ kubectl \
+ --namespace "${namespace:?}" \
+ exec \
+ --tty \
+ --stdin \
+ "${sharename:?}-testpod" \
+ -- \
+ /usr/bin/df -h "${mountpath:?}"
+ echo "----"
+
+ done
+
+ > ---- ----
+ > Share [GDR2]
+ > Testpod [Running]
+ > Volume [Bound]
+ > Claim [Bound]
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 512G 473G 40G 93% /data/gaia/dr2
+ > ----
+ >
+ > ---- ----
+ > Share [GEDR3]
+ > Testpod [Running]
+ > Volume [Bound]
+ > Claim [Bound]
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 540G 533G 7.9G 99% /data/gaia/edr3
+ > ----
+ >
+ > ---- ----
+ > Share [ALLWISE]
+ > Testpod [Running]
+ > Volume [Bound]
+ > Claim [Bound]
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 350G 341G 9.9G 98% /data/wise/allwise
+ > ----
+ >
+ > ---- ----
+ > Share [PS1]
+ > Testpod [Running]
+ > Volume [Bound]
+ > Claim [Bound]
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 300G 270G 31G 90% /data/panstarrs/dr1
+ > ----
+ >
+ > ---- ----
+ > Share [2MASS]
+ > Testpod [Running]
+ > Volume [Bound]
+ > Claim [Bound]
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 40G 37G 3.5G 92% /data/twomass/allsky
+ > ----
+
+
+# -----------------------------------------------------
+# Check the CSI volumes, claims and testpods for the user volumes.
+#[root@kubernator]
+
+ sharelist='/common/manila/usershares.yaml'
+
+ for shareid in $(
+ yq read "${sharelist:?}" 'shares.[*].id'
+ )
+ do
+ echo ""
+ echo "---- ----"
+ echo "Share [${shareid:?}]"
+
+ sharename=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).sharename")
+ mountpath=$(yq read "${sharelist:?}" "shares.(id==${shareid:?}).mountpath")
+
+ podphase=$(
+ kubectl \
+ --namespace "${namespace:?}" \
+ get pod \
+ --output json \
+ "${sharename:?}-testpod" \
+ | jq -r '.status.phase'
+ )
+
+ volphase=$(
+ kubectl \
+ --namespace "${namespace:?}" \
+ get PersistentVolume \
+ --output json \
+ "${sharename:?}-volume" \
+ | jq -r '.status.phase'
+ )
+
+ claimphase=$(
+ kubectl \
+ --namespace "${namespace:?}" \
+ get PersistentVolumeClaim \
+ --output json \
+ "${sharename:?}-claim" \
+ | jq -r '.status.phase'
+ )
+
+ echo "Testpod [${podphase}]"
+ echo "Volume [${volphase}]"
+ echo "Claim [${claimphase}]"
+
+ yq write \
+ --inplace \
+ '/tmp/aglais-status.yml' \
+ "aglais.status.kubernetes.csi-manila.${sharename:?}.testpod" \
+ "${podphase}"
+
+ yq write \
+ --inplace \
+ '/tmp/aglais-status.yml' \
+ "aglais.status.kubernetes.csi-manila.${sharename:?}.volume" \
+ "${volphase}"
+
+ yq write \
+ --inplace \
+ '/tmp/aglais-status.yml' \
+ "aglais.status.kubernetes.csi-manila.${sharename:?}.claim" \
+ "${claimphase}"
+
+ echo "----"
+ kubectl \
+ --namespace "${namespace:?}" \
+ exec \
+ --tty \
+ --stdin \
+ "${sharename:?}-testpod" \
+ -- \
+ /usr/bin/df -h "${mountpath:?}"
+ echo "----"
+
+ done
+
+ > ---- ----
+ > Share [nch]
+ > Testpod [Running]
+ > Volume [Bound]
+ > Claim [Bound]
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 10T 4.9T 5.2T 49% /user/nch
+ > ----
+ >
+ > ---- ----
+ > Share [zrq]
+ > Testpod [Running]
+ > Volume [Bound]
+ > Claim [Bound]
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 1.0T 30G 995G 3% /user/zrq
+ > ----
+ >
+ > ---- ----
+ > Share [stv]
+ > Testpod [Running]
+ > Volume [Bound]
+ > Claim [Bound]
+ > ----
+ > Filesystem Size Used Avail Use% Mounted on
+ > ceph-fuse 1.0T 0 1.0T 0% /user/stv
+ > ----
+
+
+# -----------------------------------------------------
+# Check our results
+#[root@kubernator]
+
+ cat /tmp/aglais-status.yml
+
+ > aglais:
+ > status:
+ > deployment:
+ > type: kubernetes
+ > name: aglais-20210127
+ > date: 20210127:050320
+ > openstack:
+ > cloudname: gaia-dev
+ > magnum:
+ > uuid: 350c310b-f343-439f-b265-3b5ac7f9d903
+ > kubernetes:
+ > namespace: aglais-20210127
+ > ingress:
+ > dashboard:
+ > hostname: dashboard.metagrid.xyz
+ > ipv4: 128.232.227.236
+ > zeppelin:
+ > hostname: zeppelin.metagrid.xyz
+ > ipv4: null
+ > csi-manila:
+ > aglais-gaia-dr2:
+ > testpod: Running
+ > volume: Bound
+ > claim: Bound
+ > aglais-gaia-edr3:
+ > testpod: Running
+ > volume: Bound
+ > claim: Bound
+ > aglais-wise-allwise:
+ > testpod: Running
+ > volume: Bound
+ > claim: Bound
+ > aglais-panstarrs-dr1:
+ > testpod: Running
+ > volume: Bound
+ > claim: Bound
+ > aglais-twomass-allsky:
+ > testpod: Running
+ > volume: Bound
+ > claim: Bound
+ > aglais-user-nch:
+ > testpod: Running
+ > volume: Bound
+ > claim: Bound
+ > aglais-user-zrq:
+ > testpod: Running
+ > volume: Bound
+ > claim: Bound
+ > aglais-user-stv:
+ > testpod: Running
+ > volume: Bound
+ > claim: Bound
+
+
diff --git a/notes/zrq/20210127-02-google-oauth-proxy.txt b/notes/zrq/20210127-02-google-oauth-proxy.txt
new file mode 100644
index 00000000..23666347
--- /dev/null
+++ b/notes/zrq/20210127-02-google-oauth-proxy.txt
@@ -0,0 +1,667 @@
+#
+#
+#
+# Copyright (c) 2021, ROE (http://www.roe.ac.uk/)
+#
+# This information is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This information is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#
+#
+#zrq-notes-time
+#zrq-notes-indent
+#zrq-notes-crypto
+#zrq-notes-ansible
+#zrq-notes-osformat
+#zrq-notes-zeppelin
+#
+
+ Target:
+
+ Install and test OAuthProxy with Google OAuth.
+
+ Results:
+
+ Success :-)
+
+ Source:
+
+ Automated K8s deployment.
+ notes/zrq/20210127-01-kubernetes-deploy.txt
+
+
+# -----------------------------------------------------
+# Create a container to work with.
+# (*) extra volume mount for /common
+# (*) mount kubernetes directory as read/write
+#[user@desktop]
+
+ source "${HOME:?}/aglais.env"
+
+ podman run \
+ --rm \
+ --tty \
+ --interactive \
+ --name kubernator \
+ --hostname kubernator \
+ --env "SSH_AUTH_SOCK=/mnt/ssh_auth_sock" \
+ --volume "${SSH_AUTH_SOCK}:/mnt/ssh_auth_sock:rw,z" \
+ --env "cloudname=${AGLAIS_CLOUD:?}" \
+ --volume "${HOME:?}/clouds.yaml:/etc/openstack/clouds.yaml:ro,z" \
+ --volume "${AGLAIS_CODE:?}/experiments/common:/common:ro,z" \
+ --volume "${AGLAIS_CODE:?}/experiments/openstack:/openstack:ro,z" \
+ --volume "${AGLAIS_CODE:?}/experiments/kubernetes:/kubernetes:rw,z" \
+ atolmis/ansible-client:2020.12.02 \
+ bash
+
+
+# -----------------------------------------------------
+# Configure our secret function.
+#[root@kubernator]
+
+ mkdir "${HOME}/bin"
+
+ cat > "${HOME}/bin/secret" << 'EOF'
+ssh -n \
+ 'Zarquan@data.metagrid.co.uk' \
+ "bin/secret '${1}'"
+EOF
+
+ chmod a+x "${HOME}/bin/secret"
+
+ secret frog
+
+ # FAILs
+
+# -----------------------------------------------------
+# Test SSH access to the server.
+#[root@kubernator]
+
+ ssh -v Zarquan@data.metagrid.co.uk \
+ '
+ date
+ hostname
+ '
+
+ > ....
+ > ....
+ > debug1: Next authentication method: publickey
+ > debug1: Offering public key: /home/Zarquan/.ssh/zrq.metagrid.co.uk.rsa RSA SHA256:26sAWXfK3hzPzWHrZCqvhj6gKCkmbG/N2U9/AvZaHzI agent
+ > debug1: send_pubkey_test: no mutual signature algorithm
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Allow RSA keys.
+# https://dev.to/bowmanjd/upgrade-ssh-client-keys-and-remote-servers-after-fedora-33-s-new-crypto-policy-47ag
+#[root@kubernator]
+
+ cat >> "${HOME}/.ssh/config" << EOF
+# Allow RSA keys.
+# https://dev.to/bowmanjd/upgrade-ssh-client-keys-and-remote-servers-after-fedora-33-s-new-crypto-policy-47ag
+PubkeyAcceptedKeyTypes +ssh-rsa
+EOF
+
+
+# -----------------------------------------------------
+# Test SSH access to the server.
+#[root@kubernator]
+
+ ssh -v Zarquan@data.metagrid.co.uk \
+ '
+ date
+ hostname
+ '
+
+ > ....
+ > ....
+ > debug1: Next authentication method: publickey
+ > debug1: Offering public key: /home/Zarquan/.ssh/zrq.metagrid.co.uk.rsa RSA SHA256:26sAWXfK3hzPzWHrZCqvhj6gKCkmbG/N2U9/AvZaHzI agent
+ > debug1: Server accepts key: /home/Zarquan/.ssh/zrq.metagrid.co.uk.rsa RSA SHA256:26sAWXfK3hzPzWHrZCqvhj6gKCkmbG/N2U9/AvZaHzI agent
+ > debug1: Authentication succeeded (publickey).
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Test the secret function.
+#[root@kubernator]
+
+ secret frog
+
+ > Green Frog
+
+
+# -----------------------------------------------------
+# Get the connection details for the first cluster in the list.
+#[root@kubernator]
+
+ clusterid=$(
+ openstack \
+ --os-cloud "${cloudname:?}" \
+ coe cluster list \
+ --format json \
+ | jq -r '.[0] | .uuid'
+ )
+
+ '/kubernetes/bin/cluster-config.sh' \
+ "${cloudname:?}" \
+ "${clusterid:?}"
+
+ kubectl \
+ cluster-info
+
+ > Kubernetes master is running at https://128.232.224.75:6443
+ > Heapster is running at https://128.232.224.75:6443/api/v1/namespaces/kube-system/services/heapster/proxy
+ > CoreDNS is running at https://128.232.224.75:6443/api/v1/namespaces/kube-system/services/kube-dns:dns/proxy
+
+
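+# -----------------------------------------------------
+# A possible extra check (untested sketch): list the cluster nodes to confirm
+# the kubectl config written by cluster-config.sh is working.
+#[root@kubernator]
+
+    kubectl get nodes --output wide
+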
+# -----------------------------------------------------
+# Get the name of the 'aglais' namespace.
+#[root@kubernator]
+
+ namespace=$(
+ kubectl \
+ get namespace \
+ --output json \
+ | jq -r '.items[] | .metadata.name | select(. | startswith("aglais"))'
+ )
+
+ echo "Namespace [${namespace}]"
+
+ > Namespace [aglais-20210127]
+
+
+# -----------------------------------------------------
+# Get a token for the dashboard account.
+#[root@kubernator]
+
+ secretname=$(
+ kubectl \
+ --output json \
+ --namespace "${namespace:?}" \
+ get ServiceAccount \
+ "aglais-dashboard-kubernetes-dashboard" \
+ | jq -r '.secrets[0].name'
+ )
+
+ dashtoken=$(
+ kubectl \
+ --output json \
+ --namespace "${namespace:?}" \
+ get Secret \
+ "${secretname:?}" \
+ | jq -r '.data.token | @base64d'
+ )
+
+ echo ${dashtoken:?}
+
+ > ....
+ > ....
+
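+# -----------------------------------------------------
+# A possible sanity check (untested sketch): the token should be usable as a
+# bearer token, assuming the dashboard accepts header-based authentication.
+#[root@kubernator]
+
+    curl \
+        --head \
+        --insecure \
+        --header "Authorization: Bearer ${dashtoken:?}" \
+        'https://dashboard.metagrid.xyz/'
+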
+
+# -----------------------------------------------------
+# Check the ingress address.
+#[root@kubernator]
+
+ kubectl \
+ --namespace "${namespace}" \
+ get ingress
+
+ > NAME HOSTS ADDRESS PORTS AGE
+ > aglais-dashboard-kubernetes-dashboard dashboard.metagrid.xyz 128.232.227.227 80 44m
+ > zeppelin-server-ingress zeppelin.metagrid.xyz 128.232.227.227 80, 443 42m
+
+# -----------------------------------------------------
+# -----------------------------------------------------
+# Update our internal and external DNS
+#[user@dns-server]
+
+ aglais-001.metagrid.xyz A 128.232.227.227
+
+ vernon.metagrid.xyz CNAME aglais-001.metagrid.xyz.
+
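+# -----------------------------------------------------
+# A possible check (untested sketch): confirm the DNS records have propagated
+# before testing the ingress by name ('dig' is in bind-utils on Fedora).
+#[root@kubernator]
+
+    dnf install -y bind-utils
+
+    dig +short aglais-001.metagrid.xyz
+    dig +short vernon.metagrid.xyz
+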
+
+# -----------------------------------------------------
+# Check the zeppelin interface.
+#[root@kubernator]
+
+ curl --head --insecure 'https://zeppelin.metagrid.xyz/'
+
+ > HTTP/2 200
+ > date: Wed, 27 Jan 2021 05:38:25 GMT
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Check the dashboard interface.
+#[root@kubernator]
+
+ curl --head --insecure 'https://dashboard.metagrid.xyz/'
+
+ > HTTP/2 200
+ > date: Wed, 27 Jan 2021 05:38:59 GMT
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Get a copy of the Kubernetes NGINX Ingress project.
+# https://github.com/kubernetes/ingress-nginx.git
+#[user@kubernator]
+
+ dnf install -y git
+
+ cd ${HOME}
+ git clone https://github.com/kubernetes/ingress-nginx.git
+
+ > Cloning into 'ingress-nginx'...
+ > remote: Enumerating objects: 99890, done.
+ > remote: Total 99890 (delta 0), reused 0 (delta 0), pack-reused 99890
+ > Receiving objects: 100% (99890/99890), 114.20 MiB | 1.62 MiB/s, done.
+ > Resolving deltas: 100% (56462/56462), done.
+
+
+# -----------------------------------------------------
+# Deploy a test HTTP service.
+# https://github.com/kubernetes/ingress-nginx/blob/master/docs/examples/PREREQUISITES.md#test-http-service
+#[user@kubernator]
+
+ pushd "${HOME}/ingress-nginx"
+ pushd 'docs/examples'
+
+ kubectl create \
+ --filename http-svc.yaml
+
+ popd
+ popd
+
+ > deployment.apps/http-svc created
+ > service/http-svc created
+
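+# -----------------------------------------------------
+# A possible check (untested sketch): confirm the test deployment and service
+# exist before pointing Ingress rules at them.
+#[user@kubernator]
+
+    kubectl get deployment,service http-svc
+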
+
+# -----------------------------------------------------
+# Configure our OAuth settings.
+#[user@kubernator]
+
+ deployname=google
+
+ deployhostname=vernon.metagrid.xyz
+ deployauthpath=agromulupt
+ deploycallback=https://${deployhostname:?}/${deployauthpath:?}/callback
+
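+    # A possible extra step (sketch): echo the callback URL so it can be
+    # registered as an authorized redirect URI for the OAuth client in the
+    # Google API console.
+    echo "Callback [${deploycallback:?}]"
+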
+
+# -----------------------------------------------------
+# Create our SSL keys and store them in a Kubernetes secret.
+# https://github.com/kubernetes/ingress-nginx/blob/master/docs/examples/PREREQUISITES.md#tls-certificates
+#[user@kubernator]
+
+ dnf install -y openssl
+
+ openssl req \
+ -x509 \
+ -sha256 \
+ -nodes \
+ -days 365 \
+ -newkey rsa:2048 \
+ -keyout /tmp/tls.key \
+ -out /tmp/tls.crt \
+ -subj "/CN=${deployhostname:?}/O=Aglais"
+
+ > Generating a RSA private key
+ > ................................................+++++
+ > .................................+++++
+ > writing new private key to '/tmp/tls.key'
+
+
+ kubectl create secret \
+ tls \
+ ${deployname:?}-tls-secret \
+ --key /tmp/tls.key \
+ --cert /tmp/tls.crt
+
+ > secret/google-tls-secret created
+
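+# -----------------------------------------------------
+# A possible check (untested sketch): inspect the self-signed certificate.
+#[user@kubernator]
+
+    openssl x509 \
+        -in /tmp/tls.crt \
+        -noout \
+        -subject \
+        -dates
+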
+
+# -----------------------------------------------------
+# Deploy a TLS test Ingress
+# https://github.com/kubernetes/ingress-nginx/tree/master/docs/examples/tls-termination#deployment
+# https://github.com/kubernetes/ingress-nginx/tree/master/docs/examples/tls-termination#validation
+#[user@kubernator]
+
+ cat << EOF > /tmp/${deployname:?}-tls-test.yaml
+apiVersion: networking.k8s.io/v1beta1
+kind: Ingress
+metadata:
+ name: ${deployname:?}-tls-test
+spec:
+ tls:
+ - hosts:
+ - ${deployhostname}
+ secretName: ${deployname:?}-tls-secret
+ rules:
+ - host: ${deployhostname}
+ http:
+ paths:
+ - path: /tls-test
+ backend:
+ serviceName: http-svc
+ servicePort: 80
+EOF
+
+ kubectl apply \
+ --filename /tmp/${deployname:?}-tls-test.yaml
+
+ > ingress.networking.k8s.io/google-tls-test created
+
+
+ kubectl describe \
+ Ingress ${deployname:?}-tls-test
+
+ > Name: google-tls-test
+ > Namespace: default
+ > Address: 128.232.227.227
+ > Default backend: default-http-backend:80 ()
+ > TLS:
+ > google-tls-secret terminates vernon.metagrid.xyz
+ > Rules:
+ > Host Path Backends
+ > ---- ---- --------
+ > vernon.metagrid.xyz
+ > /tls-test http-svc:80 (10.100.1.14:8080)
+ > Annotations: Events:
+ > Type Reason Age From Message
+ > ---- ------ ---- ---- -------
+ > Normal CREATE 12s nginx-ingress-controller Ingress default/google-tls-test
+ > Normal UPDATE 5s nginx-ingress-controller Ingress default/google-tls-test
+
+
+ ingressip=$(
+ kubectl get \
+ Ingress ${deployname:?}-tls-test \
+ --output json \
+ | jq -r '.status.loadBalancer.ingress[0].ip'
+ )
+
+ echo "Ingress [${ingressip:?}]"
+
+ > Ingress [128.232.227.227]
+
+
+# -----------------------------------------------------
+# -----------------------------------------------------
+# Update our internal and external DNS
+#[user@dns-server]
+
+ aglais-001.metagrid.xyz A 128.232.227.227
+
+ vernon.metagrid.xyz CNAME aglais-001.metagrid.xyz.
+
+
+# -----------------------------------------------------
+# -----------------------------------------------------
+# Test our SSL keys.
+# https://github.com/kubernetes/ingress-nginx/tree/master/docs/examples/tls-termination#deployment
+# https://github.com/kubernetes/ingress-nginx/tree/master/docs/examples/tls-termination#validation
+#[user@kubernator]
+
+
+ curl --head "http://${ingressip:?}/tls-test"
+
+ > HTTP/1.1 404 Not Found
+ > Date: Wed, 27 Jan 2021 06:09:36 GMT
+ > ....
+ > ....
+
+
+ curl --head "http://${deployhostname:?}/tls-test"
+
+ > HTTP/1.1 308 Permanent Redirect
+ > Date: Wed, 27 Jan 2021 06:09:55 GMT
+ > ....
+ > ....
+ > Location: https://vernon.metagrid.xyz/tls-test
+
+
+ curl --head "https://${deployhostname:?}/tls-test"
+
+ > curl: (60) SSL certificate problem: self signed certificate
+ > More details here: https://curl.haxx.se/docs/sslcerts.html
+ > ....
+ > ....
+
+
+ curl --insecure --head "https://${deployhostname:?}/tls-test"
+
+ > HTTP/2 200
+ > date: Wed, 27 Jan 2021 06:10:44 GMT
+ > content-type: text/plain
+ > strict-transport-security: max-age=15724800; includeSubDomains
+
+
+# -----------------------------------------------------
+# Configure our Google secrets.
+#[user@kubernator]
+
+ dnf install -y python
+
+ OAUTH2_CLIENT_IDENT=$(
+ secret google.amdar.id
+ )
+ OAUTH2_CLIENT_SECRET=$(
+ secret google.amdar.secret
+ )
+ OAUTH2_COOKIE_SECRET=$(
+ python -c 'import os,base64; print(base64.b64encode(os.urandom(16)).decode("ascii"))'
+ )
+
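+    # A quick sanity check (sketch): fail fast if any of the values are empty,
+    # without echoing the secrets themselves.
+    : "${OAUTH2_CLIENT_IDENT:?}" "${OAUTH2_CLIENT_SECRET:?}" "${OAUTH2_COOKIE_SECRET:?}"
+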
+
+# -----------------------------------------------------
+# Configure our oauth2_proxy Service.
+#[user@kubernator]
+
+ cat > /tmp/${deployname:?}-oauth-proxy.yaml << EOF
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ labels:
+ k8s-app: ${deployname:?}-oauth-proxy
+ name: ${deployname:?}-oauth-proxy
+ namespace: default
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ k8s-app: ${deployname:?}-oauth-proxy
+ template:
+ metadata:
+ labels:
+ k8s-app: ${deployname:?}-oauth-proxy
+ spec:
+ containers:
+ - name: ${deployname:?}-oauth-proxy
+ image: quay.io/oauth2-proxy/oauth2-proxy:latest
+ imagePullPolicy: Always
+ ports:
+ - containerPort: 4180
+ protocol: TCP
+ args:
+ - --provider=google
+ - --email-domain=*
+ - --http-address=0.0.0.0:4180
+ - --proxy-prefix=/${deployauthpath:?}
+ - --set-xauthrequest=true
+ - --client-id=${OAUTH2_CLIENT_IDENT:?}
+ - --client-secret=${OAUTH2_CLIENT_SECRET:?}
+ - --cookie-secret=${OAUTH2_COOKIE_SECRET:?}
+---
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ k8s-app: ${deployname:?}-oauth-proxy
+ name: ${deployname:?}-oauth-proxy
+ namespace: default
+spec:
+ ports:
+ - name: http
+ port: 4180
+ protocol: TCP
+ targetPort: 4180
+ selector:
+ k8s-app: ${deployname:?}-oauth-proxy
+EOF
+
+
+# -----------------------------------------------------
+# Deploy the OAuth proxy.
+#[user@kubernator]
+
+ kubectl create \
+ --filename /tmp/${deployname:?}-oauth-proxy.yaml
+
+ > deployment.apps/google-oauth-proxy created
+ > service/google-oauth-proxy created
+
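+# -----------------------------------------------------
+# A possible check (untested sketch): wait for the proxy rollout and tail the
+# logs to confirm the proxy started cleanly.
+#[user@kubernator]
+
+    kubectl rollout status \
+        deployment/${deployname:?}-oauth-proxy
+
+    kubectl logs \
+        deployment/${deployname:?}-oauth-proxy \
+        --tail 20
+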
+
+# -----------------------------------------------------
+# Configure the oauth_proxy Ingress.
+#[user@kubernator]
+
+ #
+    # WARNING The auth-url and auth-signin URLs contain nginx '$' variables.
+    # WARNING If bash tries to fill them in, they will end up blank, e.g.
+    # WARNING     https:///oauth2/auth
+    # WARNING     https:///oauth2/start?rd
+    # WARNING which disables the authentication, leaving the protected resource exposed.
+ #
+
+ cat > /tmp/${deployname:?}-oauth-ingress.yaml << EOF
+---
+apiVersion: networking.k8s.io/v1beta1
+kind: Ingress
+metadata:
+ annotations:
+ nginx.ingress.kubernetes.io/auth-url: "https://\$host/${deployauthpath:?}/auth"
+ nginx.ingress.kubernetes.io/auth-signin: "https://\$host/${deployauthpath:?}/start?rd=\$escaped_request_uri"
+ nginx.ingress.kubernetes.io/auth-response-headers: "x-auth-request-user, x-auth-request-email"
+ name: ${deployname:?}-oauth-protected
+ namespace: default
+spec:
+ rules:
+ - host: ${deployhostname:?}
+ http:
+ paths:
+ - path: /
+ backend:
+ serviceName: http-svc
+ servicePort: 80
+ tls:
+ - hosts:
+ - ${deployhostname:?}
+ secretName: ${deployname:?}-tls-secret
+
+---
+apiVersion: networking.k8s.io/v1beta1
+kind: Ingress
+metadata:
+ name: ${deployname:?}-oauth-protector
+ namespace: default
+spec:
+ rules:
+ - host: ${deployhostname:?}
+ http:
+ paths:
+ - path: /${deployauthpath:?}
+ backend:
+ serviceName: ${deployname:?}-oauth-proxy
+ servicePort: 4180
+
+ tls:
+ - hosts:
+ - ${deployhostname:?}
+ secretName: ${deployname:?}-tls-secret
+EOF
+
+
+# -----------------------------------------------------
+# Deploy the OAuth Ingress connectors.
+#[user@kubernator]
+
+ kubectl apply \
+ --filename /tmp/${deployname:?}-oauth-ingress.yaml
+
+ > ingress.networking.k8s.io/google-oauth-protected created
+ > ingress.networking.k8s.io/google-oauth-protector created
+
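+# -----------------------------------------------------
+# A possible check (untested sketch): an unauthenticated request to a protected
+# path should now be redirected to the sign-in URL rather than returning 200.
+#[user@kubernator]
+
+    curl \
+        --head \
+        --insecure \
+        "https://${deployhostname:?}/frog"
+
+    # Expect a 302 with a Location header pointing at /${deployauthpath}/start.
+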
+
+# -----------------------------------------------------
+# -----------------------------------------------------
+# Test the deployment.
+#[user@desktop]
+
+ firefox "http://vernon.metagrid.xyz/frog" &
+
+
+ >
+ > Hostname: http-svc-66b7b8b4c6-9dgxg
+ >
+ > Pod Information:
+ > node name: aglais-20210127-cluster-bq7hhlqwjr57-node-3
+ > pod name: http-svc-66b7b8b4c6-9dgxg
+ > pod namespace: default
+ > pod IP: 10.100.1.14
+ >
+ > Server values:
+ > server_version=nginx: 1.12.2 - lua: 10010
+ >
+ > Request Information:
+ > client_address=10.100.3.3
+ > method=GET
+ > real path=/frog
+ > query=
+ > request_version=1.1
+ > request_scheme=http
+ > request_uri=http://vernon.metagrid.xyz:8080/frog
+ >
+ > Request Headers:
+ > accept=text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
+ > accept-encoding=gzip, deflate, br
+ > accept-language=en-GB,en;q=0.5
+ > cookie=_oauth2_proxy=NjUekxcc........oHvV9yC8=
+ > dnt=1
+ > host=vernon.metagrid.xyz
+ > referer=https://accounts.google.com/o/oauth2/auth/oauthchooseaccount....
+ > upgrade-insecure-requests=1
+ > user-agent=Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0
+ > x-auth-request-email=........
+ > x-auth-request-user=........
+ > x-forwarded-for=10.100.4.0
+ > x-forwarded-host=vernon.metagrid.xyz
+ > x-forwarded-port=443
+ > x-forwarded-proto=https
+ > x-real-ip=10.100.4.0
+ > x-request-id=6c52........95a1
+ > x-scheme=https
+ >
+ > Request Body:
+ > -no body in request-
+ >
+
+
+
+
+
diff --git a/notes/zrq/20210127-03-iris-oauth-proxy.txt b/notes/zrq/20210127-03-iris-oauth-proxy.txt
new file mode 100644
index 00000000..a28e13bb
--- /dev/null
+++ b/notes/zrq/20210127-03-iris-oauth-proxy.txt
@@ -0,0 +1,428 @@
+#
+#
+#
+# Copyright (c) 2021, ROE (http://www.roe.ac.uk/)
+#
+# This information is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This information is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#
+#
+#zrq-notes-time
+#zrq-notes-indent
+#zrq-notes-crypto
+#zrq-notes-ansible
+#zrq-notes-osformat
+#zrq-notes-zeppelin
+#
+
+ Target:
+
+ Install and test OAuthProxy with IRIS IAM.
+
+ Results:
+
+ Success :-)
+
+ Source:
+
+ Automated K8s deployment.
+ notes/zrq/20210127-01-kubernetes-deploy.txt
+
+        OAuthProxy deployment.
+        notes/zrq/20210127-02-google-oauth-proxy.txt
+
+
+# -----------------------------------------------------
+# Configure our OAuth settings.
+#[user@kubernator]
+
+ deployname=iris
+
+ deployhostname=claire.metagrid.xyz
+ deployauthpath=pidjert
+ deploycallback=https://${deployhostname:?}/${deployauthpath:?}/callback
+
+
+# -----------------------------------------------------
+# Create our SSL keys and store them in a Kubernetes secret.
+# https://github.com/kubernetes/ingress-nginx/blob/master/docs/examples/PREREQUISITES.md#tls-certificates
+#[user@kubernator]
+
+ dnf install -y openssl
+
+ openssl req \
+ -x509 \
+ -sha256 \
+ -nodes \
+ -days 365 \
+ -newkey rsa:2048 \
+ -keyout /tmp/tls.key \
+ -out /tmp/tls.crt \
+ -subj "/CN=${deployhostname:?}/O=Aglais"
+
+ > Generating a RSA private key
+ > .........+++++
+ > .............................+++++
+ > writing new private key to '/tmp/tls.key'
+
+
+ kubectl create secret \
+ tls \
+ ${deployname:?}-tls-secret \
+ --key /tmp/tls.key \
+ --cert /tmp/tls.crt
+
+ > secret/iris-tls-secret created
+
+
+# -----------------------------------------------------
+# Deploy a TLS test Ingress
+# https://github.com/kubernetes/ingress-nginx/tree/master/docs/examples/tls-termination#deployment
+# https://github.com/kubernetes/ingress-nginx/tree/master/docs/examples/tls-termination#validation
+#[user@kubernator]
+
+ cat << EOF > /tmp/${deployname:?}-tls-test.yaml
+apiVersion: networking.k8s.io/v1beta1
+kind: Ingress
+metadata:
+ name: ${deployname:?}-tls-test
+spec:
+ tls:
+ - hosts:
+ - ${deployhostname}
+ secretName: ${deployname:?}-tls-secret
+ rules:
+ - host: ${deployhostname}
+ http:
+ paths:
+ - path: /tls-test
+ backend:
+ serviceName: http-svc
+ servicePort: 80
+EOF
+
+ kubectl apply \
+ --filename /tmp/${deployname:?}-tls-test.yaml
+
+ > ingress.networking.k8s.io/iris-tls-test created
+
+
+ kubectl describe \
+ Ingress ${deployname:?}-tls-test
+
+ > Name: iris-tls-test
+ > Namespace: default
+ > Address: 128.232.227.227
+ > Default backend: default-http-backend:80 ()
+ > TLS:
+ > iris-tls-secret terminates claire.metagrid.xyz
+ > Rules:
+ > Host Path Backends
+ > ---- ---- --------
+ > claire.metagrid.xyz
+ > /tls-test http-svc:80 (10.100.1.14:8080)
+ > Annotations: Events:
+ > Type Reason Age From Message
+ > ---- ------ ---- ---- -------
+ > Normal CREATE 63s nginx-ingress-controller Ingress default/iris-tls-test
+ > Normal UPDATE 11s nginx-ingress-controller Ingress default/iris-tls-test
+
+
+ ingressip=$(
+ kubectl get \
+ Ingress ${deployname:?}-tls-test \
+ --output json \
+ | jq -r '.status.loadBalancer.ingress[0].ip'
+ )
+
+ echo "Ingress [${ingressip:?}]"
+
+ > Ingress [128.232.227.227]
+
+
+# -----------------------------------------------------
+# -----------------------------------------------------
+# Update our internal and external DNS
+#[user@dns-server]
+
+ aglais-001.metagrid.xyz A 128.232.227.236
+
+ claire.metagrid.xyz CNAME aglais-001.metagrid.xyz.
+
+
+# -----------------------------------------------------
+# -----------------------------------------------------
+# Test HTTP to HTTPS redirect and our TLS keys.
+# https://github.com/kubernetes/ingress-nginx/tree/master/docs/examples/tls-termination#deployment
+# https://github.com/kubernetes/ingress-nginx/tree/master/docs/examples/tls-termination#validation
+#[user@kubernator]
+
+
+ curl --head "http://${ingressip:?}/tls-test"
+
+ > HTTP/1.1 404 Not Found
+ > Date: Wed, 27 Jan 2021 17:51:46 GMT
+ > ....
+ > ....
+
+
+ curl --head "http://${deployhostname:?}/tls-test"
+
+ > HTTP/1.1 308 Permanent Redirect
+ > Date: Wed, 27 Jan 2021 17:51:55 GMT
+ > ....
+ > ....
+ > Location: https://claire.metagrid.xyz/tls-test
+
+
+ curl --head "https://${deployhostname:?}/tls-test"
+
+ > curl: (60) SSL certificate problem: self signed certificate
+ > More details here: https://curl.haxx.se/docs/sslcerts.html
+ > ....
+ > ....
+
+
+ curl --insecure --head "https://${deployhostname:?}/tls-test"
+
+ > HTTP/2 200
+ > date: Wed, 27 Jan 2021 17:52:24 GMT
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Configure our IRIS secrets.
+#[user@kubernator]
+
+ dnf install -y python
+
+ OAUTH2_CLIENT_IDENT=$(
+ secret iris-iam.oauth.client
+ )
+ OAUTH2_CLIENT_SECRET=$(
+ secret iris-iam.oauth.secret
+ )
+ OAUTH2_COOKIE_SECRET=$(
+ python -c 'import os,base64; print(base64.b64encode(os.urandom(16)).decode("ascii"))'
+ )
+
+ OAUTH2_SERVER_ISSUER=https://iris-iam.stfc.ac.uk/
+ OAUTH2_SERVER_AUTH=https://iris-iam.stfc.ac.uk/authorize
+ OAUTH2_SERVER_TOKEN=https://iris-iam.stfc.ac.uk/token
+ OAUTH2_SERVER_USER=https://iris-iam.stfc.ac.uk/userinfo
+ OAUTH2_SERVER_JWKS=https://iris-iam.stfc.ac.uk/.well-known/openid-jwks
+
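+    # A possible check (untested sketch): the endpoints above should match the
+    # issuer's published OIDC discovery document.
+    curl -s 'https://iris-iam.stfc.ac.uk/.well-known/openid-configuration' \
+    | jq '{issuer, authorization_endpoint, token_endpoint, userinfo_endpoint, jwks_uri}'
+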
+# -----------------------------------------------------
+# Configure our oauth2_proxy Service.
+#[user@kubernator]
+
+ cat > /tmp/${deployname:?}-oauth-proxy.yaml << EOF
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ labels:
+ k8s-app: ${deployname:?}-oauth-proxy
+ name: ${deployname:?}-oauth-proxy
+ namespace: default
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ k8s-app: ${deployname:?}-oauth-proxy
+ template:
+ metadata:
+ labels:
+ k8s-app: ${deployname:?}-oauth-proxy
+ spec:
+ containers:
+ - name: ${deployname:?}-oauth-proxy
+ image: quay.io/oauth2-proxy/oauth2-proxy:latest
+ imagePullPolicy: Always
+ ports:
+ - containerPort: 4180
+ protocol: TCP
+ args:
+ - --provider=oidc
+ - --email-domain=*
+ - --http-address=0.0.0.0:4180
+ - --proxy-prefix=/${deployauthpath:?}
+ - --redirect-url=${deploycallback:?}
+ - --set-xauthrequest=true
+ - --client-id=${OAUTH2_CLIENT_IDENT:?}
+ - --client-secret=${OAUTH2_CLIENT_SECRET:?}
+ - --oidc-issuer-url=${OAUTH2_SERVER_ISSUER:?}
+ - --login-url=${OAUTH2_SERVER_AUTH:?}
+ - --redeem-url=${OAUTH2_SERVER_TOKEN:?}
+ - --profile-url=${OAUTH2_SERVER_USER:?}
+ - --oidc-jwks-url=${OAUTH2_SERVER_JWKS:?}
+ - --cookie-secret=${OAUTH2_COOKIE_SECRET:?}
+ - --ssl-insecure-skip-verify
+ - --ssl-upstream-insecure-skip-verify
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ k8s-app: ${deployname:?}-oauth-proxy
+ name: ${deployname:?}-oauth-proxy
+ namespace: default
+spec:
+ ports:
+ - name: http
+ port: 4180
+ protocol: TCP
+ targetPort: 4180
+ selector:
+ k8s-app: ${deployname:?}-oauth-proxy
+EOF
+
+
+# -----------------------------------------------------
+# Deploy the OAuth proxy.
+#[user@kubernator]
+
+ kubectl create \
+ --filename /tmp/${deployname:?}-oauth-proxy.yaml
+
+ > deployment.apps/iris-oauth-proxy created
+ > service/iris-oauth-proxy created
+
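+# -----------------------------------------------------
+# A possible check (untested sketch): the proxy logs should show the OIDC
+# discovery against iris-iam.stfc.ac.uk completing without client errors.
+#[user@kubernator]
+
+    kubectl logs \
+        deployment/${deployname:?}-oauth-proxy \
+        --tail 20
+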
+
+# -----------------------------------------------------
+# Configure the oauth_proxy Ingress.
+#[user@kubernator]
+
+ #
+    # WARNING The auth-url and auth-signin URLs contain nginx '$' variables.
+    # WARNING If bash tries to fill them in, they will end up blank, e.g.
+    # WARNING     https:///oauth2/auth
+    # WARNING     https:///oauth2/start?rd
+    # WARNING which disables the authentication, leaving the protected resource exposed.
+ #
+
+ cat > /tmp/${deployname:?}-oauth-ingress.yaml << EOF
+---
+apiVersion: networking.k8s.io/v1beta1
+kind: Ingress
+metadata:
+ annotations:
+ nginx.ingress.kubernetes.io/auth-url: "https://\$host/${deployauthpath:?}/auth"
+ nginx.ingress.kubernetes.io/auth-signin: "https://\$host/${deployauthpath:?}/start?rd=\$escaped_request_uri"
+ nginx.ingress.kubernetes.io/auth-response-headers: "x-auth-request-user, x-auth-request-email"
+ name: ${deployname:?}-oauth-protected
+ namespace: default
+spec:
+ rules:
+ - host: ${deployhostname:?}
+ http:
+ paths:
+ - path: /
+ backend:
+ serviceName: http-svc
+ servicePort: 80
+ tls:
+ - hosts:
+ - ${deployhostname:?}
+ secretName: ${deployname:?}-tls-secret
+
+---
+apiVersion: networking.k8s.io/v1beta1
+kind: Ingress
+metadata:
+ name: ${deployname:?}-oauth-protector
+ namespace: default
+spec:
+ rules:
+ - host: ${deployhostname:?}
+ http:
+ paths:
+ - path: /${deployauthpath:?}
+ backend:
+ serviceName: ${deployname:?}-oauth-proxy
+ servicePort: 4180
+ tls:
+ - hosts:
+ - ${deployhostname:?}
+ secretName: ${deployname:?}-tls-secret
+EOF
+
+
+# -----------------------------------------------------
+# Deploy the OAuth Ingress connectors.
+#[user@kubernator]
+
+ kubectl apply \
+ --filename /tmp/${deployname:?}-oauth-ingress.yaml
+
+ > ingress.networking.k8s.io/iris-oauth-protected created
+ > ingress.networking.k8s.io/iris-oauth-protector created
+
+
+# -----------------------------------------------------
+# -----------------------------------------------------
+# Test the deployment.
+#[user@desktop]
+
+ firefox "http://claire.metagrid.xyz/frog" &
+
+ >
+ > Hostname: http-svc-66b7b8b4c6-9dgxg
+ >
+ > Pod Information:
+ > node name: aglais-20210127-cluster-bq7hhlqwjr57-node-3
+ > pod name: http-svc-66b7b8b4c6-9dgxg
+ > pod namespace: default
+ > pod IP: 10.100.1.14
+ >
+ > Server values:
+ > server_version=nginx: 1.12.2 - lua: 10010
+ >
+ > Request Information:
+ > client_address=10.100.3.3
+ > method=GET
+ > real path=/frog
+ > query=
+ > request_version=1.1
+ > request_scheme=http
+ > request_uri=http://claire.metagrid.xyz:8080/frog
+ >
+ > Request Headers:
+ > accept=text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
+ > accept-encoding=gzip, deflate, br
+ > accept-language=en-GB,en;q=0.5
+ > cookie=_oauth2_proxy=ivsL........PShs=
+ > dnt=1
+ > host=claire.metagrid.xyz
+ > upgrade-insecure-requests=1
+ > user-agent=Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0
+ > x-auth-request-email=........
+ > x-auth-request-user=........
+ > x-forwarded-for=10.100.4.0
+ > x-forwarded-host=claire.metagrid.xyz
+ > x-forwarded-port=443
+ > x-forwarded-proto=https
+ > x-real-ip=10.100.4.0
+ > x-request-id=f246........fd8d
+ > x-scheme=https
+ >
+ > Request Body:
+ > -no body in request-
+ >
+
+
+
+