wfau · Zarquan · Sep 21, 2021 · Sep 21, 2021 · Oct 4, 2021 · Oct 4, 2021
diff --git a/...-yarn/ansible/config/cclake-medium-04.yml → ...e/config/zeppelin-13.22-spark-4.13.22.yml b/...-yarn/ansible/config/cclake-medium-04.yml → ...e/config/zeppelin-13.22-spark-4.13.22.yml
diff --git a/.../hadoop-yarn/ansible/config/medium-04.yml → ...e/config/zeppelin-14.45-spark-4.14.45.yml b/.../hadoop-yarn/ansible/config/medium-04.yml → ...e/config/zeppelin-14.45-spark-4.14.45.yml
diff --git a/...p-yarn/ansible/config/cclake-large-06.yml → ...e/config/zeppelin-27.45-spark-6.27.45.yml b/...p-yarn/ansible/config/cclake-large-06.yml → ...e/config/zeppelin-27.45-spark-6.27.45.yml
diff --git a/deployments/hadoop-yarn/ansible/config/zeppelin-55.90-spark-6.27.45.yml b/deployments/hadoop-yarn/ansible/config/zeppelin-55.90-spark-6.27.45.yml
@@ -0,0 +1,280 @@
+#
+# <meta:header>
+#   <meta:licence>
+#     Copyright (c) 2020, ROE (http://www.roe.ac.uk/)
+#
+#     This information is free software: you can redistribute it and/or modify
+#     it under the terms of the GNU General Public License as published by
+#     the Free Software Foundation, either version 3 of the License, or
+#     (at your option) any later version.
+#
+#     This information is distributed in the hope that it will be useful,
+#     but WITHOUT ANY WARRANTY; without even the implied warranty of
+#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#     GNU General Public License for more details.
+#
+#     You should have received a copy of the GNU General Public License
+#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#   </meta:licence>
+# </meta:header>
+#
+#
+
+all:
+
+    vars:
+
+    # Hadoop settings (hd*): version name, directories, host and user
+
+        hdname: 'hadoop-3.1.3'
+        hdbase: '/opt'
+        hdhome: '/opt/hadoop'
+
+        hdconf: "{{hdhome}}/etc/hadoop"
+        hdhost: 'master01'
+        hduser: 'fedora'
+
+    # HDFS settings (hdfs*): configuration directory and user
+
+        hdfsconf: '/var/hdfs/conf'
+        hdfsuser: 'fedora'
+
+    # Spark settings (sp*): version names, directories, host and user
+
+        spname: 'spark-2.4.7'
+        spfull: 'spark-2.4.7-bin-hadoop2.7'
+        spbase: '/opt'
+        sphome: '/opt/spark'
+        sphost: 'master01'
+        spuser: 'fedora'
+
+    # Flavor names for the zeppelin, master and worker nodes
+
+        zeppelinflavor: 'gaia.cclake.55vcpu'
+        masterflavor: 'gaia.cclake.2vcpu'
+        workerflavor: 'gaia.cclake.27vcpu'
+
+    # Flavor values: memory (presumably MiB - confirm) and cores per node
+
+        zeppelinmemory: 92160
+        zeppelincores: 55
+
+        workermemory: 46080
+        workercores: 27
+        workercount: 6
+
+    # Calculated limits, derived from the worker memory and core values
+
+        spminmem: 1024
+        spmaxmem: "{{workermemory - 1024}}"
+
+        spmincores: 1
+        spmaxcores: "{{workercores}}"
+
+        # Spark properties as a literal block; presumably rendered into spark-defaults.conf (confirm in the consuming role).
+        sparkconfig: |
+
+            # https://spark.apache.org/docs/latest/configuration.html
+            # https://spark.apache.org/docs/latest/running-on-yarn.html
+            # https://stackoverflow.com/questions/37871194/how-to-tune-spark-executor-number-cores-and-executor-memory
+
+            spark.master                 yarn
+
+            # Spark config settings calculated using Cheatsheet.xlsx
+            # https://www.c2fo.io/img/apache-spark-config-cheatsheet/C2FO-Spark-Config-Cheatsheet.xlsx
+
+            # https://www.c2fo.io/c2fo/spark/aws/emr/2016/07/06/apache-spark-config-cheatsheet/
+            # https://github.com/AndresNamm/SparkDebugging/tree/master/ExecutorSizing
+
+            # Calculated using Cheatsheet.xlsx
+            spark.driver.memory                 58982m
+            spark.driver.memoryOverhead           9216
+            spark.driver.cores                       5
+            spark.driver.maxResultSize          40960m
+
+            spark.executor.memory                7168m
+            spark.executor.memoryOverhead         1024
+            spark.executor.cores                     5
+            #spark.executor.instances                30
+
+            spark.default.parallelism              300
+            #spark.sql.shuffle.partitions          300
+
+            # YARN Application Master settings
+            spark.yarn.am.memory                 2048m
+            spark.yarn.am.cores                      1
+
+            spark.dynamicAllocation.enabled          true
+            spark.shuffle.service.enabled            true
+            spark.dynamicAllocation.minExecutors      1
+             # spark.executor.instances from Cheatsheet
+            spark.dynamicAllocation.maxExecutors     30
+             # maxExecutors / 2
+            spark.dynamicAllocation.initialExecutors           15
+            spark.dynamicAllocation.cachedExecutorIdleTimeout  60s
+            spark.dynamicAllocation.executorIdleTimeout        60s
+        # YARN <property> elements as a literal block; presumably inserted into yarn-site.xml (confirm in the consuming role).
+        yarnconfig: |
+            <!--+
+                | Maximum limit of memory to allocate to each container request at the Resource Manager.
+                +-->
+            <property>
+                <name>yarn.scheduler.maximum-allocation-mb</name>
+                <value>{{spmaxmem}}</value>
+            </property>
+
+            <!--+
+                | Minimum limit of memory to allocate to each container request at the Resource Manager.
+                +-->
+            <property>
+                <name>yarn.scheduler.minimum-allocation-mb</name>
+                <value>{{spminmem}}</value>
+            </property>
+
+            <property>
+                <name>yarn.scheduler.minimum-allocation-vcores</name>
+                <value>{{spmincores}}</value>
+            </property>
+
+            <property>
+                <name>yarn.scheduler.maximum-allocation-vcores</name>
+                <value>{{spmaxcores}}</value>
+            </property>
+
+            <property>
+                <name>yarn.nodemanager.resource.memory-mb</name>
+                <value>{{spmaxmem}}</value>
+            </property>
+
+            <!--+
+                | 1:1 -> 1:4 * {{spmaxcores}} based on IO wait
+                +-->
+            <property>
+                <name>yarn.nodemanager.resource.cpu-vcores</name>
+                <value>{{spmaxcores}}</value>
+            </property>
+
+            <!--+
+                | https://stackoverflow.com/questions/38988941/running-yarn-with-spark-not-working-with-java-8
+                | https://stackoverflow.com/a/39456782
+                | https://issues.apache.org/jira/browse/YARN-4714
+                +-->
+            <property>
+                <name>yarn.nodemanager.pmem-check-enabled</name>
+                <value>false</value>
+            </property>
+
+            <property>
+                <name>yarn.nodemanager.vmem-check-enabled</name>
+                <value>false</value>
+            </property>
+
+            <!-- yarn.app.mapreduce.am.resource.mb = (yarn.scheduler.minimum-allocation-mb) -->
+            <!-- mapreduce.map.memory.mb    = (multiple of yarn.scheduler.minimum-allocation-mb) -->
+            <!-- mapreduce.reduce.memory.mb = (multiple of yarn.scheduler.minimum-allocation-mb) -->
+
+    # Zeppelin settings (zep*): version name, directories, host and user
+        zepname: 'zeppelin-0.8.2'
+        zepbase: '/home/fedora'
+        zephome: '/home/fedora/zeppelin-0.8.2-bin-all'
+        zephost: 'zeppelin'
+        zepuser: 'fedora'
+        zepmavendest: '/var/local/zeppelin/maven'
+
+    hosts:
+
+        zeppelin:
+            login: 'fedora'
+            image: 'Fedora-30-1.2'
+            flavor: "{{zeppelinflavor}}"
+            discs:
+              - type: 'local'
+                format: 'ext4'
+                mntpath: '/mnt/local/vdb'
+                devname: 'vdb'
+              - type: 'cinder'
+                size: 1024
+                format: 'btrfs'
+                mntpath: '/mnt/cinder/vdc'
+                devname: 'vdc'
+            paths:
+                # Hadoop data: left empty on Zeppelin
+                hddatalink: '/var/hadoop/data'
+                hddatadest: '/mnt/local/vdb/hadoop/data'
+                # Hadoop temp: left empty on Zeppelin
+                hdtemplink: '/var/hadoop/temp'
+                hdtempdest: '/mnt/local/vdb/hadoop/temp'
+                # Hadoop logs: left empty on Zeppelin
+                hdlogslink: '/var/hadoop/logs'
+                hdlogsdest: '/mnt/local/vdb/hadoop/logs'
+                # Spark temp: in use on Zeppelin
+                sptemplink: '/var/spark/temp'
+                sptempdest: '/mnt/cinder/vdc/spark/temp'
+
+        monitor:
+            login: 'fedora'
+            image: 'Fedora-30-1.2'
+            flavor: 'gaia.cclake.2vcpu'
+            discs: []
+
+    children:
+
+        masters:
+            hosts:
+                master[01:01]:
+            vars:
+                login: 'fedora'
+                image: 'Fedora-30-1.2'
+                flavor: "{{masterflavor}}"
+                discs: []
+                paths:
+                    # Hadoop data: left empty on master
+                    hddatalink: '/var/hadoop/data'
+                    hddatadest: '/mnt/local/vda/hadoop/data'
+                    # Hadoop temp: in use on master, e.g.
+                    # /var/hadoop/temp/dfs/namesecondary/current/
+                    hdtemplink: '/var/hadoop/temp'
+                    hdtempdest: '/mnt/local/vda/hadoop/temp'
+                    # Hadoop logs: in use on master
+                    hdlogslink: '/var/hadoop/logs'
+                    hdlogsdest: '/mnt/local/vda/hadoop/logs'
+                    # HDFS metadata: in use on master, e.g.
+                    # /var/hdfs/meta/namenode/fsimage/current/
+                    hdfsmetalink: '/var/hdfs/meta'
+                    hdfsmetadest: '/mnt/local/vda/hadoop/meta'
+
+        workers:
+            hosts:
+                worker[01:06]:
+            vars:
+                login: 'fedora'
+                image: 'Fedora-30-1.2'
+                flavor: "{{workerflavor}}"
+                discs:
+                  - type: 'local'
+                    format: 'ext4'
+                    mntpath: '/mnt/local/vdb'
+                    devname: 'vdb'
+                  - type: 'cinder'
+                    size: 1024
+                    format: 'btrfs'
+                    mntpath: '/mnt/cinder/vdc'
+                    devname: 'vdc'
+                paths:
+                    # Hadoop data: in use on workers
+                    hddatalink: '/var/hadoop/data'
+                    hddatadest: '/mnt/local/vdb/hadoop/data'
+                    # Hadoop temp: in use on workers, e.g.
+                    # /var/hadoop/temp/nm-local-dir/
+                    hdtemplink: '/var/hadoop/temp'
+                    hdtempdest: '/mnt/local/vdb/hadoop/temp'
+                    # Hadoop logs: in use on workers
+                    hdlogslink: '/var/hadoop/logs'
+                    hdlogsdest: '/mnt/local/vdb/hadoop/logs'
+                    # HDFS logs: left empty on workers
+                    hdfslogslink: '/var/hdfs/logs'
+                    hdfslogsdest: '/mnt/local/vdb/hdfs/logs'
+                    # HDFS data: left empty on workers
+                    hdfsdatalink: '/var/hdfs/data'
+                    hdfsdatadest: '/mnt/cinder/vdc/hdfs/data'
+
diff --git a/notes/zrq/20211007-02-slack-export.txt b/notes/zrq/20211007-02-slack-export.txt
@@ -26,8 +26,11 @@
 #zrq-notes-zeppelin
 #
 
-https://github.com/ErikKalkoken/slackchannel2pdf
+    How to export our data out of Slack ...
+
+    https://github.com/ErikKalkoken/slackchannel2pdf
+
+    https://webapps.stackexchange.com/questions/130485/how-to-export-slack-conversation-thread-without-admin-account
 
-https://webapps.stackexchange.com/questions/130485/how-to-export-slack-conversation-thread-without-admin-account