Added Koalas to GaiaDMP #640

Merged 2 commits, Feb 22, 2022
1 change: 1 addition & 0 deletions deployments/common/pip/requirements.txt
@@ -12,3 +12,4 @@ astroquery==0.4.1
scikit-learn==0.24.2
hdbscan==0.8.27
pyvo==1.1
koalas==1.8.2
225 changes: 225 additions & 0 deletions notes/stv/20220216-test-deploy-01.txt
@@ -0,0 +1,225 @@
#
# <meta:header>
# <meta:licence>
# Copyright (c) 2022, ROE (http://www.roe.ac.uk/)
#
# This information is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This information is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# </meta:licence>
# </meta:header>
#


Target:

Run Test Deploy on a version of GaiaDMP that includes Koalas

Result:

SUCCESS (BUT SLOW)




# -----------------------------------------------------
# Fetch target branch
#[user@desktop]

source "${HOME:?}/aglais.env"
pushd "${AGLAIS_CODE}"
git checkout 'feature/koalas'

popd



# -----------------------------------------------------
# Create a container to work with.
#[user@desktop]

source "${HOME:?}/aglais.env"

podman run \
--rm \
--tty \
--interactive \
--name ansibler5 \
--hostname ansibler \
--publish 3000:3000 \
--publish 8088:8088 \
--env "SSH_AUTH_SOCK=/mnt/ssh_auth_sock" \
--volume "${SSH_AUTH_SOCK}:/mnt/ssh_auth_sock:rw,z" \
--volume "${HOME:?}/clouds.yaml:/etc/openstack/clouds.yaml:ro,z" \
--volume "${AGLAIS_CODE:?}/deployments:/deployments:ro,z" \
atolmis/ansible-client:2021.08.25 \
bash



# -----------------------------------------------------
# Set the cloud and configuration.
#[root@ansibler]

cloudname=iris-gaia-red

configname=zeppelin-27.45-spark-6.27.45



# -----------------------------------------------------
# Delete everything.
#[root@ansibler]

time \
/deployments/openstack/bin/delete-all.sh \
"${cloudname:?}"

> Done



# -----------------------------------------------------
# Create everything, using the new config.
#[root@ansibler]

time \
/deployments/hadoop-yarn/bin/create-all.sh \
"${cloudname:?}" \
"${configname:?}" \
| tee /tmp/create-all.log


> Done


# -----------------------------------------------------
# Run Full test
#[root@ansibler]

num_users=1
concurrent=False
test_level="full"

# Restart Zeppelin
time \
/deployments/hadoop-yarn/bin/restart-zeppelin.sh

time \
/deployments/hadoop-yarn/bin/run-tests.sh \
"${cloudname:?}" \
"${configname:?}" \
"${test_level:?}" \
${concurrent:?} \
${num_users:?} \
| tee /tmp/run-tests-full.log


# Running...

> Done

TASK [Run benchmarker] **************************************************************************************************************************************************************************************
changed: [localhost] => {"changed": true, "cmd": "python3 /tmp/run-test.py | tee /tmp/test-result.json", "delta": "2:38:47.353090", "end": "2022-02-16 15:40:20.254404", "rc": 0, "start": "2022-02-16 13:01:32.901314", "stderr": "", "stderr_lines": [], "stdout": "Test completed after: 9527.15 seconds\n{'SetUp': {'totaltime': '40.78', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'Mean_proper_motions_over_the_sky': {'totaltime': '56.25', 'status': 'SLOW', 'msg': '', 'valid': 'TRUE'}, 'Source_counts_over_the_sky.json': {'totaltime': '20.76', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'Good_astrometric_solutions_via_ML_Random_Forrest_classifier': {'totaltime': '553.25', 'status': 'SLOW', 'msg': '', 'valid': 'TRUE'}, 'QC_cuts_dev.json': {'totaltime': '4493.28', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'WD_detection_dev.json': {'totaltime': '4356.36', 'status': 'SLOW', 'msg': '', 'valid': 'TRUE'}, 'Library_Validation.json': {'totaltime': '6.45', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}}", "stdout_lines": ["Test completed after: 9527.15 seconds", "{'SetUp': {'totaltime': '40.78', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'Mean_proper_motions_over_the_sky': {'totaltime': '56.25', 'status': 'SLOW', 'msg': '', 'valid': 'TRUE'}, 'Source_counts_over_the_sky.json': {'totaltime': '20.76', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'Good_astrometric_solutions_via_ML_Random_Forrest_classifier': {'totaltime': '553.25', 'status': 'SLOW', 'msg': '', 'valid': 'TRUE'}, 'QC_cuts_dev.json': {'totaltime': '4493.28', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'WD_detection_dev.json': {'totaltime': '4356.36', 'status': 'SLOW', 'msg': '', 'valid': 'TRUE'}, 'Library_Validation.json': {'totaltime': '6.45', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}}"]}



# Results [Formatted]

{
'SetUp': {
'totaltime': '40.78',
'status': 'SUCCESS',
'msg': '',
'valid': 'TRUE'
},
'Mean_proper_motions_over_the_sky': {
'totaltime': '56.25',
'status': 'SLOW',
'msg': '',
'valid': 'TRUE'
},
'Source_counts_over_the_sky.json': {
'totaltime': '20.76',
'status': 'SUCCESS',
'msg': '',
'valid': 'TRUE'
},
'Good_astrometric_solutions_via_ML_Random_Forrest_classifier': {
'totaltime': '553.25',
'status': 'SLOW',
'msg': '',
'valid': 'TRUE'
},
'QC_cuts_dev.json': {
'totaltime': '4493.28',
'status': 'SUCCESS',
'msg': '',
'valid': 'TRUE'
},
'WD_detection_dev.json': {
'totaltime': '4356.36',
'status': 'SLOW',
'msg': '',
'valid': 'TRUE'
},
'Library_Validation.json': {
'totaltime': '6.45',
'status': 'SUCCESS',
'msg': '',
'valid': 'TRUE'
}
}


# Note: WD detection always seems to be slower than our benchmark expects. Do we need to update the expected value for this?
Collaborator comment:

We need to review what these benchmark values mean and how to manage changes to them.
If we just change a value now, then what does that mean for all the tests run before this point?
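
As a starting point for that review, here is a minimal sketch (plain Python; the baseline values are illustrative only, not agreed numbers) of how the benchmarker could flag a notebook as SLOW when its measured time drifts beyond a per-notebook baseline plus a tolerance:

# Subset of the results shown above.
results = {
    'Mean_proper_motions_over_the_sky': {'totaltime': '56.25',   'status': 'SLOW'},
    'WD_detection_dev.json':            {'totaltime': '4356.36', 'status': 'SLOW'},
    'Library_Validation.json':          {'totaltime': '6.45',    'status': 'SUCCESS'},
}

# Illustrative baselines (seconds) and the drift allowed before flagging SLOW.
baselines = {
    'Mean_proper_motions_over_the_sky': 50.0,
    'WD_detection_dev.json': 3000.0,
    'Library_Validation.json': 10.0,
}
tolerance = 1.25

for name, result in results.items():
    measured = float(result['totaltime'])
    status = 'SLOW' if measured > baselines[name] * tolerance else 'SUCCESS'
    print(f"{name}: {measured:.2f}s -> {status}")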



# Validate the Koalas package
# For this test we run these checks manually; once the PR is accepted we can include these cells in the Library Validation notebook in our "/aglais-testing" repo.


# Check pyarrow

%pyspark

import pyarrow
assert pyarrow.__version__ == "7.0.0"


> Success


# Check Koalas

%pyspark

import databricks.koalas as ks
assert ks.__version__ == "1.8.2"

> Success



# Create a Koalas series

%pyspark

import numpy as np

s = ks.Series([1, 3, 5, np.nan, 6, 8])
s

> 0 1.0
> 1 3.0
> 2 5.0
> 3 NaN
> 4 6.0
> 5 8.0
> dtype: float64
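

# A possible extra cell for the Library Validation notebook (a sketch only, not run as part of this deploy):
# round-trip a small series through pandas to confirm the Koalas/pandas interop works on this deployment.

%pyspark

import numpy as np
import pandas as pd
import databricks.koalas as ks

# Round-trip through pandas and check the values survive.
pdf = pd.Series([1, 3, 5, np.nan, 6, 8])
kser = ks.from_pandas(pdf)
assert kser.to_pandas().sort_index().equals(pdf)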