diff --git a/deployments/common/pip/requirements.txt b/deployments/common/pip/requirements.txt index da802edf..bf2a893a 100644 --- a/deployments/common/pip/requirements.txt +++ b/deployments/common/pip/requirements.txt @@ -12,3 +12,4 @@ astroquery==0.4.1 scikit-learn==0.24.2 hdbscan==0.8.27 pyvo==1.1 +koalas==1.8.2 diff --git a/notes/stv/20220216-test-deploy-01.txt b/notes/stv/20220216-test-deploy-01.txt new file mode 100644 index 00000000..22f38784 --- /dev/null +++ b/notes/stv/20220216-test-deploy-01.txt @@ -0,0 +1,225 @@ +# +# +# +# Copyright (c) 2022, ROE (http://www.roe.ac.uk/) +# +# This information is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This information is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# +# + + + Target: + + Run Test Deploy on a version of GaiaDMP that includes Koalas + + Result: + + SUCCESS (BUT SLOW) + + + + +# ----------------------------------------------------- +# Fetch target branch +#[user@desktop] + + source "${HOME:?}/aglais.env" + pushd "${AGLAIS_CODE}" + git checkout 'feature/koalas' + + popd + + + +# ----------------------------------------------------- +# Create a container to work with. 
+#[user@desktop] + + source "${HOME:?}/aglais.env" + + podman run \ + --rm \ + --tty \ + --interactive \ + --name ansibler5 \ + --hostname ansibler \ + --publish 3000:3000 \ + --publish 8088:8088 \ + --env "SSH_AUTH_SOCK=/mnt/ssh_auth_sock" \ + --volume "${SSH_AUTH_SOCK}:/mnt/ssh_auth_sock:rw,z" \ + --volume "${HOME:?}/clouds.yaml:/etc/openstack/clouds.yaml:ro,z" \ + --volume "${AGLAIS_CODE:?}/deployments:/deployments:ro,z" \ + atolmis/ansible-client:2021.08.25 \ + bash + + + +# ----------------------------------------------------- +# Set the cloud and configuration. +#[root@ansibler] + + cloudname=iris-gaia-red + + configname=zeppelin-27.45-spark-6.27.45 + + + +# ----------------------------------------------------- +# Delete everything. +#[root@ansibler] + + time \ + /deployments/openstack/bin/delete-all.sh \ + "${cloudname:?}" + + > Done + + + +# ----------------------------------------------------- +# Create everything, using the new config. +#[root@ansibler] + + time \ + /deployments/hadoop-yarn/bin/create-all.sh \ + "${cloudname:?}" \ + "${configname:?}" \ + | tee /tmp/create-all.log + + + > Done + + +# ----------------------------------------------------- +# Run Full test +#[root@ansibler] + + num_users=1 + concurrent=False + test_level="full" + + # Restart Zeppelin + time \ + /deployments/hadoop-yarn/bin/restart-zeppelin.sh + + time \ + /deployments/hadoop-yarn/bin/run-tests.sh \ + "${cloudname:?}" \ + "${configname:?}" \ + "${test_level:?}" \ + ${concurrent:?} \ + ${num_users:?} \ + | tee /tmp/run-tests-full.log + + +# Running... 
+ +> Done + +TASK [Run benchmarker] ************************************************************************************************************************************************************************************** +changed: [localhost] => {"changed": true, "cmd": "python3 /tmp/run-test.py | tee /tmp/test-result.json", "delta": "2:38:47.353090", "end": "2022-02-16 15:40:20.254404", "rc": 0, "start": "2022-02-16 13:01:32.901314", "stderr": "", "stderr_lines": [], "stdout": "Test completed after: 9527.15 seconds\n{'SetUp': {'totaltime': '40.78', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'Mean_proper_motions_over_the_sky': {'totaltime': '56.25', 'status': 'SLOW', 'msg': '', 'valid': 'TRUE'}, 'Source_counts_over_the_sky.json': {'totaltime': '20.76', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'Good_astrometric_solutions_via_ML_Random_Forrest_classifier': {'totaltime': '553.25', 'status': 'SLOW', 'msg': '', 'valid': 'TRUE'}, 'QC_cuts_dev.json': {'totaltime': '4493.28', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'WD_detection_dev.json': {'totaltime': '4356.36', 'status': 'SLOW', 'msg': '', 'valid': 'TRUE'}, 'Library_Validation.json': {'totaltime': '6.45', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}}", "stdout_lines": ["Test completed after: 9527.15 seconds", "{'SetUp': {'totaltime': '40.78', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'Mean_proper_motions_over_the_sky': {'totaltime': '56.25', 'status': 'SLOW', 'msg': '', 'valid': 'TRUE'}, 'Source_counts_over_the_sky.json': {'totaltime': '20.76', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'Good_astrometric_solutions_via_ML_Random_Forrest_classifier': {'totaltime': '553.25', 'status': 'SLOW', 'msg': '', 'valid': 'TRUE'}, 'QC_cuts_dev.json': {'totaltime': '4493.28', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'WD_detection_dev.json': {'totaltime': '4356.36', 'status': 'SLOW', 'msg': '', 'valid': 'TRUE'}, 'Library_Validation.json': {'totaltime': '6.45', 'status': 'SUCCESS', 
'msg': '', 'valid': 'TRUE'}}"]} + + + +# Results [Formatted] + +{ + 'SetUp': { + 'totaltime': '40.78', + 'status': 'SUCCESS', + 'msg': '', + 'valid': 'TRUE' + }, + 'Mean_proper_motions_over_the_sky': { + 'totaltime': '56.25', + 'status': 'SLOW', + 'msg': '', + 'valid': 'TRUE' + }, + 'Source_counts_over_the_sky.json': { + 'totaltime': '20.76', + 'status': 'SUCCESS', + 'msg': '', + 'valid': 'TRUE' + }, + 'Good_astrometric_solutions_via_ML_Random_Forrest_classifier': { + 'totaltime': '553.25', + 'status': 'SLOW', + 'msg': '', + 'valid': 'TRUE' + }, + 'QC_cuts_dev.json': { + 'totaltime': '4493.28', + 'status': 'SUCCESS', + 'msg': '', + 'valid': 'TRUE' + }, + 'WD_detection_dev.json': { + 'totaltime': '4356.36', + 'status': 'SLOW', + 'msg': '', + 'valid': 'TRUE' + }, + 'Library_Validation.json': { + 'totaltime': '6.45', + 'status': 'SUCCESS', + 'msg': '', + 'valid': 'TRUE' + } +} + + +# Note: WD Detections always seems to be slower than our benchmarks expect. Do we need to update the value for this? + + +# Validate the Koalas package +# For this test we do this manually; once the PR is accepted, we can include these cells in the Library Validation notebook in our "/aglais-testing" repo. + + +# Check pyarrow + +%pyspark + +import pyarrow +assert pyarrow.__version__ == "7.0.0" + + +> Success + + +# Check Koalas + +%pyspark + +import databricks.koalas as ks +assert ks.__version__ == "1.8.2" + +> Success + + + +# Create a Koalas series + +s = ks.Series([1, 3, 5, np.nan, 6, 8]) +s + +> 0 1.0 +> 1 3.0 +> 2 5.0 +> 3 NaN +> 4 6.0 +> 5 8.0 +> dtype: float64 +