Added Koalas to GaiaDMP #640

Merged 2 commits, Feb 22, 2022
1 change: 1 addition & 0 deletions deployments/common/pip/requirements.txt
@@ -12,3 +12,4 @@ astroquery==0.4.1
scikit-learn==0.24.2
hdbscan==0.8.27
pyvo==1.1
koalas==1.8.2
225 changes: 225 additions & 0 deletions notes/stv/20220216-test-deploy-01.txt
@@ -0,0 +1,225 @@
#
# <meta:header>
# <meta:licence>
# Copyright (c) 2022, ROE (http://www.roe.ac.uk/)
#
# This information is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This information is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# </meta:licence>
# </meta:header>
#


Target:

Run Test Deploy on a version of GaiaDMP that includes Koalas

Result:

SUCCESS (BUT SLOW)




# -----------------------------------------------------
# Fetch target branch
#[user@desktop]

source "${HOME:?}/aglais.env"
pushd "${AGLAIS_CODE}"
git checkout 'feature/koalas'

popd



# -----------------------------------------------------
# Create a container to work with.
#[user@desktop]

source "${HOME:?}/aglais.env"

podman run \
--rm \
--tty \
--interactive \
--name ansibler5 \
--hostname ansibler \
--publish 3000:3000 \
--publish 8088:8088 \
--env "SSH_AUTH_SOCK=/mnt/ssh_auth_sock" \
--volume "${SSH_AUTH_SOCK}:/mnt/ssh_auth_sock:rw,z" \
--volume "${HOME:?}/clouds.yaml:/etc/openstack/clouds.yaml:ro,z" \
--volume "${AGLAIS_CODE:?}/deployments:/deployments:ro,z" \
atolmis/ansible-client:2021.08.25 \
bash



# -----------------------------------------------------
# Set the cloud and configuration.
#[root@ansibler]

cloudname=iris-gaia-red

configname=zeppelin-27.45-spark-6.27.45



# -----------------------------------------------------
# Delete everything.
#[root@ansibler]

time \
/deployments/openstack/bin/delete-all.sh \
"${cloudname:?}"

> Done



# -----------------------------------------------------
# Create everything, using the new config.
#[root@ansibler]

time \
/deployments/hadoop-yarn/bin/create-all.sh \
"${cloudname:?}" \
"${configname:?}" \
| tee /tmp/create-all.log


> Done


# -----------------------------------------------------
# Run Full test
#[root@ansibler]

num_users=1
concurrent=False
test_level="full"

# Restart Zeppelin
time \
/deployments/hadoop-yarn/bin/restart-zeppelin.sh

time \
/deployments/hadoop-yarn/bin/run-tests.sh \
"${cloudname:?}" \
"${configname:?}" \
"${test_level:?}" \
${concurrent:?} \
${num_users:?} \
| tee /tmp/run-tests-full.log


# Running...

> Done

TASK [Run benchmarker] **************************************************************************************************************************************************************************************
changed: [localhost] => {"changed": true, "cmd": "python3 /tmp/run-test.py | tee /tmp/test-result.json", "delta": "2:38:47.353090", "end": "2022-02-16 15:40:20.254404", "rc": 0, "start": "2022-02-16 13:01:32.901314", "stderr": "", "stderr_lines": [], "stdout": "Test completed after: 9527.15 seconds\n{'SetUp': {'totaltime': '40.78', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'Mean_proper_motions_over_the_sky': {'totaltime': '56.25', 'status': 'SLOW', 'msg': '', 'valid': 'TRUE'}, 'Source_counts_over_the_sky.json': {'totaltime': '20.76', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'Good_astrometric_solutions_via_ML_Random_Forrest_classifier': {'totaltime': '553.25', 'status': 'SLOW', 'msg': '', 'valid': 'TRUE'}, 'QC_cuts_dev.json': {'totaltime': '4493.28', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'WD_detection_dev.json': {'totaltime': '4356.36', 'status': 'SLOW', 'msg': '', 'valid': 'TRUE'}, 'Library_Validation.json': {'totaltime': '6.45', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}}", "stdout_lines": ["Test completed after: 9527.15 seconds", "{'SetUp': {'totaltime': '40.78', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'Mean_proper_motions_over_the_sky': {'totaltime': '56.25', 'status': 'SLOW', 'msg': '', 'valid': 'TRUE'}, 'Source_counts_over_the_sky.json': {'totaltime': '20.76', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'Good_astrometric_solutions_via_ML_Random_Forrest_classifier': {'totaltime': '553.25', 'status': 'SLOW', 'msg': '', 'valid': 'TRUE'}, 'QC_cuts_dev.json': {'totaltime': '4493.28', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'WD_detection_dev.json': {'totaltime': '4356.36', 'status': 'SLOW', 'msg': '', 'valid': 'TRUE'}, 'Library_Validation.json': {'totaltime': '6.45', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}}"]}



# Results [Formatted]

{
'SetUp': {
'totaltime': '40.78',
'status': 'SUCCESS',
'msg': '',
'valid': 'TRUE'
},
'Mean_proper_motions_over_the_sky': {
'totaltime': '56.25',
'status': 'SLOW',
'msg': '',
'valid': 'TRUE'
},
'Source_counts_over_the_sky.json': {
'totaltime': '20.76',
'status': 'SUCCESS',
'msg': '',
'valid': 'TRUE'
},
'Good_astrometric_solutions_via_ML_Random_Forrest_classifier': {
'totaltime': '553.25',
'status': 'SLOW',
'msg': '',
'valid': 'TRUE'
},
'QC_cuts_dev.json': {
'totaltime': '4493.28',
'status': 'SUCCESS',
'msg': '',
'valid': 'TRUE'
},
'WD_detection_dev.json': {
'totaltime': '4356.36',
'status': 'SLOW',
'msg': '',
'valid': 'TRUE'
},
'Library_Validation.json': {
'totaltime': '6.45',
'status': 'SUCCESS',
'msg': '',
'valid': 'TRUE'
}
}


# Note: WD detection always seems to be slower than our benchmark expects. Do we need to update the expected value for this?
Collaborator comment:

We need to review what these benchmark values mean and how to manage changes to them.
If we just change a value now, then what does that mean for all the tests run before this point?
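
As a starting point for that review, here is a minimal sketch (plain Python; the baseline values are illustrative only, not agreed numbers) of how the benchmarker could flag a notebook as SLOW when its measured time drifts beyond a per-notebook baseline plus a tolerance:

# Subset of the results shown above.
results = {
    'Mean_proper_motions_over_the_sky': {'totaltime': '56.25',   'status': 'SLOW'},
    'WD_detection_dev.json':            {'totaltime': '4356.36', 'status': 'SLOW'},
    'Library_Validation.json':          {'totaltime': '6.45',    'status': 'SUCCESS'},
}

# Illustrative baselines (seconds) and the drift allowed before flagging SLOW.
baselines = {
    'Mean_proper_motions_over_the_sky': 50.0,
    'WD_detection_dev.json': 3000.0,
    'Library_Validation.json': 10.0,
}
tolerance = 1.25

for name, result in results.items():
    measured = float(result['totaltime'])
    status = 'SLOW' if measured > baselines[name] * tolerance else 'SUCCESS'
    print(f"{name}: {measured:.2f}s -> {status}")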



# Validate the Koalas package
# For this test we run these checks manually; once the PR is accepted we can include these cells in the Library Validation notebook in our "/aglais-testing" repo.


# Check pyarrow

%pyspark

import pyarrow
assert pyarrow.__version__ == "7.0.0"


> Success


# Check Koalas

%pyspark

import databricks.koalas as ks
assert ks.__version__ == "1.8.2"

> Success



# Create a Koalas series

%pyspark

import numpy as np

s = ks.Series([1, 3, 5, np.nan, 6, 8])
s

> 0 1.0
> 1 3.0
> 2 5.0
> 3 NaN
> 4 6.0
> 5 8.0
> dtype: float64
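

# A possible extra cell for the Library Validation notebook (a sketch only, not run as part of this deploy):
# round-trip a small series through pandas to confirm the Koalas/pandas interop works on this deployment.

%pyspark

import numpy as np
import pandas as pd
import databricks.koalas as ks

# Round-trip through pandas and check the values survive.
pdf = pd.Series([1, 3, 5, np.nan, 6, 8])
kser = ks.from_pandas(pdf)
assert kser.to_pandas().sort_index().equals(pdf)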