Skip to content

Commit

Permalink
Merge pull request #583 from stvoutsin/issue-benchmarking
Browse files Browse the repository at this point in the history
Updates to Ansible scripts to include an (optional) benchmark test
  • Loading branch information
Zarquan authored Dec 1, 2021
2 parents 62aea82 + 3104073 commit cc05970
Show file tree
Hide file tree
Showing 11 changed files with 1,044 additions and 1 deletion.
1 change: 0 additions & 1 deletion deployments/hadoop-yarn/ansible/29-install-pip-libs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,3 @@
become: true
pip:
requirements: "/tmp/requirements.txt"

83 changes: 83 additions & 0 deletions deployments/hadoop-yarn/ansible/36-run-benchmark.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#
# <meta:header>
# <meta:licence>
# Copyright (c) 2020, ROE (http://www.roe.ac.uk/)
#
# This information is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This information is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# </meta:licence>
# </meta:header>
#

- name: "Get Zeppelin IP Address"
hosts: localhost
vars_files:
- config/ansible.yml
- /tmp/ansible-vars.yml
- config/openstack.yml

tasks:

- name: "Discover our Zeppelin node and store IP address in temp file"
os_server_info:
cloud: "{{ cloudname }}"
server: "{{ deployname }}-zeppelin"
register:
zeppelinnode

- local_action: copy content={{ zeppelinnode.openstack_servers[0].accessIPv4 }} dest=/tmp/zeppelin_ip.txt


- name: "Install and run Python benchmark suite"
hosts: localhost
gather_facts: yes
become: yes
become_method: sudo
vars_files:
- config/ansible.yml
- /tmp/ansible-vars.yml
vars:
zepipaddress: "{{ lookup('file', '/tmp/zeppelin_ip.txt') | trim }}"

tasks:

- name: "Creating our Zeppelin config file"
copy:
dest: "/tmp/user.yml"
content: |
zeppelin_url: http://{{ zepipaddress }}:8080
zeppelin_auth: true
zeppelin_user: gaiauser
zeppelin_password: gaiapass
- name: "Install git"
yum:
name: git
update_cache: yes
state: present

- pip:
name: 'git+https://github.com/wfau/aglais-testing@v0.1.2'
executable: pip

- name: "Creating our Benchmarking script"
copy:
dest: "/tmp/run-test.py"
content: |
import sys
from aglais_benchmark import AglaisBenchmarker
AglaisBenchmarker("/deployments/zeppelin/test/config/notebooks.json", "/tmp/").run(concurrent=False, users=1)
- name: "Run benchmarker"
command: python3 /tmp/run-test.py

18 changes: 18 additions & 0 deletions deployments/hadoop-yarn/bin/create-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
deployname="${cloudname:?}-$(date '+%Y%m%d')"
deploydate=$(date '+%Y%m%dT%H%M%S')

deploytype="${3:-prod}"

configyml='/tmp/aglais-config.yml'
statusyml='/tmp/aglais-status.yml'
touch "${statusyml:?}"
Expand Down Expand Up @@ -305,3 +307,19 @@

done

# -----------------------------------------------------
# Run Benchmarks

if [[ "$deploytype" == "test" ]]
then

pushd "/deployments/hadoop-yarn/ansible"

ansible-playbook \
--verbose \
--inventory "${inventory:?}" \
"36-run-benchmark.yml"

popd

fi
41 changes: 41 additions & 0 deletions deployments/zeppelin/test/config/notebooks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{
"notebooks" : [
{
"name" : "SetUp",
"filepath" : "https://raw.githubusercontent.com/wfau/aglais-testing/main/notebooks/public_examples/SetUp.json",
"totaltime" : 45,
"results" : []
},
{
"name" : "Mean_proper_motions_over_the_sky",
"filepath" : "https://raw.githubusercontent.com/wfau/aglais-testing/main/notebooks/public_examples/Mean_proper_motions_over_the_sky.json",
"totaltime" : 55,
"results" : []
},
{
"name" : "Source_counts_over_the_sky.json",
"filepath" : "https://raw.githubusercontent.com/wfau/aglais-testing/main/notebooks/public_examples/Source_counts_over_the_sky.json",
"totaltime" : 22,
"results" : []
},
{
"name" : "Good_astrometric_solutions_via_ML_Random_Forrest_classifier",
"filepath" : "https://raw.githubusercontent.com/wfau/aglais-testing/main/notebooks/public_examples/Good_astrometric_solutions_via_ML_Random_Forrest_classifier.json",
"totaltime" : 500,
"results" : []
},
{
"name" : "QC_cuts_dev.json",
"filepath" : "https://raw.githubusercontent.com/wfau/aglais-testing/main/notebooks/public_examples/QC_cuts_dev.json",
"totaltime" : 4700,
"results" : []
},
{
"name" : "WD_detection_dev.json",
"filepath" : "https://raw.githubusercontent.com/wfau/aglais-testing/main/notebooks/public_examples/WD_detection_dev.json",
"totaltime" : 3750,
"results" : []
}

]
}
12 changes: 12 additions & 0 deletions deployments/zeppelin/test/config/notebooks_pi.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"notebooks" : [
{
"name" : "pi_calculation",
"filepath" : "https://raw.githubusercontent.com/wfau/aglais-testing/main/notebooks/pi_calculation.json",
"totaltime" : 160,
"results" : [ "Pi is roughly 3.141854"]

}

]
}
198 changes: 198 additions & 0 deletions notes/stv/20210918-zeppelin-benchmarking-01.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
#
# <meta:header>
# <meta:licence>
# Copyright (c) 2021, ROE (http://www.roe.ac.uk/)
#
# This information is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This information is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# </meta:licence>
# </meta:header>
#
#

# Benchmarking the Zeppelin REST API
# ------------------------------------

# Summary of benchmarking suit:
We use the "zdairi" Zeppelin rest client to run our benchmarks in the following way:
- Create secrets file
- Create notebooks.json config where we store the list of our curated notebooks. Each entry has a link to the raw json file of the Zeppelin notebook

- For each notebook:
Fetch notebook json file store in temp folder on local machine
Create notebook in Zeppelin instance (Create under tmp directory in Zeppelin, i.e. change name to /tmp/notebook..)
Run notebook, and store the execution duration
Delete notebook from Zeppelin
Delete temporary notebook json file in local machine

- Return a dictionary of the Notebook ID's and the execution duration for each
- As a one off, run the benchmark suite, to get the execution time for each notebook using our test prototype



# zdairi is zeppelin CLI tool which wrapper zeppelin REST API for control notebook and interpreter.
# https://pypi.org/project/zdairi/
# Benchmarking suite can be found here: https://github.com/wfau/aglais-testing



# The following was run a local machine (Ubuntu).
# [Update] I have also tested on a remote VM which was also however an Ubuntu machine
# For the concurrent test, we need to create the users before hand in Zeppelin


# Install Python2.7 (Required by zdairi)
# user@local
# -----------------------------------
apt install python-minimal



# Clone Aglais-testing Github project
# user@local
# -----------------------------------
git clone https://github.com/wfau/aglais-testing
pushd aglais-testing



# Setup Virtualenv
# user@local
# --------------------

virtualenv --python=python2.7 mypython
source mypython/bin/activate



# Install zdairi
# (mypython) user@local
# -----------------------------

pip install zdairi



# Edit our secrets yaml files
# For a single user benchmark, we need to edit "user.yml"
# For a multi user test, we need to setup a yml for each concurrent user, numbered as: "user1.yml", "user2.yml ..."
# (mypython) user@local
# --------------------------------

nano config/zeppelin/user.yml
..
zeppelin_url: http://128.232.227.178:8080
zeppelin_auth: true
zeppelin_user: user
zeppelin_password: pass
..





# Optional: Edit the notebooks we want to test
# By default the project comes with two notebook configuration files, one containing a single notebook for a quick test, and one with the full list of notebooks
# (mypython) user@local
# ----------------------------------------------------------------------------


nano config/notebooks/notebook.json
..
{
"notebooks" : [
{
"name" : "SetUp",
"filepath" : "https://raw.githubusercontent.com/stvoutsin/aglais-testing/main/notebooks/SetUp.json",
"totaltime" : 400,
"results" : []
},
{
"name" : "Good_astrometric_solutions_via_ML_Random_Forrest_classifier",
"filepath" : "https://raw.githubusercontent.com/stvoutsin/aglais-testing/main/notebooks/Good_astrometric_solutions_via_ML_Random_Forrest_classifier.json",
"totaltime" : 900,
"results" : []
},
{
"name" : "Mean_proper_motions_over_the_sky",
"filepath" : "https://raw.githubusercontent.com/stvoutsin/aglais-testing/main/notebooks/Mean_proper_motions_over_the_sky.json",
"totaltime" : 400,
"results" : []
},
{
"name" : "Source_counts_over_the_sky.json",
"filepath" : "https://raw.githubusercontent.com/stvoutsin/aglais-testing/main/notebooks/Source_counts_over_the_sky.json",
"totaltime" : 1200,
"results" : []
}

]
}





..


# Navigate to src/
pushd src


# -- Test #1 --
# Run Single (Quick) pi calculation test
# (mypython) user@local
# --------------------------------------------------

python3
>>> from benchmark import Benchmarker
>>> Benchmarker("../config/notebooks/notebooks_quick_pi.json", "../config/zeppelin/").run(concurrent=False, users=1)


Expected Output: ['Pi is roughly 3.141854']
Actual output: ['Pi is roughly 3.141210']
-----------
Test completed after: 6.19 seconds
-----------
{'pi_quick': {'totaltime': '6.19', 'status': 'SUCCESS', 'valid': 'FALSE'}}



# -- Test #2 --
# Run Public examples tests
# (mypython) user@local
>>> from benchmark import Benchmarker
>>> Benchmarker("../config/notebooks/notebooks.json", "../config/zeppelin/").run(concurrent=False, users=1)
Test completed after: 929.94 seconds
-----------
{'SetUp': {'totaltime': '42.66', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'Mean_proper_motions_over_the_sky': {'totaltime': '107.13', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'Source_counts_over_the_sky.json': {'totaltime': '36.21', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'Good_astrometric_solutions_via_ML_Random_Forrest_classifier': {'totaltime': '743.94', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}}



# -- Test #3 --
# Repeat Public examples tests
# (mypython) user@local
Test completed after: 845.26 seconds
-----------
{'SetUp': {'totaltime': '41.15', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'Mean_proper_motions_over_the_sky': {'totaltime': '87.54', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'Source_counts_over_the_sky.json': {'totaltime': '29.30', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}, 'Good_astrometric_solutions_via_ML_Random_Forrest_classifier': {'totaltime': '687.27', 'status': 'SUCCESS', 'msg': '', 'valid': 'TRUE'}}


# Duration looks about right:
743.94 / 60 = 12 minutes
687.27 / 60 = 11.45 minutes

# Let's set the template values for these notebooks to slightly above the max of the two
# Update the timing values in the configuration for these notebooks

# https://github.com/stvoutsin/aglais-testing/tree/main/notebooks/public_examples
Loading

0 comments on commit cc05970

Please sign in to comment.