Skip to content

Commit

Permalink
Merge pull request hail-is#33 from Nealelab/change-jar-path
Browse files: browse the repository at this point in the history
Change jar path
  • Loading branch information
Liam Abbott authored Oct 2, 2017
2 parents fdc048c + e0f1275 commit 5f0c8e6
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 16 deletions.
26 changes: 18 additions & 8 deletions cloudtools/init_notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,25 @@
hail_zip = custom_zip.rsplit('/')[-1]
zip_path = custom_zip

# make local directory for Hail jar and zip
if not os.path.isdir('/home/hail/'):
os.mkdir('/home/hail/')

# copy Hail jar and zip to local directory on master node
call(['gsutil', 'cp', jar_path, '/usr/lib/spark/jars/'])
call(['gsutil', 'cp', zip_path, '/usr/lib/spark/python/'])
call(['gsutil', 'cp', jar_path, '/home/hail/'])
call(['gsutil', 'cp', zip_path, '/home/hail/'])

# copy conf files to custom directory
if not os.path.isdir('/home/hail/conf/'):
os.mkdir('/home/hail/conf/')
call(['cp', '/etc/spark/conf/spark-defaults.conf', '/home/hail/conf/spark-defaults.conf'])
call(['cp', '/etc/spark/conf/spark-env.sh', '/home/hail/conf/spark-env.sh'])

# modify custom Spark conf file to reference Hail jar and zip
with open('/etc/spark/conf/spark-defaults.conf', 'a') as f:
with open('/home/hail/conf/spark-defaults.conf', 'a') as f:
opts = [
'spark.files=/usr/lib/spark/jars/{}'.format(hail_jar),
'spark.submit.pyFiles=/usr/lib/spark/python/{}'.format(hail_zip),
'spark.files=/home/hail/{}'.format(hail_jar),
'spark.submit.pyFiles=/home/hail/{}'.format(hail_zip),
'spark.driver.extraClassPath=./{}'.format(hail_jar),
'spark.executor.extraClassPath=./{}'.format(hail_jar)
]
Expand All @@ -89,8 +99,8 @@
'env': {
'PYTHONHASHSEED': '0',
'SPARK_HOME': '/usr/lib/spark/',
'SPARK_CONF_DIR': '/etc/spark/conf/',
'PYTHONPATH': '/usr/lib/spark/python/:/usr/lib/spark/python/lib/py4j-0.10.3-src.zip:/usr/lib/spark/python/{}'.format(hail_zip)
'SPARK_CONF_DIR': '/home/hail/conf/',
'PYTHONPATH': '/usr/lib/spark/python/:/usr/lib/spark/python/lib/py4j-0.10.3-src.zip:/home/hail/{}'.format(hail_zip)
}
}

Expand Down Expand Up @@ -128,7 +138,7 @@
'Type=simple',
'User=root',
'Group=root',
'WorkingDirectory=/usr/local/',
'WorkingDirectory=/home/hail/',
'ExecStart=/usr/bin/python /usr/local/bin/jupyter notebook --allow-root',
'Restart=always',
'RestartSec=1',
Expand Down
9 changes: 8 additions & 1 deletion cloudtools/start.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,10 @@ def main(args):

# command to start cluster
cmd = [
'gcloud', 'dataproc', 'clusters', 'create',
'gcloud',
'dataproc',
'clusters',
'create',
args.name,
'--image-version={}'.format(image_version),
'--master-machine-type={}'.format(args.master_machine_type),
Expand All @@ -153,5 +156,9 @@ def main(args):
'--initialization-actions={}'.format(init_actions)
]

# print underlying gcloud command
print('gcloud command:')
print(' '.join(cmd[:5]) + ' \\\n ' + ' \\\n '.join(cmd[5:]))

# spin up cluster
call(cmd)
10 changes: 4 additions & 6 deletions cloudtools/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ def init_parser(parser):
help='Hail version to use (default: %(default)s).')
parser.add_argument('--jar', required=False, type=str, help='Custom Hail jar to use.')
parser.add_argument('--zip', required=False, type=str, help='Custom Hail zip to use.')
parser.add_argument('--archives', required=False, type=str, help='Comma-separated list of archives (.zip/.tar/.tar.gz/.tvz) to be provided to the Hail application.')
parser.add_argument('--files', required=False, type=str, help='Comma-separated list of files to add to the working directory of the Hail application.')
parser.add_argument('--properties', '-p', required=False, type=str, help='Extra Spark properties to set.')
parser.add_argument('--args', type=str, help='Quoted string of arguments to pass to the Hail script being submitted.')
Expand Down Expand Up @@ -46,9 +45,6 @@ def main(args):
if args.files:
files += ',' + args.files

# create archives argument
archives = args.archives if args.archives else ''

# create properties argument
properties = 'spark.driver.extraClassPath=./{0},spark.executor.extraClassPath=./{0}'.format(hail_jar)
if args.properties:
Expand All @@ -64,7 +60,6 @@ def main(args):
args.script,
'--cluster={}'.format(args.name),
'--files={}'.format(files),
'--archives={}'.format(archives),
'--py-files={}'.format(zip_path),
'--properties={}'.format(properties)
]
Expand All @@ -75,6 +70,9 @@ def main(args):
for x in args.args.split():
cmd.append(x)

print(cmd)
# print underlying gcloud command
print('gcloud command:')
print(' '.join(cmd[:6]) + ' \\\n ' + ' \\\n '.join(cmd[6:]))

# submit job
call(cmd)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from setuptools import setup

setup(name='cloudtools',
version='1.1.3',
version='1.1.5',
description='Collection of utilities for working on the Google Cloud Platform.',
url='https://github.com/Nealelab/cloudtools',
author='Liam Abbott',
Expand Down

0 comments on commit 5f0c8e6

Please sign in to comment.