diff --git a/cloudtools/init_notebook.py b/cloudtools/init_notebook.py index 2bbcbed27ee..8db8280fe90 100755 --- a/cloudtools/init_notebook.py +++ b/cloudtools/init_notebook.py @@ -61,15 +61,25 @@ hail_zip = custom_zip.rsplit('/')[-1] zip_path = custom_zip + # make local directory for Hail jar and zip + if not os.path.isdir('/home/hail/'): + os.mkdir('/home/hail/') + # copy Hail jar and zip to local directory on master node - call(['gsutil', 'cp', jar_path, '/usr/lib/spark/jars/']) - call(['gsutil', 'cp', zip_path, '/usr/lib/spark/python/']) + call(['gsutil', 'cp', jar_path, '/home/hail/']) + call(['gsutil', 'cp', zip_path, '/home/hail/']) + + # copy conf files to custom directory + if not os.path.isdir('/home/hail/conf/'): + os.mkdir('/home/hail/conf/') + call(['cp', '/etc/spark/conf/spark-defaults.conf', '/home/hail/conf/spark-defaults.conf']) + call(['cp', '/etc/spark/conf/spark-env.sh', '/home/hail/conf/spark-env.sh']) # modify custom Spark conf file to reference Hail jar and zip - with open('/etc/spark/conf/spark-defaults.conf', 'a') as f: + with open('/home/hail/conf/spark-defaults.conf', 'a') as f: opts = [ - 'spark.files=/usr/lib/spark/jars/{}'.format(hail_jar), - 'spark.submit.pyFiles=/usr/lib/spark/python/{}'.format(hail_zip), + 'spark.files=/home/hail/{}'.format(hail_jar), + 'spark.submit.pyFiles=/home/hail/{}'.format(hail_zip), 'spark.driver.extraClassPath=./{}'.format(hail_jar), 'spark.executor.extraClassPath=./{}'.format(hail_jar) ] @@ -89,8 +99,8 @@ 'env': { 'PYTHONHASHSEED': '0', 'SPARK_HOME': '/usr/lib/spark/', - 'SPARK_CONF_DIR': '/etc/spark/conf/', - 'PYTHONPATH': '/usr/lib/spark/python/:/usr/lib/spark/python/lib/py4j-0.10.3-src.zip:/usr/lib/spark/python/{}'.format(hail_zip) + 'SPARK_CONF_DIR': '/home/hail/conf/', + 'PYTHONPATH': '/usr/lib/spark/python/:/usr/lib/spark/python/lib/py4j-0.10.3-src.zip:/home/hail/{}'.format(hail_zip) } } @@ -128,7 +138,7 @@ 'Type=simple', 'User=root', 'Group=root', - 'WorkingDirectory=/usr/local/', + 'WorkingDirectory=/home/hail/', 'ExecStart=/usr/bin/python /usr/local/bin/jupyter notebook --allow-root', 'Restart=always', 'RestartSec=1', diff --git a/cloudtools/start.py b/cloudtools/start.py index c9f29e65020..45d1c2f43da 100644 --- a/cloudtools/start.py +++ b/cloudtools/start.py @@ -135,7 +135,10 @@ def main(args): # command to start cluster cmd = [ - 'gcloud', 'dataproc', 'clusters', 'create', + 'gcloud', + 'dataproc', + 'clusters', + 'create', args.name, '--image-version={}'.format(image_version), '--master-machine-type={}'.format(args.master_machine_type), @@ -153,5 +156,9 @@ def main(args): '--initialization-actions={}'.format(init_actions) ] + # print underlying gcloud command + print('gcloud command:') + print(' '.join(cmd[:5]) + ' \\\n ' + ' \\\n '.join(cmd[5:])) + # spin up cluster call(cmd) diff --git a/cloudtools/submit.py b/cloudtools/submit.py index f19bad849ba..0200f7bd8b2 100644 --- a/cloudtools/submit.py +++ b/cloudtools/submit.py @@ -11,7 +11,6 @@ def init_parser(parser): help='Hail version to use (default: %(default)s).') parser.add_argument('--jar', required=False, type=str, help='Custom Hail jar to use.') parser.add_argument('--zip', required=False, type=str, help='Custom Hail zip to use.') - parser.add_argument('--archives', required=False, type=str, help='Comma-separated list of archives (.zip/.tar/.tar.gz/.tvz) to be provided to the Hail application.') parser.add_argument('--files', required=False, type=str, help='Comma-separated list of files to add to the working directory of the Hail application.') parser.add_argument('--properties', '-p', required=False, type=str, help='Extra Spark properties to set.') parser.add_argument('--args', type=str, help='Quoted string of arguments to pass to the Hail script being submitted.') @@ -46,9 +45,6 @@ def main(args): if args.files: files += ',' + args.files - # create archives argument - archives = args.archives if args.archives else '' - # create properties argument properties = 'spark.driver.extraClassPath=./{0},spark.executor.extraClassPath=./{0}'.format(hail_jar) if args.properties: @@ -64,7 +60,6 @@ def main(args): args.script, '--cluster={}'.format(args.name), '--files={}'.format(files), - '--archives={}'.format(archives), '--py-files={}'.format(zip_path), '--properties={}'.format(properties) ] @@ -75,6 +70,9 @@ def main(args): for x in args.args.split(): cmd.append(x) - print(cmd) + # print underlying gcloud command + print('gcloud command:') + print(' '.join(cmd[:6]) + ' \\\n ' + ' \\\n '.join(cmd[6:])) + # submit job call(cmd) diff --git a/setup.py b/setup.py index c95d4098d04..9a998a5967b 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup setup(name='cloudtools', - version='1.1.3', + version='1.1.5', description='Collection of utilities for working on the Google Cloud Platform.', url='https://github.com/Nealelab/cloudtools', author='Liam Abbott',