Skip to content

Commit

Permalink
Merge pull request hail-is#9 from Nealelab/custom_build
Browse files Browse the repository at this point in the history
added ability to specify custom Hail jar and zip for Jupyter notebook…
  • Loading branch information
Liam Abbott authored Jun 14, 2017
2 parents de7f526 + 6e1e281 commit ddf4e48
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 6 deletions.
26 changes: 20 additions & 6 deletions init_notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,31 @@
if not hash:
hash = Popen(['gsutil', 'cat', 'gs://hail-common/latest-hash-spark{}.txt'.format(spark)], stdout=PIPE, stderr=PIPE).communicate()[0].strip()

# Hail jar and zip names
hail_jar = 'hail-hail-is-master-all-spark{0}-{1}.jar'.format(spark, hash)
hail_zip = 'pyhail-hail-is-master-{}.zip'.format(hash)
# Hail jar
jar = Popen('/usr/share/google/get_metadata_value attributes/JAR', shell=True, stdout=PIPE).communicate()[0].strip()
if jar:
hail_jar = jar.rsplit('/')[-1]
jar_path = jar
else:
hail_jar = 'hail-hail-is-master-all-spark{0}-{1}.jar'.format(spark, hash)
jar_path = 'gs://hail-common/' + hail_jar

# Hail zip
zip = Popen('/usr/share/google/get_metadata_value attributes/ZIP', shell=True, stdout=PIPE).communicate()[0].strip()
if zip:
hail_zip = zip.rsplit('/')[-1]
zip_path = zip
else:
hail_zip = 'pyhail-hail-is-master-{}.zip'.format(hash)
zip_path = 'gs://hail-common/' + hail_zip

# make directory for Hail and Jupyter notebook related files
os.mkdir('/home/hail/')
os.chmod('/home/hail/', 0777)

# copy Hail jar and zip to local directory on master node
call(['gsutil', 'cp', 'gs://hail-common/{}'.format(hail_jar), '/home/hail/'])
call(['gsutil', 'cp', 'gs://hail-common/{}'.format(hail_zip), '/home/hail/'])
call(['gsutil', 'cp', jar_path, '/home/hail/'])
call(['gsutil', 'cp', zip_path, '/home/hail/'])

# create Jupyter kernel spec file
kernel = {
Expand All @@ -68,7 +82,7 @@
'PYTHONHASHSEED': '0',
'SPARK_HOME': '/usr/lib/spark/',
'SPARK_CONF_DIR': '/home/hail/conf/',
'PYTHONPATH': '/usr/lib/spark/python/:/usr/lib/spark/python/lib/py4j-0.10.3-src.zip:/home/hail/pyhail-hail-is-master-{}.zip'.format(hash)
'PYTHONPATH': '/usr/lib/spark/python/:/usr/lib/spark/python/lib/py4j-0.10.3-src.zip:/home/hail/{}'.format(hail_zip)
}
}

Expand Down
10 changes: 10 additions & 0 deletions start_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@
parser.add_argument('--zone', default='us-central1-b', type=str, help='Compute zone for the cluster.')
parser.add_argument('--properties', default='', type=str, help='Additional configuration properties for the cluster.')

# specify custom Hail jar and zip
parser.add_argument('--jar', default='', type=str, help='Hail jar to use for Jupyter notebook.')
parser.add_argument('--zip', default='', type=str, help='Hail zip to use for Jupyter notebook.')

# initialization action flags
parser.add_argument('--init', default='gs://hail-common/init_notebook.py', help='comma-separated list of init scripts to run.')
parser.add_argument('--vep', action='store_true')
Expand Down Expand Up @@ -97,6 +101,12 @@
# prepare metadata values
metadata = 'HASH={0},SPARK={1}'.format(hail_hash, args.spark)

# if Hail jar and zip, add to metadata
if args.jar:
metadata += ',JAR={}'.format(args.jar)
if args.zip:
metadata += ',ZIP={}'.format(args.zip)

# command to start cluster
cmd = ' '.join([
'gcloud dataproc clusters create',
Expand Down

0 comments on commit ddf4e48

Please sign in to comment.