Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

create CUDA cache (for JIT compiled PTX code) in build dir instead of $HOME #3569

Merged
merged 5 commits into from
Apr 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions easybuild/framework/easyblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -1034,6 +1034,27 @@ def make_dir(self, dir_name, clean, dontcreateinstalldir=False):

mkdir(dir_name, parents=True)

def set_up_cuda_cache(self):
    """
    Configure the CUDA cache for JIT-compiled PTX code through environment variables.

    The cache size is read from the 'cuda_cache_maxsize' build option; when that option is None,
    1024 (reported as MiB in the log message) is used instead. A size of zero disables the cache
    by setting $CUDA_CACHE_DISABLE to '1'. Any other size enables it: $CUDA_CACHE_DISABLE is set
    to '0', $CUDA_CACHE_PATH to the 'cuda_cache_dir' build option (or to an 'eb-cuda-cache'
    subdirectory of the build directory when that option is unset/empty), and $CUDA_CACHE_MAXSIZE
    to the size multiplied by 1024 * 1024.
    """
    requested_size = build_option('cuda_cache_maxsize')
    # no explicit size requested: fall back to 1024 MiB (1 GiB)
    size_in_mib = 1024 if requested_size is None else int(requested_size)

    # a size of zero means the cache should not be used at all
    if size_in_mib == 0:
        self.log.info("Disabling CUDA PTX cache since cache size was set to zero")
        env.setvar('CUDA_CACHE_DISABLE', '1')
        return

    # only fall back to a location inside the build directory if no cache dir was configured
    cache_location = build_option('cuda_cache_dir') or os.path.join(self.builddir, 'eb-cuda-cache')

    self.log.info("Enabling CUDA PTX cache of size %s MiB at %s", size_in_mib, cache_location)
    env.setvar('CUDA_CACHE_DISABLE', '0')
    env.setvar('CUDA_CACHE_PATH', cache_location)
    # the environment variable gets the size scaled by 1024 * 1024 (MiB -> bytes)
    size_in_bytes = size_in_mib * 1024 * 1024
    env.setvar('CUDA_CACHE_MAXSIZE', str(size_in_bytes))

#
# MODULE UTILITY FUNCTIONS
#
Expand Down Expand Up @@ -2163,6 +2184,10 @@ def prepare_step(self, start_dir=True, load_tc_deps_modules=True):
self.log.info("Loading extra modules: %s", extra_modules)
self.modules_tool.load(extra_modules)

# Set up CUDA cache if required. If we don't do this, CUDA will use $HOME for its cache files
if get_software_root('CUDA') or get_software_root('CUDAcore'):
boegel marked this conversation as resolved.
Show resolved Hide resolved
self.set_up_cuda_cache()

# guess directory to start configure/build/install process in, and move there
if start_dir:
self.guess_start_dir()
Expand Down
2 changes: 2 additions & 0 deletions easybuild/tools/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
'container_image_name',
'container_template_recipe',
'container_tmpdir',
'cuda_cache_dir',
'cuda_cache_maxsize',
'cuda_compute_capabilities',
'download_timeout',
'dump_test_report',
Expand Down
5 changes: 5 additions & 0 deletions easybuild/tools/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,11 @@ def override_options(self):
'consider-archived-easyconfigs': ("Also consider archived easyconfigs", None, 'store_true', False),
'containerize': ("Generate container recipe/image", None, 'store_true', False, 'C'),
'copy-ec': ("Copy specified easyconfig(s) to specified location", None, 'store_true', False),
'cuda-cache-dir': ("Path to CUDA cache dir to use if enabled. Defaults to a path inside the build dir.",
str, 'store', None, {'metavar': "PATH"}),
'cuda-cache-maxsize': ("Maximum size of the CUDA cache (in MiB) used for JIT compilation of PTX code. "
"Leave value empty to let EasyBuild choose a value or '0' to disable the cache",
int, 'store_or_None', None),
'cuda-compute-capabilities': ("List of CUDA compute capabilities to use when building GPU software; "
"values should be specified as digits separated by a dot, "
"for example: 3.5,5.0,7.2", 'strlist', 'extend', None),
Expand Down
54 changes: 54 additions & 0 deletions test/framework/easyblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -1894,6 +1894,60 @@ def test_prepare_step_hmns(self):
self.assertEqual(len(loaded_modules), 1)
self.assertEqual(loaded_modules[0]['mod_name'], 'GCC/6.4.0-2.28')

def test_prepare_step_cuda_cache(self):
"""
Test handling of the cuda-cache-* options in prepare_step.

Four scenarios are checked in sequence, each by inspecting the easyblock log file
and (where applicable) the resulting $CUDA_CACHE_* environment variables:
1. toy easyconfig as-is: neither the 'Enabling' nor the 'Disabling' message is logged;
2. same easyconfig with toolchain gcccuda/2018a and cuda_cache_maxsize=None: cache is enabled;
3. cuda_cache_maxsize=0: cache is disabled ($CUDA_CACHE_DISABLE is '1');
4. cuda_cache_maxsize=1234 with a custom cuda_cache_dir: size and path are exported.
"""

init_config(build_options={'cuda_cache_maxsize': None}) # Automatic mode

# first pass: unmodified toy easyconfig (presumably no CUDA in its toolchain/dependencies)
test_ecs = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'easyconfigs', 'test_ecs')
toy_ec = os.path.join(test_ecs, 't', 'toy', 'toy-0.0.eb')
ec = process_easyconfig(toy_ec)[0]
eb = EasyBlock(ec['ec'])
eb.silent = True
eb.make_builddir()

# no message about the CUDA cache is expected in the log for this easyconfig
eb.prepare_step(start_dir=False)
logtxt = read_file(eb.logfile)
self.assertNotIn('Disabling CUDA PTX cache', logtxt)
self.assertNotIn('Enabling CUDA PTX cache', logtxt)

# Now with CUDA
# (a copy of the toy easyconfig is written with its toolchain line replaced by gcccuda/2018a)
test_ec = os.path.join(self.test_prefix, 'test.eb')
test_ectxt = re.sub('^toolchain = .*', "toolchain = {'name': 'gcccuda', 'version': '2018a'}",
read_file(toy_ec), flags=re.M)
write_file(test_ec, test_ectxt)
ec = process_easyconfig(test_ec)[0]
eb = EasyBlock(ec['ec'])
eb.silent = True
eb.make_builddir()

# overwrite the log file with empty content, so the assertions below
# only see what the upcoming prepare_step call logs
write_file(eb.logfile, '')
eb.prepare_step(start_dir=False)
logtxt = read_file(eb.logfile)
self.assertNotIn('Disabling CUDA PTX cache', logtxt)
self.assertIn('Enabling CUDA PTX cache', logtxt)
self.assertEqual(os.environ['CUDA_CACHE_DISABLE'], '0')

# a maximum size of zero must switch the cache off
init_config(build_options={'cuda_cache_maxsize': 0}) # Disable
write_file(eb.logfile, '')
eb.prepare_step(start_dir=False)
logtxt = read_file(eb.logfile)
self.assertIn('Disabling CUDA PTX cache', logtxt)
self.assertNotIn('Enabling CUDA PTX cache', logtxt)
self.assertEqual(os.environ['CUDA_CACHE_DISABLE'], '1')

# Specified size and location
cuda_cache_dir = os.path.join(self.test_prefix, 'custom-cuda-cache')
init_config(build_options={'cuda_cache_maxsize': 1234, 'cuda_cache_dir': cuda_cache_dir})
write_file(eb.logfile, '')
eb.prepare_step(start_dir=False)
logtxt = read_file(eb.logfile)
self.assertNotIn('Disabling CUDA PTX cache', logtxt)
self.assertIn('Enabling CUDA PTX cache', logtxt)
self.assertEqual(os.environ['CUDA_CACHE_DISABLE'], '0')
# the configured size (1234) is expected to be exported multiplied by 1024 * 1024
self.assertEqual(os.environ['CUDA_CACHE_MAXSIZE'], str(1234 * 1024 * 1024))
self.assertEqual(os.environ['CUDA_CACHE_PATH'], cuda_cache_dir)

def test_checksum_step(self):
"""Test checksum step"""
testdir = os.path.abspath(os.path.dirname(__file__))
Expand Down
2 changes: 1 addition & 1 deletion test/framework/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@


# number of modules included for testing purposes
TEST_MODULES_COUNT = 81
TEST_MODULES_COUNT = 82


class ModulesTest(EnhancedTestCase):
Expand Down
26 changes: 26 additions & 0 deletions test/framework/modules/gcccuda/2018a
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#%Module
# Test modulefile for the gcccuda/2018a toolchain: loads GCC/6.4.0-2.28 and CUDA/9.1.85
# (unless already loaded) and defines the $EBROOTGCCCUDA, $EBVERSIONGCCCUDA and
# $EBDEVELGCCCUDA environment variables.

# NOTE(review): the help text below mentions OpenMPI, OpenBLAS, FFTW and ScaLAPACK,
# but this modulefile only loads GCC and CUDA - confirm whether the text was copied
# from another toolchain module.
proc ModulesHelp { } {
puts stderr { GCC based compiler toolchain with CUDA support, and including
OpenMPI for MPI support, OpenBLAS (BLAS and LAPACK support), FFTW and ScaLAPACK. - Homepage: (none)
}
}

module-whatis {GNU Compiler Collection (GCC) based compiler toolchain, along with CUDA toolkit. - Homepage: (none)}

# installation prefix, used for the environment variables defined at the bottom
set root /prefix/software/gcccuda/2018a

# declare a conflict with any module named gcccuda
conflict gcccuda

# load the toolchain components, skipping those that are already loaded
if { ![is-loaded GCC/6.4.0-2.28] } {
module load GCC/6.4.0-2.28
}

if { ![is-loaded CUDA/9.1.85] } {
module load CUDA/9.1.85
}


# environment variables exposing the install prefix, version and devel module path
setenv EBROOTGCCCUDA "$root"
setenv EBVERSIONGCCCUDA "2018a"
setenv EBDEVELGCCCUDA "$root/easybuild/gcccuda-2018a-easybuild-devel"
2 changes: 1 addition & 1 deletion test/framework/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -4666,7 +4666,7 @@ def test_modules_tool_vs_syntax_check(self):
regex = re.compile(pattern, re.M)
self.assertTrue(regex.search(stdout), "Pattern '%s' found in: %s" % (regex.pattern, stdout))

def test_prefix(self):
def test_prefix_option(self):
"""Test which configuration settings are affected by --prefix."""
txt, _ = self._run_mock_eb(['--show-full-config', '--prefix=%s' % self.test_prefix], raise_error=True)

Expand Down