diff --git a/easybuild/framework/easyblock.py b/easybuild/framework/easyblock.py
index dd23447611..77e0df72a5 100644
--- a/easybuild/framework/easyblock.py
+++ b/easybuild/framework/easyblock.py
@@ -1034,6 +1034,34 @@ def make_dir(self, dir_name, clean, dontcreateinstalldir=False):
 
         mkdir(dir_name, parents=True)
 
+    def set_up_cuda_cache(self):
+        """
+        Set up CUDA PTX cache via the $CUDA_CACHE_* environment variables.
+
+        The maximum cache size (in MiB) is taken from the 'cuda_cache_maxsize' build option
+        (1 GiB if that option is not set); a size of zero disables the cache via $CUDA_CACHE_DISABLE.
+        The cache location is taken from the 'cuda_cache_dir' build option,
+        and defaults to the 'eb-cuda-cache' subdirectory of the build directory.
+        """
+        cuda_cache_maxsize = build_option('cuda_cache_maxsize')
+        if cuda_cache_maxsize is None:
+            cuda_cache_maxsize = 1 * 1024  # 1 GiB default value (expressed in MiB)
+        else:
+            cuda_cache_maxsize = int(cuda_cache_maxsize)
+
+        if cuda_cache_maxsize == 0:
+            self.log.info("Disabling CUDA PTX cache since cache size was set to zero")
+            env.setvar('CUDA_CACHE_DISABLE', '1')
+        else:
+            cuda_cache_dir = build_option('cuda_cache_dir')
+            if not cuda_cache_dir:
+                cuda_cache_dir = os.path.join(self.builddir, 'eb-cuda-cache')
+            self.log.info("Enabling CUDA PTX cache of size %s MiB at %s", cuda_cache_maxsize, cuda_cache_dir)
+            env.setvar('CUDA_CACHE_DISABLE', '0')
+            env.setvar('CUDA_CACHE_PATH', cuda_cache_dir)
+            # build option is specified in MiB, convert to bytes for $CUDA_CACHE_MAXSIZE
+            env.setvar('CUDA_CACHE_MAXSIZE', str(cuda_cache_maxsize * 1024 * 1024))
+
     #
     # MODULE UTILITY FUNCTIONS
     #
@@ -2163,6 +2191,10 @@ def prepare_step(self, start_dir=True, load_tc_deps_modules=True):
             self.log.info("Loading extra modules: %s", extra_modules)
             self.modules_tool.load(extra_modules)
 
+        # Set up CUDA cache if required. If we don't do this, CUDA will use $HOME for its cache files
+        if get_software_root('CUDA') or get_software_root('CUDAcore'):
+            self.set_up_cuda_cache()
+
         # guess directory to start configure/build/install process in, and move there
         if start_dir:
             self.guess_start_dir()
diff --git a/easybuild/tools/config.py b/easybuild/tools/config.py
index 9fb49e348e..b491b0cde7 100644
--- a/easybuild/tools/config.py
+++ b/easybuild/tools/config.py
@@ -171,6 +171,8 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
         'container_image_name',
         'container_template_recipe',
         'container_tmpdir',
+        'cuda_cache_dir',
+        'cuda_cache_maxsize',
         'cuda_compute_capabilities',
         'download_timeout',
         'dump_test_report',
diff --git a/easybuild/tools/options.py b/easybuild/tools/options.py
index 31acffa334..504b561eeb 100644
--- a/easybuild/tools/options.py
+++ b/easybuild/tools/options.py
@@ -360,6 +360,11 @@ def override_options(self):
             'consider-archived-easyconfigs': ("Also consider archived easyconfigs", None, 'store_true', False),
             'containerize': ("Generate container recipe/image", None, 'store_true', False, 'C'),
             'copy-ec': ("Copy specified easyconfig(s) to specified location", None, 'store_true', False),
+            'cuda-cache-dir': ("Path to CUDA cache dir to use if enabled. Defaults to a path inside the build dir.",
+                               str, 'store', None, {'metavar': "PATH"}),
+            'cuda-cache-maxsize': ("Maximum size of the CUDA cache (in MiB) used for JIT compilation of PTX code. "
+                                   "Leave value empty to let EasyBuild choose a value or '0' to disable the cache",
+                                   int, 'store_or_None', None),
             'cuda-compute-capabilities': ("List of CUDA compute capabilities to use when building GPU software; "
                                           "values should be specified as digits separated by a dot, "
                                           "for example: 3.5,5.0,7.2", 'strlist', 'extend', None),
diff --git a/test/framework/easyblock.py b/test/framework/easyblock.py
index 1a784dc521..35f617d939 100644
--- a/test/framework/easyblock.py
+++ b/test/framework/easyblock.py
@@ -1894,6 +1894,60 @@ def test_prepare_step_hmns(self):
         self.assertEqual(len(loaded_modules), 1)
         self.assertEqual(loaded_modules[0]['mod_name'], 'GCC/6.4.0-2.28')
 
+    def test_prepare_step_cuda_cache(self):
+        """Test handling cuda-cache-* options."""
+
+        init_config(build_options={'cuda_cache_maxsize': None})  # Automatic mode
+
+        test_ecs = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'easyconfigs', 'test_ecs')
+        toy_ec = os.path.join(test_ecs, 't', 'toy', 'toy-0.0.eb')
+        ec = process_easyconfig(toy_ec)[0]
+        eb = EasyBlock(ec['ec'])
+        eb.silent = True
+        eb.make_builddir()
+
+        eb.prepare_step(start_dir=False)
+        logtxt = read_file(eb.logfile)
+        self.assertNotIn('Disabling CUDA PTX cache', logtxt)
+        self.assertNotIn('Enabling CUDA PTX cache', logtxt)
+
+        # Now with CUDA
+        test_ec = os.path.join(self.test_prefix, 'test.eb')
+        test_ectxt = re.sub('^toolchain = .*', "toolchain = {'name': 'gcccuda', 'version': '2018a'}",
+                            read_file(toy_ec), flags=re.M)
+        write_file(test_ec, test_ectxt)
+        ec = process_easyconfig(test_ec)[0]
+        eb = EasyBlock(ec['ec'])
+        eb.silent = True
+        eb.make_builddir()
+
+        write_file(eb.logfile, '')
+        eb.prepare_step(start_dir=False)
+        logtxt = read_file(eb.logfile)
+        self.assertNotIn('Disabling CUDA PTX cache', logtxt)
+        self.assertIn('Enabling CUDA PTX cache', logtxt)
+        self.assertEqual(os.environ['CUDA_CACHE_DISABLE'], '0')
+
+        init_config(build_options={'cuda_cache_maxsize': 0})  # Disable
+        write_file(eb.logfile, '')
+        eb.prepare_step(start_dir=False)
+        logtxt = read_file(eb.logfile)
+        self.assertIn('Disabling CUDA PTX cache', logtxt)
+        self.assertNotIn('Enabling CUDA PTX cache', logtxt)
+        self.assertEqual(os.environ['CUDA_CACHE_DISABLE'], '1')
+
+        # Specified size and location
+        cuda_cache_dir = os.path.join(self.test_prefix, 'custom-cuda-cache')
+        init_config(build_options={'cuda_cache_maxsize': 1234, 'cuda_cache_dir': cuda_cache_dir})
+        write_file(eb.logfile, '')
+        eb.prepare_step(start_dir=False)
+        logtxt = read_file(eb.logfile)
+        self.assertNotIn('Disabling CUDA PTX cache', logtxt)
+        self.assertIn('Enabling CUDA PTX cache', logtxt)
+        self.assertEqual(os.environ['CUDA_CACHE_DISABLE'], '0')
+        self.assertEqual(os.environ['CUDA_CACHE_MAXSIZE'], str(1234 * 1024 * 1024))
+        self.assertEqual(os.environ['CUDA_CACHE_PATH'], cuda_cache_dir)
+
     def test_checksum_step(self):
         """Test checksum step"""
         testdir = os.path.abspath(os.path.dirname(__file__))
diff --git a/test/framework/modules.py b/test/framework/modules.py
index e370b1a88f..73fa242490 100644
--- a/test/framework/modules.py
+++ b/test/framework/modules.py
@@ -54,7 +54,7 @@
 
 
 # number of modules included for testing purposes
-TEST_MODULES_COUNT = 81
+TEST_MODULES_COUNT = 82
 
 
 class ModulesTest(EnhancedTestCase):
diff --git a/test/framework/modules/gcccuda/2018a b/test/framework/modules/gcccuda/2018a
new file mode 100644
index 0000000000..f9779f1be5
--- /dev/null
+++ b/test/framework/modules/gcccuda/2018a
@@ -0,0 +1,26 @@
+#%Module
+
+proc ModulesHelp { } {
+    puts stderr {   GCC based compiler toolchain with CUDA support,
+ consisting of GCC and the CUDA toolkit. - Homepage: (none)
+}
+}
+
+module-whatis {GNU Compiler Collection (GCC) based compiler toolchain, along with CUDA toolkit. - Homepage: (none)}
+
+set root    /prefix/software/gcccuda/2018a
+
+conflict    gcccuda
+
+if { ![is-loaded GCC/6.4.0-2.28] } {
+    module load GCC/6.4.0-2.28
+}
+
+if { ![is-loaded CUDA/9.1.85] } {
+    module load CUDA/9.1.85
+}
+
+
+setenv	EBROOTGCCCUDA		"$root"
+setenv	EBVERSIONGCCCUDA		"2018a"
+setenv	EBDEVELGCCCUDA		"$root/easybuild/gcccuda-2018a-easybuild-devel"
diff --git a/test/framework/options.py b/test/framework/options.py
index 1cf0a11633..f3a0a29fed 100644
--- a/test/framework/options.py
+++ b/test/framework/options.py
@@ -4666,7 +4666,7 @@ def test_modules_tool_vs_syntax_check(self):
             regex = re.compile(pattern, re.M)
             self.assertTrue(regex.search(stdout), "Pattern '%s' found in: %s" % (regex.pattern, stdout))
 
-    def test_prefix(self):
+    def test_prefix_option(self):
         """Test which configuration settings are affected by --prefix."""
         txt, _ = self._run_mock_eb(['--show-full-config', '--prefix=%s' % self.test_prefix], raise_error=True)
 