diff --git a/easybuild/toolchains/compiler/clang.py b/easybuild/toolchains/compiler/clang.py index 8e1be062c9..b877fcb31c 100644 --- a/easybuild/toolchains/compiler/clang.py +++ b/easybuild/toolchains/compiler/clang.py @@ -85,6 +85,7 @@ class Clang(Compiler): 'defaultprec': [], 'loose': ['ffast-math', 'fno-unsafe-math-optimizations'], 'veryloose': ['ffast-math'], + 'vectorize': {False: 'fno-vectorize', True: 'fvectorize'}, } # used when 'optarch' toolchain option is enabled (and --optarch is not specified) diff --git a/easybuild/toolchains/compiler/gcc.py b/easybuild/toolchains/compiler/gcc.py index f3d2dc19c1..d92aec0191 100644 --- a/easybuild/toolchains/compiler/gcc.py +++ b/easybuild/toolchains/compiler/gcc.py @@ -35,7 +35,7 @@ import easybuild.tools.systemtools as systemtools from easybuild.tools.build_log import EasyBuildError from easybuild.tools.modules import get_software_root, get_software_version -from easybuild.tools.toolchain.compiler import Compiler +from easybuild.tools.toolchain.compiler import Compiler, DEFAULT_OPT_LEVEL TC_CONSTANT_GCC = "GCC" @@ -61,10 +61,12 @@ class Gcc(Compiler): 'lto': 'flto', 'ieee': ['mieee-fp', 'fno-trapping-math'], 'strict': ['mieee-fp', 'mno-recip'], - 'precise':['mno-recip'], - 'defaultprec':[], - 'loose': ['mrecip', 'mno-ieee-fp'], - 'veryloose': ['mrecip=all', 'mno-ieee-fp'], + 'precise': ['mno-recip'], + 'defaultprec': ['fno-math-errno'], + 'loose': ['fno-math-errno', 'mrecip', 'mno-ieee-fp'], + 'veryloose': ['fno-math-errno', 'mrecip=all', 'mno-ieee-fp'], + 'vectorize': {False: 'fno-tree-vectorize', True: 'ftree-vectorize'}, + DEFAULT_OPT_LEVEL: ['O2', 'ftree-vectorize'], } # used when 'optarch' toolchain option is enabled (and --optarch is not specified) diff --git a/easybuild/toolchains/compiler/ibmxl.py b/easybuild/toolchains/compiler/ibmxl.py index b4efb3ce92..65170d6ad7 100644 --- a/easybuild/toolchains/compiler/ibmxl.py +++ b/easybuild/toolchains/compiler/ibmxl.py @@ -11,7 +11,7 @@ from distutils.version import LooseVersion import easybuild.tools.systemtools as systemtools -from easybuild.tools.toolchain.compiler import Compiler +from easybuild.tools.toolchain.compiler import Compiler, DEFAULT_OPT_LEVEL TC_CONSTANT_IBMCOMP = "IBMXL" @@ -35,6 +35,8 @@ class IBMXL(Compiler): 'defaultprec': ['', '', ''], 'loose': [''], 'veryloose': [''], + 'vectorize': {False: 'qsimd=noauto', True: 'qsimd=auto'}, + DEFAULT_OPT_LEVEL: ['O2', 'qsimd=auto'], 'ibm-static': 'qstaticlink=xllibs', 'pic': 'qpic', 'shared': 'qmkshrobj', diff --git a/easybuild/toolchains/compiler/inteliccifort.py b/easybuild/toolchains/compiler/inteliccifort.py index 0ec3cde9f1..86b2330ae8 100644 --- a/easybuild/toolchains/compiler/inteliccifort.py +++ b/easybuild/toolchains/compiler/inteliccifort.py @@ -64,6 +64,7 @@ class IntelIccIfort(Compiler): 'defaultprec': ['ftz', 'fp-speculation=safe', 'fp-model source'], 'loose': ['fp-model fast=1'], 'veryloose': ['fp-model fast=2'], + 'vectorize': {False: 'no-vec', True: 'vec'}, 'intel-static': 'static-intel', 'no-icc': 'no-icc', 'error-unknown-option': 'we10006', # error at warning #10006: ignoring unknown option diff --git a/easybuild/toolchains/compiler/pgi.py b/easybuild/toolchains/compiler/pgi.py index f5ec0ec94f..338789be69 100644 --- a/easybuild/toolchains/compiler/pgi.py +++ b/easybuild/toolchains/compiler/pgi.py @@ -65,6 +65,7 @@ class Pgi(Compiler): 'defaultprec': ['Mflushz'], 'loose': ['Mfprelaxed'], 'veryloose': ['Mfprelaxed=div,order,intrinsic,recip,sqrt,rsqrt', 'Mfpapprox'], + 'vectorize': {False: 'Mnovect', True: 'Mvect'}, } # used when 'optarch' toolchain option is enabled (and --optarch is not specified) diff --git a/easybuild/tools/toolchain/compiler.py b/easybuild/tools/toolchain/compiler.py index 3147e9ba86..d97a0e7e34 100644 --- a/easybuild/tools/toolchain/compiler.py +++ b/easybuild/tools/toolchain/compiler.py @@ -88,6 +88,7 @@ class Compiler(Toolchain): 'static': (False, "Build static library"), '32bit': (False, "Compile 32bit target"), # LA, FFTW 'openmp': (False, "Enable OpenMP"), + 'vectorize': (None, "Enable compiler auto-vectorization, default except for noopt and lowopt"), 'packed-linker-options': (False, "Pack the linker options as comma separated list"), # ScaLAPACK mainly 'rpath': (True, "Use RPATH wrappers when --rpath is enabled in EasyBuild configuration"), } @@ -245,8 +246,19 @@ def _set_compiler_flags(self): (default_opt_level, self.COMPILER_OPT_FLAGS)) # 1st one is the one to use. add default at the end so len is at least 1 - optflags = [self.options.option(x) for x in self.COMPILER_OPT_FLAGS if self.options.get(x, False)] + \ - [self.options.option(default_opt_level)] + optflags = ([self.options.option(x) for x in self.COMPILER_OPT_FLAGS if self.options.get(x, False)] + \ + [self.options.option(default_opt_level)])[:1] + + # only apply if the vectorize toolchainopt is explicitly set + # otherwise the individual compiler toolchain file should make sure that + # vectorization is disabled for noopt and lowopt, and enabled otherwise. + if self.options.get('vectorize') is not None: + vectoptions = self.options.option('vectorize') + vectflags = vectoptions[self.options['vectorize']] + # avoid double use of such flags, or e.g. -fno-tree-vectorize followed by -ftree-vectorize + if isinstance(optflags[0], list): + optflags[0] = [flag for flag in optflags[0] if flag not in vectoptions.values()] + optflags.append(vectflags) optarchflags = [] if build_option('optarch') == OPTARCH_GENERIC: @@ -259,7 +271,7 @@ def _set_compiler_flags(self): precflags = [self.options.option(x) for x in self.COMPILER_PREC_FLAGS if self.options.get(x, False)] + \ [self.options.option('defaultprec')] - self.variables.nextend('OPTFLAGS', optflags[:1] + optarchflags) + self.variables.nextend('OPTFLAGS', optflags + optarchflags) self.variables.nextend('PRECFLAGS', precflags[:1]) # precflags last diff --git a/test/framework/options.py b/test/framework/options.py index 13e5ee09f6..efc93afd0a 100644 --- a/test/framework/options.py +++ b/test/framework/options.py @@ -2961,7 +2961,7 @@ def test_dump_env_config(self): "module load hwloc/1.6.2-GCC-4.7.2", # loading of dependency module # defining build env "export FC='gfortran'", - "export CFLAGS='-O2 -march=native'", + "export CFLAGS='-O2 -ftree-vectorize -march=native -fno-math-errno'", ] for pattern in patterns: regex = re.compile("^%s$" % pattern, re.M) diff --git a/test/framework/toolchain.py b/test/framework/toolchain.py index 5a7add78a9..69579a3867 100644 --- a/test/framework/toolchain.py +++ b/test/framework/toolchain.py @@ -277,7 +277,7 @@ def test_misc_flags_shared(self): # we need to make sure we check for flags, not letter (e.g. 'v' vs '-v') flag = '-%s' % tc.COMPILER_SHARED_OPTION_MAP[opt] for var in flag_vars: - flags = tc.get_variable(var) + flags = tc.get_variable(var).split() if enable: self.assertTrue(flag in flags, "%s: True means %s in %s" % (opt, flag, flags)) else: @@ -290,21 +290,25 @@ def test_misc_flags_unique(self): flag_vars = ['CFLAGS', 'CXXFLAGS', 'FCFLAGS', 'FFLAGS', 'F90FLAGS'] # setting option should result in corresponding flag to be set (unique options) - for opt in ['unroll', 'optarch', 'openmp']: + for opt in ['unroll', 'optarch', 'openmp', 'vectorize']: for enable in [True, False]: tc = self.get_toolchain("goalf", version="1.1.0-no-OFED") tc.set_options({opt: enable}) tc.prepare() if opt == 'optarch': - flag = '-%s' % tc.COMPILER_OPTIMAL_ARCHITECTURE_OPTION[(tc.arch, tc.cpu_family)] + option = tc.COMPILER_OPTIMAL_ARCHITECTURE_OPTION[(tc.arch, tc.cpu_family)] else: - flag = '-%s' % tc.options.options_map[opt] + option = tc.options.options_map[opt] + if not isinstance(option, dict): + option = {True: option} for var in flag_vars: flags = tc.get_variable(var) - if enable: - self.assertTrue(flag in flags, "%s: True means %s in %s" % (opt, flag, flags)) - else: - self.assertTrue(flag not in flags, "%s: False means no %s in %s" % (opt, flag, flags)) + for key, value in option.items(): + flag = "-%s" % value + if enable == key: + self.assertTrue(flag in flags, "%s: %s means %s in %s" % (opt, enable, flag, flags)) + else: + self.assertTrue(flag not in flags, "%s: %s means no %s in %s" % (opt, enable, flag, flags)) self.modtool.purge() def test_override_optarch(self): @@ -387,7 +391,7 @@ def test_compiler_dependent_optarch(self): """Test whether specifying optarch on a per compiler basis works.""" flag_vars = ['CFLAGS', 'CXXFLAGS', 'FCFLAGS', 'FFLAGS', 'F90FLAGS'] intel_options = [('intelflag', 'intelflag'), ('GENERIC', 'xSSE2'), ('', '')] - gcc_options = [('gccflag', 'gccflag'), ('-ftree-vectorize', '-ftree-vectorize'), ('', '')] + gcc_options = [('gccflag', 'gccflag'), ('march=nocona', 'march=nocona'), ('', '')] gcccore_options = [('gcccoreflag', 'gcccoreflag'), ('GENERIC', 'march=x86-64 -mtune=generic'), ('', '')] toolchains = [('iccifort', '2011.13.367'), ('GCC', '4.7.2'), ('GCCcore', '6.2.0'), ('PGI', '16.7-GCC-5.4.0-2.26')] enabled = [True, False] @@ -476,20 +480,20 @@ def test_precision_flags(self): flag_vars = ['CFLAGS', 'CXXFLAGS', 'FCFLAGS', 'FFLAGS', 'F90FLAGS'] - # check default precision: no specific flag for GCC + # check default precision: -fno-math-errno flag for GCC tc = self.get_toolchain("goalf", version="1.1.0-no-OFED") tc.set_options({}) tc.prepare() for var in flag_vars: - self.assertEqual(os.getenv(var), "-O2 -march=native") + self.assertEqual(os.getenv(var), "-O2 -ftree-vectorize -march=native -fno-math-errno") # check other precision flags prec_flags = { - 'ieee': "-mieee-fp -fno-trapping-math", + 'ieee': "-fno-math-errno -mieee-fp -fno-trapping-math", 'strict': "-mieee-fp -mno-recip", 'precise': "-mno-recip", - 'loose': "-mrecip -mno-ieee-fp", - 'veryloose': "-mrecip=all -mno-ieee-fp", + 'loose': "-fno-math-errno -mrecip -mno-ieee-fp", + 'veryloose': "-fno-math-errno -mrecip=all -mno-ieee-fp", } for prec in prec_flags: for enable in [True, False]: @@ -498,9 +502,9 @@ def test_precision_flags(self): tc.prepare() for var in flag_vars: if enable: - self.assertEqual(os.getenv(var), "-O2 -march=native %s" % prec_flags[prec]) + self.assertEqual(os.getenv(var), "-O2 -ftree-vectorize -march=native %s" % prec_flags[prec]) else: - self.assertEqual(os.getenv(var), "-O2 -march=native") + self.assertEqual(os.getenv(var), "-O2 -ftree-vectorize -march=native -fno-math-errno") self.modtool.purge() def test_cgoolf_toolchain(self): @@ -579,8 +583,10 @@ def test_goolfc(self): tc.set_options(opts) tc.prepare() + archflags = tc.COMPILER_OPTIMAL_ARCHITECTURE_OPTION[(tc.arch, tc.cpu_family)] + optflags = "-O2 -ftree-vectorize -%s -fno-math-errno -fopenmp" % archflags nvcc_flags = r' '.join([ - r'-Xcompiler="-O2 -%s -fopenmp"' % tc.COMPILER_OPTIMAL_ARCHITECTURE_OPTION[(tc.arch, tc.cpu_family)], + r'-Xcompiler="%s"' % optflags, # the use of -lcudart in -Xlinker is a bit silly but hard to avoid r'-Xlinker=".* -lm -lrt -lcudart -lpthread"', r' '.join(["-gencode %s" % x for x in opts['cuda_gencode']]), @@ -903,9 +909,9 @@ def test_independence(self): tc_cflags = { 'CrayCCE': "-O2 -homp -craype-verbose", - 'CrayGNU': "-O2 -fopenmp -craype-verbose", + 'CrayGNU': "-O2 -fno-math-errno -fopenmp -craype-verbose", 'CrayIntel': "-O2 -ftz -fp-speculation=safe -fp-model source -fopenmp -craype-verbose", - 'GCC': "-O2 -test -fopenmp", + 'GCC': "-O2 -ftree-vectorize -test -fno-math-errno -fopenmp", 'iccifort': "-O2 -test -ftz -fp-speculation=safe -fp-model source -fopenmp", }