From e03b6b9040aab7bb511a1b3c7d5adb5f56de21d5 Mon Sep 17 00:00:00 2001 From: Agnes Date: Thu, 30 May 2019 18:44:31 -0400 Subject: [PATCH 01/28] Allow reaction generation with multiple processes --- rmg.py | 8 + rmgpy/data/kinetics/database.py | 4 +- rmgpy/rmg/main.py | 14 ++ rmgpy/rmg/modelTest.py | 32 +--- rmgpy/rmg/pdep.py | 4 +- rmgpy/rmg/react.py | 320 +++++++++++++++++++++++--------- rmgpy/rmg/reactTest.py | 105 +---------- rmgpy/rmg/rmgTest.py | 2 +- 8 files changed, 277 insertions(+), 212 deletions(-) diff --git a/rmg.py b/rmg.py index 0439c85626..2401f0abfe 100755 --- a/rmg.py +++ b/rmg.py @@ -82,6 +82,10 @@ def parse_command_line_arguments(command_line_args=None): parser.add_argument('-t', '--walltime', type=str, nargs=1, default='00:00:00:00', metavar='DD:HH:MM:SS', help='set the maximum execution time') + # Add option to select max number of processes for reaction generation + parser.add_argument('-n', '--maxproc', type=int, nargs=1, default=1, + help='max number of processes used during reaction generation') + # Add option to output a folder that stores the details of each kinetic database entry source parser.add_argument('-k', '--kineticsdatastore', action='store_true', help='output a folder, kinetics_database, that contains a .txt file for each reaction family ' @@ -99,6 +103,9 @@ def parse_command_line_arguments(command_line_args=None): if args.walltime != '00:00:00:00': args.walltime = args.walltime[0] + if args.maxproc != 1: + args.maxproc = args.maxproc[0] + # Set directories input_directory = os.path.abspath(os.path.dirname(args.file)) @@ -136,6 +143,7 @@ def main(): kwargs = { 'restart': args.restart, 'walltime': args.walltime, + 'maxproc': args.maxproc, 'kineticsdatastore': args.kineticsdatastore } diff --git a/rmgpy/data/kinetics/database.py b/rmgpy/data/kinetics/database.py index 1eeedbdbb9..5f5bcfa3e7 100644 --- a/rmgpy/data/kinetics/database.py +++ b/rmgpy/data/kinetics/database.py @@ -478,6 +478,8 @@ def 
generate_reactions_from_families(self, reactants, products=None, only_famili # Check if the reactants are the same # If they refer to the same memory address, then make a deep copy so # they can be manipulated independently + if isinstance(reactants, tuple): + reactants = list(reactants) same_reactants = 0 if len(reactants) == 2: if reactants[0] is reactants[1]: @@ -512,8 +514,6 @@ def generate_reactions_from_families(self, reactants, products=None, only_famili same_reactants = 2 # Label reactant atoms for proper degeneracy calculation (cannot be in tuple) - if isinstance(reactants, tuple): - reactants = list(reactants) ensure_independent_atom_ids(reactants, resonance=resonance) combos = generate_molecule_combos(reactants) diff --git a/rmgpy/rmg/main.py b/rmgpy/rmg/main.py index 6cefe307de..2de8049765 100644 --- a/rmgpy/rmg/main.py +++ b/rmgpy/rmg/main.py @@ -39,6 +39,8 @@ import logging import os import shutil +import psutil +import resource import numpy as np import gc @@ -81,6 +83,9 @@ solvent = None +# Maximum number of user defined processors +maxproc = 1 + class RMG(util.Subject): """ A representation of a Reaction Mechanism Generator (RMG) job. 
The @@ -486,6 +491,15 @@ def initialize(self, **kwargs): raise ValueError('Invalid format for wall time {0}; should be DD:HH:MM:SS.'.format(self.wallTime)) self.wallTime = int(data[-1]) + 60 * int(data[-2]) + 3600 * int(data[-3]) + 86400 * int(data[-4]) + global maxproc + try: + maxproc = kwargs['maxproc'] + except KeyError: + pass + + if maxproc > psutil.cpu_count(): + raise ValueError('Invalid input for user defined maximum number of processes {0}; should be an integer and smaller or equal to your available number of processes {1}'.format(maxproc, psutil.cpu_count())) + # Initialize reaction model if restart: self.initializeRestartRun(os.path.join(self.outputDirectory,'restart.pkl')) diff --git a/rmgpy/rmg/modelTest.py b/rmgpy/rmg/modelTest.py index 0a81aa0f68..392f7958c5 100644 --- a/rmgpy/rmg/modelTest.py +++ b/rmgpy/rmg/modelTest.py @@ -140,10 +140,10 @@ class item: spcA = Species().fromSMILES('[OH]') spcs = [Species().fromSMILES('CC'), Species().fromSMILES('[CH3]')] - spcTuples = [(spcA, spc) for spc in spcs] + spcTuples = [(spcA, spc, ['H_Abstraction']) for spc in spcs] rxns = list(react(*spcTuples)) - rxns += list(react(*[(spcs[0],spcs[1])])) + rxns += list(react(*[(spcs[0], spcs[1], ['H_Abstraction'])])) for rxn in rxns: cerm.makeNewReaction(rxn) @@ -243,7 +243,7 @@ def testMakeNewReaction(self): spcA = Species().fromSMILES('[OH]') spcs = [Species().fromSMILES('CC'), Species().fromSMILES('[CH3]')] - spcTuples = [(spcA, spc) for spc in spcs] + spcTuples = [(spcA, spc, ['H_Abstraction']) for spc in spcs] rxns = list(react(*spcTuples)) @@ -390,32 +390,6 @@ def testThermoFilterDown(self): self.assertEquals(len(difset),1) #should be one because we thermo filtered down to one edge species - def testInflate(self): - """ - Test that CoreEdgeReactionModel.inflate method correctly works. 
- """ - spcA = Species().fromSMILES('[OH]') - spcs = [Species().fromSMILES('CC'), Species().fromSMILES('[CH3]')] - spcTuples = [(spcA, spc) for spc in spcs] - - rxns = list(react(*spcTuples)) - - cerm = CoreEdgeReactionModel() - - for rxn in rxns: - cerm.makeNewReaction(rxn) - - """ - 3 expected H-abstraction reactions: - OH + CC = H2O + C[CH2] - OH + [CH3] = H2O + [CH2] - OH + [CH3] = [O] + C - """ - for i, rxn in enumerate(rxns): - rxns[i] = cerm.inflate(rxn) - - for rxn in rxns: - self.assertTrue(rxn.isBalanced()) def test_checkForExistingReaction_eliminates_identical_reactions(self): """ diff --git a/rmgpy/rmg/pdep.py b/rmgpy/rmg/pdep.py index fa35941a3d..30bd40dd59 100644 --- a/rmgpy/rmg/pdep.py +++ b/rmgpy/rmg/pdep.py @@ -44,7 +44,7 @@ from rmgpy.constants import R from rmgpy.pdep import Conformer, Configuration -from rmgpy.rmg.react import react +from rmgpy.rmg.react import reactPdep from rmgpy.exceptions import PressureDependenceError, NetworkError from rmgpy.data.kinetics.library import LibraryReaction @@ -300,7 +300,7 @@ def exploreIsomer(self, isomer): # Don't find reactions involving the new species as bimolecular # reactants or products with other core species (e.g. A + B <---> products) - newReactions = react((isomer,)) + newReactions = reactPdep((isomer,)) return newReactions diff --git a/rmgpy/rmg/react.py b/rmgpy/rmg/react.py index 9a08a07fb9..b78a4dc6a7 100644 --- a/rmgpy/rmg/react.py +++ b/rmgpy/rmg/react.py @@ -32,10 +32,14 @@ Contains functions for generating reactions. """ import itertools +import logging +import resource +import psutil +import os +from sys import platform from rmgpy.data.rmg import getDB -from rmgpy.scoop_framework.util import map_ - +from multiprocessing import Pool def react(*spcTuples): """ @@ -54,117 +58,267 @@ def react(*spcTuples): Returns a flat generator object containing the generated Reaction objects. """ - results = map_( + from rmgpy.rmg.main import maxproc + + # Get available RAM (GB)and procnum dependent on OS. 
+ if platform.startswith('linux'): + # linux + memory_available = psutil.virtual_memory().free / (1000.0 ** 3) + memory_use = psutil.Process(os.getpid()).memory_info()[0]/(1000.0 ** 3) + tmp = divmod(memory_available, memory_use) + tmp2 = min(maxproc, tmp[0]) + procnum = max(1, int(tmp2)) + if maxproc == 1: + logging.info('For reaction generation {0} process is used.'.format(procnum)) + else: + logging.info('For reaction generation {0} processes are used.'.format(procnum)) + elif platform == "darwin": + # OS X + memory_available = psutil.virtual_memory().available/(1000.0 ** 3) + memory_use = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/(1000.0 ** 3) + tmp = divmod(memory_available, memory_use) + tmp2 = min(maxproc, tmp[0]) + procnum = max(1, int(tmp2)) + if maxproc == 1: + logging.info('For reaction generation {0} process is used.'.format(procnum)) + else: + logging.info('For reaction generation {0} processes are used.'.format(procnum)) + else: + # Everything else + procnum = 1 + logging.info('For reaction generation {0} process is used.'.format(procnum)) + + # Execute multiprocessing map. It blocks until the result is ready. + # This method chops the iterable into a number of chunks which it + # submits to the process pool as separate tasks. + p = Pool(processes=procnum) + + reactions = p.map( reactSpecies, spcTuples) - reactions = itertools.chain.from_iterable(results) + p.close() + p.join() - return reactions + return itertools.chain.from_iterable(reactions) -def reactSpecies(speciesTuple): +def reactSpecies(speciesTupleTmp): """ Given a tuple of Species objects, generates all possible reactions from the loaded reaction families and combines degenerate reactions. - - The generated reactions are deflated. 
""" - speciesTuple = tuple([spc.copy(deep=True) for spc in speciesTuple]) - reactions = getDB('kinetics').generate_reactions_from_families(speciesTuple) + speciesTuple = speciesTupleTmp[0:-1] + own_families = speciesTupleTmp[-1] + + speciesTuple = tuple([spc.copy(deep=True) for spc in speciesTuple]) - deflate(reactions, - [spec for spec in speciesTuple], - [spec.index for spec in speciesTuple]) + reactions = getDB('kinetics').generate_reactions_from_families(speciesTuple, only_families=own_families) return reactions -def deflate(rxns, species, reactantIndices): +def reactAll(coreSpcList, numOldCoreSpecies, unimolecularReact, bimolecularReact, trimolecularReact=None): + """ + Reacts the core species list via uni-, bi-, and trimolecular + reactions. """ - The purpose of this function is to replace the reactants and - products of a reaction, stored as Molecule objects by - integer indices, corresponding to the species core index. - - Creates a dictionary with Molecule objects as keys and newly - created Species objects as values. - It iterates over the reactantIndices array, with elements in this array - corresponding to the indices of the core species. It creates a - Molecule -> index entry in the previously created dictionary. + from rmgpy.rmg.main import maxproc + + # Load kineticsFamilies to be added to reactant tuple to allow for improved load balancing + # in parallel jobs. + split_listOrig = [] + split_list_tmp = [] + for key in getDB('kinetics').families: + split_listOrig.append(key) + split_list_tmp.append(key) + + if maxproc == 1: + # Select reactive species that can undergo unimolecular reactions: + spc_tuplestmp = [(core_spc_list[i], split_listOrig) + for i in xrange(numOldCoreSpecies) if (unimolecularReact[i] and core_spc_list[i].reactive)] + + for i in xrange(numOldCoreSpecies): + for j in xrange(i, numOldCoreSpecies): + # Find reactions involving the species that are bimolecular. 
+ # This includes a species reacting with itself (if its own concentration is high enough). + if bimolecularReact[i,j]: + if core_spc_list[i].reactive and core_spc_list[j].reactive: + spc_tuplestmp.append((core_spc_list[i], core_spc_list[j], split_listOrig)) + + if trimolecularReact is not None: + for i in xrange(numOldCoreSpecies): + for j in xrange(i, numOldCoreSpecies): + for k in xrange(j, numOldCoreSpecies): + # Find reactions involving the species that are trimolecular. + if trimolecularReact[i,j,k]: + if core_spc_list[i].reactive and core_spc_list[j].reactive and core_spc_list[k].reactive: + spc_tuplestmp.append((core_spc_list[i], core_spc_list[j], core_spc_list[k], split_listOrig)) + else: + # Select reactive species that can undergo unimolecular reactions: + spc_tuples = [(core_spc_list[i],) + for i in xrange(numOldCoreSpecies) if (unimolecularReact[i] and core_spc_list[i].reactive)] + + for i in xrange(numOldCoreSpecies): + for j in xrange(i, numOldCoreSpecies): + # Find reactions involving the species that are bimolecular. + # This includes a species reacting with itself (if its own concentration is high enough). + if bimolecularReact[i,j]: + if core_spc_list[i].reactive and core_spc_list[j].reactive: + spc_tuples.append((core_spc_list[i], core_spc_list[j])) + + if trimolecularReact is not None: + for i in xrange(numOldCoreSpecies): + for j in xrange(i, numOldCoreSpecies): + for k in xrange(j, numOldCoreSpecies): + # Find reactions involving the species that are trimolecular. + if trimolecularReact[i,j,k]: + if core_spc_list[i].reactive and core_spc_list[j].reactive and core_spc_list[k].reactive: + spc_tuples.append((core_spc_list[i], core_spc_list[j], core_spc_list[k])) + + + # Identify and split families that are prone to generate many reactions into sublists. 
+ split_list = [] + for i in enumerate(split_list_tmp): + if split_list_tmp[i] == 'H_Abstraction': + split_list_tmp[i] = [] + split_list.append(['H_Abstraction']) + elif split_list_tmp[i] == 'R_Recombination': + split_list_tmp[i] = [] + split_list.append(['R_Recombination']) + elif split_list_tmp[i] == 'Intra_Disproportionation': + split_list_tmp[i] = [] + split_list.append(['Intra_Disproportionation']) + elif split_list_tmp[i] == 'Intra_RH_Add_Endocyclic': + split_list_tmp[i] = [] + split_list.append(['Intra_RH_Add_Endocyclic']) + elif split_list_tmp[i] == 'Singlet_Carbene_Intra_Disproportionation': + split_list_tmp[i] = [] + split_list.append(['Singlet_Carbene_Intra_Disproportionation']) + elif split_list_tmp[i] == 'Intra_ene_reaction': + split_list_tmp[i] = [] + split_list.append(['Intra_ene_reaction']) + elif split_list_tmp[i] == 'Disproportionation': + split_list_tmp[i] = [] + split_list.append(['Disproportionation']) + elif split_list_tmp[i] == '1,4_Linear_birad_scission': + split_list_tmp[i] = [] + split_list.append(['1,4_Linear_birad_scission']) + elif split_list_tmp[i] == 'R_Addition_MultipleBond': + split_list_tmp[i] = [] + split_list.append(['R_Addition_MultipleBond']) + elif split_list_tmp[i] == '2+2_cycloaddition_Cd': + split_list_tmp[i] = [] + split_list.append(['2+2_cycloaddition_Cd']) + elif split_list_tmp[i] == 'Diels_alder_addition': + split_list_tmp[i] = [] + split_list.append(['Diels_alder_addition']) + elif split_list_tmp[i] == 'Intra_RH_Add_Exocyclic': + split_list_tmp[i] = [] + split_list.append(['Intra_RH_Add_Exocyclic']) + elif split_list_tmp[i] == 'Intra_Retro_Diels_alder_bicyclic': + split_list_tmp[i] = [] + split_list.append(['Intra_Retro_Diels_alder_bicyclic']) + elif split_list_tmp[i] == 'Intra_2+2_cycloaddition_Cd': + split_list_tmp[i] = [] + split_list.append(['Intra_2+2_cycloaddition_Cd']) + elif split_list_tmp[i] == 'Birad_recombination': + split_list_tmp[i] = [] + split_list.append(['Birad_recombination']) + elif split_list_tmp[i] 
== 'Intra_Diels_alder_monocyclic': + split_list_tmp[i] = [] + split_list.append(['Intra_Diels_alder_monocyclic']) + elif split_list_tmp[i] == '1,4_Cyclic_birad_scission': + split_list_tmp[i] = [] + split_list.append(['1,4_Cyclic_birad_scission']) + elif split_list_tmp[i] == '1,2_Insertion_carbene': + split_list_tmp[i] = [] + split_list.append(['1,2_Insertion_carbene']) + + # Remove empty lists from remaining split_list_tmp. It now contains only + # families that are not mentioned above. + split_list.append(filter(None, split_list_tmp)) + + # Only employ family splitting for reactants that have a larger number than nAFS. + nAFS = 10 + + spc_tuplestmp = [] + # Append reaction families to reactant tuple. + for tmpj in spc_tuples: + if len(tmpj) == 1: + if len(str(tmpj[0])) > nAFS: + for tmpl in split_list: + tmpk = list(tmpj) + tmpk.append(tmpl) + spc_tuplestmp.append(tuple(tmpk)) + else: + tmpk = list(tmpj) + tmpk.append(split_listOrig) + spc_tuplestmp.append(tuple(tmpk)) + elif len(tmpj) == 2: + if (len(str(tmpj[0])) > nAFS + ) or (len(str(tmpj[1])) > nAFS): + for tmpl in split_list: + tmpk = list(tmpj) + tmpk.append(tmpl) + spc_tuplestmp.append(tuple(tmpk)) + else: + tmpk = list(tmpj) + tmpk.append(split_listOrig) + spc_tuplestmp.append(tuple(tmpk)) + else: + if (len(str(tmpj[0])) > nAFS + ) or (len(str(tmpj[1])) > nAFS + ) or (len(str(tmpj[2])) > nAFS): + for tmpl in split_list: + tmpk = list(tmpj) + tmpk.append(tmpl) + spc_tuplestmp.append(tuple(tmpk)) + else: + tmpk = list(tmpj) + tmpk.append(split_listOrig) + spc_tuplestmp.append(tuple(tmpk)) + + rxns = list(react(*spc_tuplestmp)) - It iterates over the reaction list, and iteratively updates the - created dictionary as more reactions are processed. - """ + return rxns - molDict = {} +def reactPdep(*spc_tuples): + """ + Generate reactions between the species in the + list of species tuples for all the reaction families available. 
- for i, coreIndex in enumerate(reactantIndices): - if coreIndex != -1: - for mol in species[i].molecule: - molDict[mol] = coreIndex + For each tuple of one or more Species objects [(spc1,), (spc2, spc3), ...] + the following is done: - for rxn in rxns: - deflateReaction(rxn, molDict) - try: - deflateReaction(rxn.reverse, molDict) - except AttributeError: - pass + A list of tuples is created for each resonance isomer of the species. + Each tuple consists of (Molecule, index) with the index the species index of the Species object. + Possible combinations between the first spc in the tuple, and the second species in the tuple + is obtained by taking the combinatorial product of the two generated [(Molecule, index)] lists. -def reactAll(coreSpcList, numOldCoreSpecies, unimolecularReact, bimolecularReact, trimolecularReact=None): - """ - Reacts the core species list via uni-, bi-, and trimolecular - reactions. + Returns a flat generator object containing the generated Reaction objects. """ - # Select reactive species that can undergo unimolecular reactions: - spcTuples = [(coreSpcList[i],) - for i in xrange(numOldCoreSpecies) if (unimolecularReact[i] and coreSpcList[i].reactive)] + reactions = map( + react_species_pdep, + spc_tuples) - for i in xrange(numOldCoreSpecies): - for j in xrange(i, numOldCoreSpecies): - # Find reactions involving the species that are bimolecular - # This includes a species reacting with itself (if its own concentration is high enough) - if bimolecularReact[i,j]: - if coreSpcList[i].reactive and coreSpcList[j].reactive: - spcTuples.append((coreSpcList[i], coreSpcList[j])) + return itertools.chain.from_iterable(reactions) - if trimolecularReact is not None: - for i in xrange(numOldCoreSpecies): - for j in xrange(i, numOldCoreSpecies): - for k in xrange(j, numOldCoreSpecies): - # Find reactions involving the species that are trimolecular - if trimolecularReact[i,j,k]: - if coreSpcList[i].reactive and coreSpcList[j].reactive and 
coreSpcList[k].reactive: - spcTuples.append((coreSpcList[i], coreSpcList[j], coreSpcList[k])) - rxns = list(react(*spcTuples)) - return rxns +def react_species_pdep(species_tuple): + """ + Given a tuple of Species objects, generates all possible reactions + from the loaded reaction families and combines degenerate reactions. + """ + species_tuple = tuple([spc.copy(deep=True) for spc in species_tuple]) -def deflateReaction(rxn, molDict): - """ - This function deflates a single reaction holding species objects, and uses the provided - dictionary to populate reactants/products/pairs with integer indices, - if possible. + reactions = getDB('kinetics').generate_reactions_from_families(species_tuple) + + return reactions - If the Molecule object could not be found in the dictionary, a new - dictionary entry is created, using the Species object as the value - for the entry. - The reactants/products/pairs of both the forward and reverse reaction - object are populated with the value of the dictionary, either an - integer index, or either a Species object. 
- """ - for spec in itertools.chain(rxn.reactants, rxn.products): - if not spec.molecule[0] in molDict: - molDict[spec.molecule[0]] = spec - - rxn.reactants = [molDict[spec.molecule[0]] for spec in rxn.reactants] - rxn.products = [molDict[spec.molecule[0]] for spec in rxn.products] - try: - rxn.pairs = [(molDict[reactant.molecule[0]], molDict[product.molecule[0]]) for reactant, product in rxn.pairs] - except ValueError: - rxn.pairs = None diff --git a/rmgpy/rmg/reactTest.py b/rmgpy/rmg/reactTest.py index 8bf9fc5e3e..dbebe9d280 100644 --- a/rmgpy/rmg/reactTest.py +++ b/rmgpy/rmg/reactTest.py @@ -40,7 +40,7 @@ from rmgpy.species import Species from rmgpy.rmg.main import RMG -from rmgpy.rmg.react import react, reactAll, deflate, deflateReaction +from rmgpy.rmg.react import react, reactAll ################################################### @@ -73,73 +73,26 @@ def testReact(self): """ spcA = Species().fromSMILES('[OH]') spcs = [Species().fromSMILES('CC'), Species().fromSMILES('[CH3]')] - spcTuples = [(spcA, spc) for spc in spcs] + spcTuples = [(spcA, spc, ['H_Abstraction']) for spc in spcs] reactionList = list(react(*spcTuples)) self.assertIsNotNone(reactionList) self.assertTrue(all([isinstance(rxn, TemplateReaction) for rxn in reactionList])) - def testDeflate(self): + def testReactMultiproc(self): """ - Test that reaction deflate function works. + Test that reaction generation from the available families works with python multiprocessing. 
""" - molA = Species().fromSMILES('[OH]') - molB = Species().fromSMILES('CC') - molC = Species().fromSMILES('[CH3]') + import rmgpy.rmg.main + rmgpy.rmg.main.maxproc = 2 - reactants = [molA, molB] - - # both reactants were already part of the core: - reactantIndices = [1, 2] - - rxn = Reaction(reactants=[molA, molB], products=[molC], - pairs=[(molA, molC), (molB, molC)]) - - deflate([rxn], reactants, reactantIndices) - - for spc, t in zip(rxn.reactants, [int, int]): - self.assertTrue(isinstance(spc, t)) - self.assertEquals(rxn.reactants, reactantIndices) - for spc in rxn.products: - self.assertTrue(isinstance(spc, Species)) - - # one of the reactants was not yet part of the core: - reactantIndices = [-1, 2] - - rxn = Reaction(reactants=[molA, molB], products=[molC], - pairs=[(molA, molC), (molB, molC)]) - - deflate([rxn], reactants, reactantIndices) - - for spc, t in zip(rxn.reactants, [Species, int]): - self.assertTrue(isinstance(spc, t)) - for spc in rxn.products: - self.assertTrue(isinstance(spc, Species)) - - def testReactStoreIndices(self): - """ - Test that reaction generation keeps track of the original species indices. 
- """ - - indices = {'[OH]':1, 'CC':2, '[CH3]':3} - - # make it bidirectional so that we can look-up indices as well: - revd=dict([reversed(i) for i in indices.items()]) - indices.update(revd) - - spcA = Species(index=indices['[OH]']).fromSMILES('[OH]') - spcs = [Species(index=indices['CC']).fromSMILES('CC'), - Species(index=indices['[CH3]']).fromSMILES('[CH3]')] - - spcTuples = [(spcA, spc) for spc in spcs] + spcA = Species().fromSMILES('[OH]') + spcs = [Species().fromSMILES('CC'), Species().fromSMILES('[CH3]')] + spcTuples = [(spcA, spc, ['H_Abstraction']) for spc in spcs] reactionList = list(react(*spcTuples)) self.assertIsNotNone(reactionList) - self.assertEquals(len(reactionList), 3) - for rxn in reactionList: - for i, reactant in enumerate(rxn.reactants): - rxn.reactants[i] = Molecule().fromSMILES(indices[reactant]) - self.assertTrue(rxn.isBalanced()) + self.assertTrue(all([isinstance(rxn, TemplateReaction) for rxn in reactionList])) def testReactAll(self): """ @@ -157,44 +110,6 @@ def testReactAll(self): self.assertIsNotNone(rxns) self.assertTrue(all([isinstance(rxn, TemplateReaction) for rxn in rxns])) - def testDeflateReaction(self): - """ - Test if the deflateReaction function works. 
- """ - - molA = Species().fromSMILES('[OH]') - molB = Species().fromSMILES('CC') - molC = Species().fromSMILES('[CH3]') - - # both reactants were already part of the core: - reactantIndices = [1, 2] - molDict = {molA.molecule[0]: 1, molB.molecule[0]: 2} - - rxn = Reaction(reactants=[molA, molB], products=[molC], - pairs=[(molA, molC), (molB, molC)]) - - deflateReaction(rxn, molDict) - - for spc, t in zip(rxn.reactants, [int, int]): - self.assertTrue(isinstance(spc, t)) - self.assertEquals(rxn.reactants, reactantIndices) - for spc in rxn.products: - self.assertTrue(isinstance(spc, Species)) - - # one of the reactants was not yet part of the core: - reactantIndices = [-1, 2] - molDict = {molA.molecule[0]: molA, molB.molecule[0]: 2} - - rxn = Reaction(reactants=[molA, molB], products=[molC], - pairs=[(molA, molC), (molB, molC)]) - - deflateReaction(rxn, molDict) - - for spc, t in zip(rxn.reactants, [Species, int]): - self.assertTrue(isinstance(spc, t), 'Species {} is not of type {}'.format(spc,t)) - for spc in rxn.products: - self.assertTrue(isinstance(spc, Species)) - def tearDown(self): """ diff --git a/rmgpy/rmg/rmgTest.py b/rmgpy/rmg/rmgTest.py index 2ab11dc804..b905fbc029 100644 --- a/rmgpy/rmg/rmgTest.py +++ b/rmgpy/rmg/rmgTest.py @@ -185,7 +185,7 @@ def testRestartFileGenerationAndParsing(self): self.rmg.reactionModel.core.species.append(spc2) newReactions = [] - newReactions.extend(react((spc1,spc2))) + newReactions.extend(react((spc1, spc2, ['H_Abstraction']))) # process newly generated reactions to make sure no duplicated reactions self.rmg.reactionModel.processNewReactions(newReactions, spc2, None) From f2c77659c4d27bd4334f694b714efcb49b4fd050 Mon Sep 17 00:00:00 2001 From: Agnes Date: Thu, 30 May 2019 18:44:31 -0400 Subject: [PATCH 02/28] Allow QMTP file generation with multiple processes --- rmgpy/rmg/model.py | 73 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/rmgpy/rmg/model.py 
b/rmgpy/rmg/model.py index 3af4085573..7a2b04e84e 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -58,12 +58,37 @@ import rmgpy.data.rmg from .react import reactAll +from rmgpy.data.kinetics.common import ensure_independent_atom_ids, find_degenerate_reactions from pdep import PDepReaction, PDepNetwork # generateThermoDataFromQM under the Species class imports the qm package ################################################################################ +def CalculateThermoParallel(spc): + from rmgpy.rmg.input import getInput + + try: + quantumMechanics = getInput('quantumMechanics') + except Exception: + logging.debug('Quantum Mechanics DB could not be found.') + quantumMechanics = None + + spc.generate_resonance_structures() + original_molecule = spc.molecule[0] + + if not quantumMechanics: + pass + else: + if quantumMechanics.settings.onlyCyclics and not original_molecule.isCyclic(): +# print 'pass' + else: + print 'try a QM calculation' + if original_molecule.getRadicalCount() > quantumMechanics.settings.maxRadicalNumber: + print 'Too many radicals for direct calculation: use HBI.' + else: + print 'Not too many radicals: do a direct calculation.' 
+ thermo0 = quantumMechanics.getThermoData(original_molecule) # returns None if it fails class ReactionModel: """ @@ -618,8 +643,54 @@ def enlarge(self, newObject=None, reactEdge=False, rxns = reactAll(self.core.species, numOldCoreSpecies, unimolecularReact, bimolecularReact, trimolecularReact=trimolecularReact) - spcs = [self.retrieveNewSpecies(rxn) for rxn in rxns] + #spcs = [self.retrieveNewSpecies(rxn) for rxn in rxns] + + # get new species and save in spcs + spcs = [] + for rxn in rxns: + spcs.extend(rxn.reactants) + spcs.extend(rxn.products) + + ensure_independent_atom_ids(spcs, resonance=True) + # Get available RAM (GB)and procnum dependent on OS + if platform.startswith('linux'): + # linux + memoryavailable = psutil.virtual_memory().free / (1000.0 ** 3) + memoryuse = psutil.Process(os.getpid()).memory_info()[0]/(1000.0 ** 3) + tmp = divmod(memoryavailable, memoryuse) +# logging.info("Memory use is {0} GB, available memory is {2} GB and max allowed " +# "number of processes is {1}.".format(memoryuse, tmp[0], memoryavailable)) + tmp2 = min(maxproc, tmp[0]) + procnum = max(1, int(tmp2)) + elif platform == "darwin": + # OS X + memoryavailable = psutil.virtual_memory().available/(1000.0 ** 3) + memoryuse = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/(1000.0 ** 3) + tmp = divmod(memoryavailable, memoryuse) +# logging.info("Memory use is {0} GB, available memory is {2} GB and max allowed " +# "number of processes is {1}.".format(memoryuse, tmp[0], memoryavailable)) + tmp2 = min(maxproc, tmp[0]) + procnum = max(1, int(tmp2)) + else: + # Everything else + procnum = 1 + + # Execute multiprocessing map. It blocks until the result is ready. + # This method chops the iterable into a number of chunks which it + # submits to the process pool as separate tasks. 
+ p = Pool(processes=procnum) + p.map(CalculateThermoParallel,spcs) +# for spc in spcs: +# spc.generate_resonance_structures() +# original_molecule = spc.molecule[0] +# # Returns unsorted list, depending on which one is returned fastest +# p.apply_async(submit_own, (original_molecule,)) + p.close() + p.join() + + ensure_independent_atom_ids(spcs, resonance=True) + for rxn, spc in zip(rxns, spcs): rxn = self.inflate(rxn) try: From 07e75fccdb1c5578d2454d251d6d4c6f45132e83 Mon Sep 17 00:00:00 2001 From: Agnes Date: Thu, 30 May 2019 18:44:31 -0400 Subject: [PATCH 03/28] Deprecate scoop functionality --- rmgpy/scoop_framework/util.py | 16 +++++++++++++++- rmgpy/thermo/thermoengine.py | 4 ++-- rmgpy/thermo/thermoengineTest.py | 29 ----------------------------- 3 files changed, 17 insertions(+), 32 deletions(-) diff --git a/rmgpy/scoop_framework/util.py b/rmgpy/scoop_framework/util.py index 1a593ef683..75b315592a 100644 --- a/rmgpy/scoop_framework/util.py +++ b/rmgpy/scoop_framework/util.py @@ -55,6 +55,9 @@ def warnScoopStartedProperly(func): @wraps(func) def wrapper(*args, **kwargs): + warnings.warn("The option scoop is no longer supported" + "and may be removed after Version: 2.4 ", DeprecationWarning) + futures_not_loaded = 'scoop.futures' not in sys.modules warnings.simplefilter('ignore', RuntimeWarning) @@ -104,6 +107,9 @@ class WorkerWrapper(object): """ __name__ = 'WorkerWrapper' + warnings.warn("The option scoop is no longer supported" + "and may be removed after Version: 2.4 ", DeprecationWarning) + def __init__(self, myfn): self.myfn = myfn @@ -121,7 +127,9 @@ def broadcast(obj, key): """ Broadcasts the object across the workers using the key parameter as the key. """ - + warnings.warn("The option scoop is no longer supported" + "and may be removed after Version: 2.4 ", DeprecationWarning) + kwargs = {key : obj} try: if shared.getConst(key): @@ -141,6 +149,8 @@ def get(key): parameter key. 
""" + warnings.warn("The option scoop is no longer supported" + "and may be removed after Version: 2.4 ", DeprecationWarning) try: data = shared.getConst(key, timeout=1e-9) return data @@ -151,6 +161,8 @@ def get(key): logger.debug('SCOOP not loaded. Not retrieving the shared object with key {}'.format(key)) def map_(*args, **kwargs): + warnings.warn("The option scoop is no longer supported" + "and may be removed after Version: 2.4 ", DeprecationWarning) return map(WorkerWrapper(args[0]), *args[1:], **kwargs) def submit_(func, *args, **kwargs): @@ -160,6 +172,8 @@ def submit_(func, *args, **kwargs): returns the return value of the called function, or when SCOOP is loaded, the future object. """ + warnings.warn("The option scoop is no longer supported" + "and may be removed after Version: 2.4 ", DeprecationWarning) try: task = submit(WorkerWrapper(func), *args, **kwargs)#returns immediately return task diff --git a/rmgpy/thermo/thermoengine.py b/rmgpy/thermo/thermoengine.py index 91322fa3a2..f436c5b9ff 100644 --- a/rmgpy/thermo/thermoengine.py +++ b/rmgpy/thermo/thermoengine.py @@ -32,7 +32,6 @@ import math import logging as logging -from rmgpy.scoop_framework.util import submit_ from rmgpy.data.rmg import getDB import rmgpy.constants as constants from rmgpy.molecule import Molecule @@ -185,4 +184,5 @@ def submit(spc, solventName = ''): the result. 
""" - spc.thermo = submit_(evaluator, spc, solventName= solventName) + spc.thermo = evaluator(spc, solventName= solventName) + diff --git a/rmgpy/thermo/thermoengineTest.py b/rmgpy/thermo/thermoengineTest.py index 5a5f78dbde..fb637eb027 100644 --- a/rmgpy/thermo/thermoengineTest.py +++ b/rmgpy/thermo/thermoengineTest.py @@ -41,7 +41,6 @@ from rmgpy import settings from rmgpy.data.rmg import RMGDatabase from rmgpy.rmg.main import RMG -from rmgpy.scoop_framework.framework import TestScoopCommon from rmgpy.species import Species from rmgpy.thermo.thermoengine import submit @@ -138,34 +137,6 @@ def funcSubmitGet(): return True -@work_in_progress -class AsyncThermoTest(TestScoopCommon): - - def __init__(self, *args, **kwargs): - # Parent initialization - super(self.__class__, self).__init__(*args, **kwargs) - - # Only setup the scoop framework once, and not in every test method: - super(self.__class__, self).setUp() - - @unittest.skipUnless(sys.platform.startswith("linux"), - "test currently only runs on linux") - def testSubmit(self): - """ - Test that we can submit a request to generate - thermo/transport for a number of species. - """ - result = futures._startup(funcSubmit) - self.assertEquals(result, True) - - @unittest.skipUnless(sys.platform.startswith("linux"), - "test currently only runs on linux") - def testGet(self): - """ - Test that we can get the data of a number of species. 
- """ - result = futures._startup(funcGet) - self.assertEquals(result, True) if __name__ == '__main__' and os.environ.get('IS_ORIGIN', "1") == "1": unittest.main() From 444906f28864af18ecbc61cd26dfb00fada9e2e7 Mon Sep 17 00:00:00 2001 From: Agnes Date: Thu, 30 May 2019 18:44:31 -0400 Subject: [PATCH 04/28] Remove retrieveNewSpecies --- rmgpy/rmg/model.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index 7a2b04e84e..cd620fbe3f 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -643,7 +643,6 @@ def enlarge(self, newObject=None, reactEdge=False, rxns = reactAll(self.core.species, numOldCoreSpecies, unimolecularReact, bimolecularReact, trimolecularReact=trimolecularReact) - #spcs = [self.retrieveNewSpecies(rxn) for rxn in rxns] # get new species and save in spcs spcs = [] @@ -692,11 +691,6 @@ def enlarge(self, newObject=None, reactEdge=False, ensure_independent_atom_ids(spcs, resonance=True) for rxn, spc in zip(rxns, spcs): - rxn = self.inflate(rxn) - try: - rxn.reverse = self.inflate(rxn.reverse) - except AttributeError: - pass self.processNewReactions([rxn], spc) ################################################################ @@ -1963,20 +1957,6 @@ def getSpecies(self, obj): return spc return obj - def retrieveNewSpecies(self, deflatedRxn): - """ - Searches for the first reactant or product in the deflated reaction - that is represented by an integer. - - Such an object refers to a core species that was used to generate the - reaction in the first place. Reactants or products represented by an - object that is not an integer will be a newly-generated structure. 
- """ - for obj in itertools.chain(deflatedRxn.reactants, deflatedRxn.products): - if isinstance(obj, int): - return self.getSpecies(obj) - raise Exception("No core species were found in either reactants or products of {0}!".format(deflatedRxn)) - def generateReactionKey(rxn, useProducts=False): """ From cd40e1630d652d714d1ae2f4355f0428eb08cbf5 Mon Sep 17 00:00:00 2001 From: Agnes Date: Thu, 30 May 2019 18:44:31 -0400 Subject: [PATCH 05/28] Addressed some of Codacy/PR Quality Review issues. --- rmgpy/rmg/main.py | 10 ++++------ rmgpy/rmg/model.py | 11 ++++++----- rmgpy/rmg/react.py | 28 +++++++++++++++++----------- rmgpy/rmg/reactTest.py | 2 -- rmgpy/scoop_framework/util.py | 1 - rmgpy/thermo/thermoengine.py | 1 - rmgpy/thermo/thermoengineTest.py | 2 -- 7 files changed, 27 insertions(+), 28 deletions(-) diff --git a/rmgpy/rmg/main.py b/rmgpy/rmg/main.py index 2de8049765..e4dfdff308 100644 --- a/rmgpy/rmg/main.py +++ b/rmgpy/rmg/main.py @@ -39,8 +39,6 @@ import logging import os import shutil -import psutil -import resource import numpy as np import gc @@ -60,7 +58,6 @@ from rmgpy.data.kinetics.family import KineticsFamily, TemplateReaction from rmgpy.rmg.pdep import PDepReaction -from rmgpy.data.thermo import ThermoLibrary from rmgpy.data.base import Entry from rmgpy import settings @@ -532,7 +529,6 @@ def initialize(self, **kwargs): if failsSpeciesConstraints(spec): if 'allowed' in self.speciesConstraints and 'input species' in self.speciesConstraints['allowed']: self.speciesConstraints['explicitlyAllowedMolecules'].append(spec.molecule[0]) - pass else: raise ForbiddenStructureException("Species constraints forbids input species {0}. 
Please reformulate constraints, remove the species, or explicitly allow it.".format(spec.label)) @@ -1706,8 +1702,10 @@ def loadRMGJavaInput(self, path): assert len(Tlist) > 0 assert len(Plist) > 0 concentrationList = np.array(concentrationList) - assert concentrationList.shape[1] > 0 # An arbitrary number of concentrations is acceptable, and should be run for each reactor system - + # An arbitrary number of concentrations is acceptable, and should be run for each reactor system + if not concentrationList.shape[1] > 0: + raise AssertionError() + # Make a reaction system for each (T,P) combination for T in Tlist: for P in Plist: diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index cd620fbe3f..1b37a2be11 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -33,7 +33,6 @@ """ import logging -import math import numpy import itertools import gc @@ -41,7 +40,6 @@ from rmgpy.display import display from rmgpy import settings -import rmgpy.constants as constants from rmgpy.constraints import failsSpeciesConstraints from rmgpy.quantity import Quantity from rmgpy.species import Species @@ -62,10 +60,13 @@ from pdep import PDepReaction, PDepNetwork -# generateThermoDataFromQM under the Species class imports the qm package - ################################################################################ -def CalculateThermoParallel(spc): +def calculate_thermo_parallel(spc): + """ + If quantumMechanics is turned on in the input file species thermo data is calculated + in this function. + """ + from rmgpy.rmg.input import getInput try: diff --git a/rmgpy/rmg/react.py b/rmgpy/rmg/react.py index b78a4dc6a7..e06110ef45 100644 --- a/rmgpy/rmg/react.py +++ b/rmgpy/rmg/react.py @@ -43,7 +43,7 @@ def react(*spcTuples): """ - Generate reactions between the species in the + Generate reactions between the species in the list of species tuples for all the reaction families available. For each tuple of one or more Species objects [(spc1,), (spc2, spc3), ...] 
@@ -89,16 +89,22 @@ def react(*spcTuples): logging.info('For reaction generation {0} process is used.'.format(procnum)) # Execute multiprocessing map. It blocks until the result is ready. - # This method chops the iterable into a number of chunks which it - # submits to the process pool as separate tasks. - p = Pool(processes=procnum) - - reactions = p.map( - reactSpecies, - spcTuples) + # This method chops the iterable into a number of chunks which it + # submits to the process pool as separate tasks. + if procnum == 1: + reactions = map( + reactSpecies, + spc_tuples) + else: + p = Pool(processes=procnum) + + reactions = p.map( + reactSpecies, + spc_tuples) + + p.close() + p.join() - p.close() - p.join() return itertools.chain.from_iterable(reactions) @@ -181,7 +187,7 @@ def reactAll(coreSpcList, numOldCoreSpecies, unimolecularReact, bimolecularReact # Identify and split families that are prone to generate many reactions into sublists. split_list = [] - for i in enumerate(split_list_tmp): + for i in range(len(split_list_tmp)): if split_list_tmp[i] == 'H_Abstraction': split_list_tmp[i] = [] split_list.append(['H_Abstraction']) diff --git a/rmgpy/rmg/reactTest.py b/rmgpy/rmg/reactTest.py index dbebe9d280..1e95423242 100644 --- a/rmgpy/rmg/reactTest.py +++ b/rmgpy/rmg/reactTest.py @@ -35,8 +35,6 @@ from rmgpy import settings from rmgpy.data.kinetics import TemplateReaction from rmgpy.data.rmg import RMGDatabase -from rmgpy.molecule import Molecule -from rmgpy.reaction import Reaction from rmgpy.species import Species from rmgpy.rmg.main import RMG diff --git a/rmgpy/scoop_framework/util.py b/rmgpy/scoop_framework/util.py index 75b315592a..2221eb3178 100644 --- a/rmgpy/scoop_framework/util.py +++ b/rmgpy/scoop_framework/util.py @@ -40,7 +40,6 @@ logger = None try: - from scoop import futures from scoop.futures import map, submit from scoop import shared from scoop import logger as scooplogger diff --git a/rmgpy/thermo/thermoengine.py b/rmgpy/thermo/thermoengine.py 
index f436c5b9ff..1176c20437 100644 --- a/rmgpy/thermo/thermoengine.py +++ b/rmgpy/thermo/thermoengine.py @@ -34,7 +34,6 @@ import logging as logging from rmgpy.data.rmg import getDB import rmgpy.constants as constants -from rmgpy.molecule import Molecule from rmgpy.statmech import Conformer from rmgpy.thermo import Wilhoit, NASA, ThermoData import rmgpy.data.rmg diff --git a/rmgpy/thermo/thermoengineTest.py b/rmgpy/thermo/thermoengineTest.py index fb637eb027..2a879a3e79 100644 --- a/rmgpy/thermo/thermoengineTest.py +++ b/rmgpy/thermo/thermoengineTest.py @@ -33,10 +33,8 @@ """ import os -import sys import unittest import random -from external.wip import work_in_progress from rmgpy import settings from rmgpy.data.rmg import RMGDatabase From 8cff1f8806a1d8c657b16d94fe50bcc9e618ca5b Mon Sep 17 00:00:00 2001 From: Agnes Date: Thu, 30 May 2019 18:44:31 -0400 Subject: [PATCH 06/28] QMTP parallel for new species. --- rmgpy/rmg/model.py | 150 ++++++++++++++++++++++++++++++--------------- 1 file changed, 100 insertions(+), 50 deletions(-) diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index 1b37a2be11..d92b8a7e2a 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -38,6 +38,12 @@ import gc import os +import resource +import psutil +from sys import platform + +from multiprocessing import Pool + from rmgpy.display import display from rmgpy import settings from rmgpy.constraints import failsSpeciesConstraints @@ -78,18 +84,20 @@ def calculate_thermo_parallel(spc): spc.generate_resonance_structures() original_molecule = spc.molecule[0] - if not quantumMechanics: +# if not quantumMechanics: +# pass +# else: + if quantumMechanics.settings.onlyCyclics and not original_molecule.isCyclic(): pass - else: - if quantumMechanics.settings.onlyCyclics and not original_molecule.isCyclic(): -# print 'pass' + #print 'pass' + else: + #print 'try a QM calculation' + if original_molecule.getRadicalCount() > quantumMechanics.settings.maxRadicalNumber: + pass + #print 'Too many 
radicals for direct calculation: use HBI.' else: - print 'try a QM calculation' - if original_molecule.getRadicalCount() > quantumMechanics.settings.maxRadicalNumber: - print 'Too many radicals for direct calculation: use HBI.' - else: - print 'Not too many radicals: do a direct calculation.' - thermo0 = quantumMechanics.getThermoData(original_molecule) # returns None if it fails + logging.info('Not too many radicals: do a direct QM calculation.') + thermo0 = quantumMechanics.getThermoData(original_molecule) # returns None if it fails class ReactionModel: """ @@ -645,49 +653,91 @@ def enlarge(self, newObject=None, reactEdge=False, rxns = reactAll(self.core.species, numOldCoreSpecies, unimolecularReact, bimolecularReact, trimolecularReact=trimolecularReact) - # get new species and save in spcs - spcs = [] +# # Calculate reaction degeneracy +# from rmgpy.data.rmg import getDB +# rxns = find_degenerate_reactions(rxns, kinetics_database=getDB('kinetics')) + + # Get new species and save in spcs + spcs_tmp = [] for rxn in rxns: - spcs.extend(rxn.reactants) - spcs.extend(rxn.products) + spcs_tmp.extend(rxn.reactants) + spcs_tmp.extend(rxn.products) + + spcs = spcs_tmp - ensure_independent_atom_ids(spcs, resonance=True) - - # Get available RAM (GB)and procnum dependent on OS - if platform.startswith('linux'): - # linux - memoryavailable = psutil.virtual_memory().free / (1000.0 ** 3) - memoryuse = psutil.Process(os.getpid()).memory_info()[0]/(1000.0 ** 3) - tmp = divmod(memoryavailable, memoryuse) -# logging.info("Memory use is {0} GB, available memory is {2} GB and max allowed " -# "number of processes is {1}.".format(memoryuse, tmp[0], memoryavailable)) - tmp2 = min(maxproc, tmp[0]) - procnum = max(1, int(tmp2)) - elif platform == "darwin": - # OS X - memoryavailable = psutil.virtual_memory().available/(1000.0 ** 3) - memoryuse = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/(1000.0 ** 3) - tmp = divmod(memoryavailable, memoryuse) -# logging.info("Memory use is {0} 
GB, available memory is {2} GB and max allowed " -# "number of processes is {1}.".format(memoryuse, tmp[0], memoryavailable)) - tmp2 = min(maxproc, tmp[0]) - procnum = max(1, int(tmp2)) - else: - # Everything else - procnum = 1 + from rmgpy.rmg.input import getInput + try: + quantumMechanics = getInput('quantumMechanics') + except Exception: + logging.debug('Quantum Mechanics DB could not be found.') + quantumMechanics = None - # Execute multiprocessing map. It blocks until the result is ready. - # This method chops the iterable into a number of chunks which it - # submits to the process pool as separate tasks. - p = Pool(processes=procnum) - p.map(CalculateThermoParallel,spcs) -# for spc in spcs: -# spc.generate_resonance_structures() -# original_molecule = spc.molecule[0] -# # Returns unsorted list, depending on which one is returned fastest -# p.apply_async(submit_own, (original_molecule,)) - p.close() - p.join() + if not quantumMechanics: + pass + else: + if not spcs_tmp: + spcs = spcs_tmp + #pass + else: + # Generate unique list of species to be submitted to QM thermo calculation + from rmgpy.molecule.molecule import Molecule + # intilize list + spcs=[spcs_tmp[0]] + for counter, spc in enumerate (spcs_tmp): + #print("counter {0} spc{1}".format(counter,spc)) + for counter2, val in enumerate (spcs): + #print("counter2 {0} val{1}".format(counter2,val)) + if (spc.molecule[0].toSMILES() != val.molecule[0].toSMILES()): + #print("Potentially append reactant to list.") + appendReactant = True + else: + #print("Reactant already in list.") + appendReactant = False + break + if appendReactant: + #print("Append reactant to list.") + spcs.append(spc) + #print spcs_tmp + #print(spcs) + + # Calculate quantum thermo in parallel + from rmgpy.rmg.main import maxproc + + # Get available RAM (GB)and procnum dependent on OS + if platform.startswith('linux'): + # linux + memoryavailable = psutil.virtual_memory().free / (1000.0 ** 3) + memoryuse = 
psutil.Process(os.getpid()).memory_info()[0]/(1000.0 ** 3) + tmp = divmod(memoryavailable, memoryuse) + # logging.info("Memory use is {0} GB, available memory is {2} GB and max allowed " + # "number of processes is {1}.".format(memoryuse, tmp[0], memoryavailable)) + tmp2 = min(maxproc, tmp[0]) + procnum = max(1, int(tmp2)) + elif platform == "darwin": + # OS X + memoryavailable = psutil.virtual_memory().available/(1000.0 ** 3) + memoryuse = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/(1000.0 ** 3) + tmp = divmod(memoryavailable, memoryuse) + # logging.info("Memory use is {0} GB, available memory is {2} GB and max allowed " + # "number of processes is {1}.".format(memoryuse, tmp[0], memoryavailable)) + tmp2 = min(maxproc, tmp[0]) + procnum = max(1, int(tmp2)) + else: + # Everything else + procnum = 1 + + # Execute multiprocessing map. It blocks until the result is ready. + # This method chops the iterable into a number of chunks which it + # submits to the process pool as separate tasks. 
+ p = Pool(processes=procnum) + p.map(CalculateThermoParallel,spcs) + # for spc in spcs: + # spc.generate_resonance_structures() + # original_molecule = spc.molecule[0] + # # Returns unsorted list, depending on which one is returned fastest + # p.apply_async(submit_own, (original_molecule,)) + p.close() + p.join() ensure_independent_atom_ids(spcs, resonance=True) From 0c3e9e679ea2178b9ba153236b63ddc217ff5714 Mon Sep 17 00:00:00 2001 From: Agnes Date: Thu, 30 May 2019 18:44:31 -0400 Subject: [PATCH 07/28] Update RMG documentation for parallel runs --- documentation/source/users/rmg/running.rst | 67 +++++++++++----------- 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/documentation/source/users/rmg/running.rst b/documentation/source/users/rmg/running.rst index 6d9849f389..972083a36c 100755 --- a/documentation/source/users/rmg/running.rst +++ b/documentation/source/users/rmg/running.rst @@ -4,7 +4,7 @@ Running a Job ************* -Running RMG job is easy and under different situations you might want add additional flag as the following examples. +Running a basic RMG job is straightforward. However, depending on your case you might want to add the flags outlined in the following examples. **Note:** In all these examples ``rmg.py`` should be the path to your installed RMG (eg. yours might be ``/Users/joeblogs/Code/RMG-Py/rmg.py``) and ``input.py`` is the path to the input file you wish to run (eg. yours might be ``RMG-runs/hexadiene/input.py``). If you get an error like ``python: can't open file 'rmg.py': [Errno 2] No such file or directory`` then probably the first of these is wrong. If you get an error like ``IOError: [Errno 2] No such file or directory: '/some/path/to/input.py'`` then probably the second of these is wrong. @@ -20,86 +20,83 @@ Run with CPU profiling:: python rmg.py input.py -p -We recommend you make a job-specific directory for each RMG simulation. 
Some jobs can take quite a while to complete, so we also recommend using a job scheduler (if working in an linux environment). +We recommend you make a job-specific directory for each RMG simulation. Some jobs can take quite a while to complete, so we also recommend using a job scheduler if working in an linux environment. -The instructions below describe more special cases for running an RMG job. +The instructions below describe special cases for running an RMG job. Running RMG in parallel with SLURM ---------------------------------- -RMG has the capability to run using multiple cores. Here is an example -job submission script for an RMG-Py job with a SLURM scheduler +RMG has the option to use multiple processes on one node for reaction generation and on-the-fly Quantum Mechanics Thermodynamic Property (QMTP) calculation. Here is an example submission script for an RMG-Py job with a SLURM scheduler. -The job named ``min_par`` reserves 24 CPUs on a single node -(``-np 24``), but uses only 12 workers (= 12 CPUs) in parallel during +The job reserves 24 tasks on a single node, but uses only 12 processes in parallel during the RMG-Py simulation. Make sure that: - the queue named ``debug`` exists on your SLURM scheduler. -- you modify the path to the parent folder of the RMG-Py installation folder -- you have an anaconda environment named ``rmg_env`` that contains RMG-Py's dependencies +- you modify the path to the parent folder of the RMG-Py installation folder. +- you have an anaconda environment named ``rmg_env`` that contains RMG-Py's dependencies. - the working directory from which you launched the job contains the RMG-Py input file ``input.py`` - -``-v`` adds verbosity to the output log file. - .. 
code:: bash #!/bin/bash + #SBATCH -p debug - #SBATCH -J min_par + #SBATCH -J jobname #SBATCH -n 24 - hosts=$(srun bash -c hostname) - - WORKERS=12 - + Processes=12 RMG_WS=/path/to/RMG/parent/folder export PYTHONPATH=$PYTHONPATH:$RMG_WS/RMG-Py/ source activate rmg_env - python -m scoop -n $WORKERS --host $hosts -v $RMG_WS/RMG-Py/rmg.py input.py + + python -n $Processes $RMG_WS/RMG-Py/rmg.py input.py + source deactivate Running RMG in parallel with SGE -------------------------------- -RMG has the capability to run using multiple cores. Here is an example -using the SGE scheduler. +RMG has the option to use multiple processes on one node for reaction generation and on-the-fly Quantum Mechanics Thermodynamic Property (QMTP) calculation. Here is an example submission script for an RMG-Py job with a SGE scheduler. -In order to help understand, the example job is also named ``min_par`` -reserving 24 CPUs on a single node (``#$ -pe singlenode 24``), but uses -only 12 workers (= 12 CPUs) in parallel during the RMG-Py simulation. +The job reserves 24 tasks on a single node, but uses only 12 processes in parallel during +the RMG-Py simulation. Make sure that: -- the queue named ``normal`` exists on your SGE scheduler +- the queue named ``debug`` exists on your SGE scheduler. - you modify the path to the parent folder of the RMG-Py installation - folder + folder. - you have an anaconda environment named ``rmg_env`` that contains - RMG-Py's dependencies + RMG-Py's dependencies. - the working directory from which you launched the job - contains the RMG-Py input file ``input.py`` - -``-v`` adds verbosity to the output log file + contains the RMG-Py input file ``input.py``. .. code:: bash #! 
/bin/bash - #$ -o job.log - #$ -l normal - #$ -N min_par + #$ -l debug + #$ -N jobname #$ -pe singlenode 24 - WORKERS=12 - + Processes=12 RMG_WS=/path/to/RMG/parent/folder export PYTHONPATH=$PYTHONPATH:$RMG_WS/RMG-Py/ source activate rmg_env - python -m scoop --tunnel -n $WORKERS -v $RMG_WS/RMG-Py/rmg.py input.py + + python -n $Processes $RMG_WS/RMG-Py/rmg.py input.py source deactivate + +Details on the implementation +-------------------------------- + +Currently, multiprocessing is implemented for reaction generation and the generation of QMfiles when using the QMTP option to compute thermodynamic properties of species. The processes are spawned and closed within each function. The number of processes is determined based on the ratio of currently available RAM and currently used RAM. The user can input the maximum number of allowed processes from the command line. For each reaction generation or QMTP call the number of processes will be the minimum value of either the number of allowed processes due to user input or the value obtained by the RAM ratio. The RAM limitation is employed, because multiprocessing is forking the base process and the memory limit (SWAP + RAM) might be exceeded when using too many processors for a base process large in memory. + +In python 3.4 new forking contexts 'spawn' and 'forkserver' are available. These methods will create new processes which share nothing or limited state with the parent and all memory passing is explicit. Once RMG is transferred to python 3 it is recommended to use the spawn or forkserver forking context to potentially allow for an increased number of processes. 
From 0da0280c20cd79d13442949e5de2a80f9a13479e Mon Sep 17 00:00:00 2001 From: Agnes Date: Thu, 30 May 2019 18:44:32 -0400 Subject: [PATCH 08/28] Minor code cleanup --- rmg.py | 2 +- rmgpy/rmg/main.py | 4 +++- rmgpy/rmg/model.py | 33 +++------------------------------ rmgpy/rmg/react.py | 16 ++++++++-------- 4 files changed, 15 insertions(+), 40 deletions(-) diff --git a/rmg.py b/rmg.py index 2401f0abfe..bc6c7caf84 100755 --- a/rmg.py +++ b/rmg.py @@ -126,7 +126,7 @@ def main(): args = parse_command_line_arguments() if args.postprocess: - print "Postprocessing the profiler statistics (will be appended to RMG.log)" + logging.info("Postprocessing the profiler statistics (will be appended to RMG.log)") else: # Initialize the logging system (resets the RMG.log file) level = logging.INFO diff --git a/rmgpy/rmg/main.py b/rmgpy/rmg/main.py index e4dfdff308..67ee812672 100644 --- a/rmgpy/rmg/main.py +++ b/rmgpy/rmg/main.py @@ -495,7 +495,9 @@ def initialize(self, **kwargs): pass if maxproc > psutil.cpu_count(): - raise ValueError('Invalid input for user defined maximum number of processes {0}; should be an integer and smaller or equal to your available number of processes {1}'.format(maxproc, psutil.cpu_count())) + raise ValueError("""Invalid input for user defined maximum number of processes {0}; + should be an integer and smaller or equal to your available number of + processes {1}""".format(maxproc, psutil.cpu_count())) # Initialize reaction model if restart: diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index d92b8a7e2a..419107524e 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -49,6 +49,7 @@ from rmgpy.constraints import failsSpeciesConstraints from rmgpy.quantity import Quantity from rmgpy.species import Species +from rmgpy.molecule.molecule import Molecule from rmgpy.thermo.thermoengine import submit from rmgpy.reaction import Reaction from rmgpy.exceptions import ForbiddenStructureException @@ -67,6 +68,7 @@ from pdep import PDepReaction, 
PDepNetwork ################################################################################ + def calculate_thermo_parallel(spc): """ If quantumMechanics is turned on in the input file species thermo data is calculated @@ -84,17 +86,11 @@ def calculate_thermo_parallel(spc): spc.generate_resonance_structures() original_molecule = spc.molecule[0] -# if not quantumMechanics: -# pass -# else: if quantumMechanics.settings.onlyCyclics and not original_molecule.isCyclic(): pass - #print 'pass' else: - #print 'try a QM calculation' if original_molecule.getRadicalCount() > quantumMechanics.settings.maxRadicalNumber: pass - #print 'Too many radicals for direct calculation: use HBI.' else: logging.info('Not too many radicals: do a direct QM calculation.') thermo0 = quantumMechanics.getThermoData(original_molecule) # returns None if it fails @@ -649,14 +645,9 @@ def enlarge(self, newObject=None, reactEdge=False, else: # We are reacting the edge - rxns = reactAll(self.core.species, numOldCoreSpecies, unimolecularReact, bimolecularReact, trimolecularReact=trimolecularReact) -# # Calculate reaction degeneracy -# from rmgpy.data.rmg import getDB -# rxns = find_degenerate_reactions(rxns, kinetics_database=getDB('kinetics')) - # Get new species and save in spcs spcs_tmp = [] for rxn in rxns: @@ -677,28 +668,19 @@ def enlarge(self, newObject=None, reactEdge=False, else: if not spcs_tmp: spcs = spcs_tmp - #pass else: # Generate unique list of species to be submitted to QM thermo calculation - from rmgpy.molecule.molecule import Molecule # intilize list spcs=[spcs_tmp[0]] for counter, spc in enumerate (spcs_tmp): - #print("counter {0} spc{1}".format(counter,spc)) for counter2, val in enumerate (spcs): - #print("counter2 {0} val{1}".format(counter2,val)) if (spc.molecule[0].toSMILES() != val.molecule[0].toSMILES()): - #print("Potentially append reactant to list.") appendReactant = True else: - #print("Reactant already in list.") appendReactant = False break if appendReactant: - 
#print("Append reactant to list.") spcs.append(spc) - #print spcs_tmp - #print(spcs) # Calculate quantum thermo in parallel from rmgpy.rmg.main import maxproc @@ -709,8 +691,6 @@ def enlarge(self, newObject=None, reactEdge=False, memoryavailable = psutil.virtual_memory().free / (1000.0 ** 3) memoryuse = psutil.Process(os.getpid()).memory_info()[0]/(1000.0 ** 3) tmp = divmod(memoryavailable, memoryuse) - # logging.info("Memory use is {0} GB, available memory is {2} GB and max allowed " - # "number of processes is {1}.".format(memoryuse, tmp[0], memoryavailable)) tmp2 = min(maxproc, tmp[0]) procnum = max(1, int(tmp2)) elif platform == "darwin": @@ -718,8 +698,6 @@ def enlarge(self, newObject=None, reactEdge=False, memoryavailable = psutil.virtual_memory().available/(1000.0 ** 3) memoryuse = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/(1000.0 ** 3) tmp = divmod(memoryavailable, memoryuse) - # logging.info("Memory use is {0} GB, available memory is {2} GB and max allowed " - # "number of processes is {1}.".format(memoryuse, tmp[0], memoryavailable)) tmp2 = min(maxproc, tmp[0]) procnum = max(1, int(tmp2)) else: @@ -730,12 +708,7 @@ def enlarge(self, newObject=None, reactEdge=False, # This method chops the iterable into a number of chunks which it # submits to the process pool as separate tasks. 
p = Pool(processes=procnum) - p.map(CalculateThermoParallel,spcs) - # for spc in spcs: - # spc.generate_resonance_structures() - # original_molecule = spc.molecule[0] - # # Returns unsorted list, depending on which one is returned fastest - # p.apply_async(submit_own, (original_molecule,)) + p.map(calculate_thermo_parallel,spcs) p.close() p.join() diff --git a/rmgpy/rmg/react.py b/rmgpy/rmg/react.py index e06110ef45..d586adb852 100644 --- a/rmgpy/rmg/react.py +++ b/rmgpy/rmg/react.py @@ -41,7 +41,7 @@ from rmgpy.data.rmg import getDB from multiprocessing import Pool -def react(*spcTuples): +def react(*spc_tuples): """ Generate reactions between the species in the list of species tuples for all the reaction families available. @@ -109,26 +109,26 @@ def react(*spcTuples): return itertools.chain.from_iterable(reactions) -def reactSpecies(speciesTupleTmp): +def reactSpecies(species_tuple_tmp): """ Given a tuple of Species objects, generates all possible reactions from the loaded reaction families and combines degenerate reactions. """ - speciesTuple = speciesTupleTmp[0:-1] - own_families = speciesTupleTmp[-1] + species_tuple = species_tuple_tmp[0:-1] + own_families = species_tuple_tmp[-1] - speciesTuple = tuple([spc.copy(deep=True) for spc in speciesTuple]) + species_tuple = tuple([spc.copy(deep=True) for spc in species_tuple]) - reactions = getDB('kinetics').generate_reactions_from_families(speciesTuple, only_families=own_families) + reactions = getDB('kinetics').generate_reactions_from_families(species_tuple, only_families=own_families) return reactions -def reactAll(coreSpcList, numOldCoreSpecies, unimolecularReact, bimolecularReact, trimolecularReact=None): +def reactAll(core_spc_list, numOldCoreSpecies, unimolecularReact, bimolecularReact, trimolecularReact=None): """ Reacts the core species list via uni-, bi-, and trimolecular - reactions. + reactions and splits reaction families per task for improved load balancing in parallel runs. 
""" from rmgpy.rmg.main import maxproc From 4cbaa1df41af932bca2c288d87fda6dcc63f69bd Mon Sep 17 00:00:00 2001 From: Agnes Date: Thu, 30 May 2019 18:44:32 -0400 Subject: [PATCH 09/28] Fixes to increase coverage. --- rmgpy/rmg/model.py | 40 ----------------------------------- rmgpy/rmg/modelTest.py | 1 - rmgpy/rmg/reactTest.py | 38 ++++++++++++++++++++------------- rmgpy/scoop_framework/util.py | 38 ++++++++++++++++----------------- 4 files changed, 42 insertions(+), 75 deletions(-) diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index 419107524e..f5f81ec67f 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -596,13 +596,6 @@ def enlarge(self, newObject=None, reactEdge=False, pdepNetwork, newSpecies = newObject newReactions.extend(pdepNetwork.exploreIsomer(newSpecies)) - for rxn in newReactions: - rxn = self.inflate(rxn) - try: - rxn.reverse = self.inflate(rxn.reverse) - except AttributeError: - pass - self.processNewReactions(newReactions, newSpecies, pdepNetwork) else: @@ -623,12 +616,6 @@ def enlarge(self, newObject=None, reactEdge=False, products = products.species if len(products) == 1 and products[0] == species: newReactions = network.exploreIsomer(species) - for rxn in newReactions: - rxn = self.inflate(rxn) - try: - rxn.reverse = self.inflate(rxn.reverse) - except AttributeError: - pass self.processNewReactions(newReactions, species, network) network.updateConfigurations(self) @@ -1944,33 +1931,6 @@ def retrieve(self, family_label, key1, key2): except KeyError: # no such short-list: must be new, unless in seed. return [] - def inflate(self, rxn): - """ - Convert reactions from - reactants/products that are referring - to the core species index, to the respective Species objects. 
- """ - reactants, products, pairs = [], [], [] - - for reactant in rxn.reactants: - reactant = self.getSpecies(reactant) - reactants.append(reactant) - - for product in rxn.products: - product = self.getSpecies(product) - products.append(product) - - for reactant, product in rxn.pairs: - reactant = self.getSpecies(reactant) - product = self.getSpecies(product) - pairs.append((reactant, product)) - - rxn.reactants = reactants - rxn.products = products - rxn.pairs = pairs - - return rxn - def getSpecies(self, obj): """ Retrieve species object, by diff --git a/rmgpy/rmg/modelTest.py b/rmgpy/rmg/modelTest.py index 392f7958c5..233a8ba19c 100644 --- a/rmgpy/rmg/modelTest.py +++ b/rmgpy/rmg/modelTest.py @@ -42,7 +42,6 @@ from rmgpy.data.thermo import * ################################################### - class TestSpecies(unittest.TestCase): """ Contains unit tests of the Species class. diff --git a/rmgpy/rmg/reactTest.py b/rmgpy/rmg/reactTest.py index 1e95423242..d62559d723 100644 --- a/rmgpy/rmg/reactTest.py +++ b/rmgpy/rmg/reactTest.py @@ -42,7 +42,12 @@ ################################################### -TESTFAMILY = 'H_Abstraction' +TESTFAMILY = ['H_Abstraction','R_Recombination','Intra_Disproportionation','Intra_RH_Add_Endocyclic', + 'Singlet_Carbene_Intra_Disproportionation','Intra_ene_reaction','Disproportionation', + '1,4_Linear_birad_scission','R_Addition_MultipleBond','2+2_cycloaddition_Cd','Diels_alder_addition', + 'Intra_RH_Add_Exocyclic','Intra_Retro_Diels_alder_bicyclic','Intra_2+2_cycloaddition_Cd', + 'Birad_recombination','Intra_Diels_alder_monocyclic','1,4_Cyclic_birad_scission', + '1,2_Insertion_carbene'] class TestReact(unittest.TestCase): @@ -61,21 +66,21 @@ def setUp(self): self.rmg.database.loadForbiddenStructures(os.path.join(path, 'forbiddenStructures.py')) # kinetics family loading self.rmg.database.loadKinetics(os.path.join(path, 'kinetics'), - kineticsFamilies=[TESTFAMILY], + kineticsFamilies=TESTFAMILY, reactionLibraries=[] ) - def 
testReact(self): - """ - Test that reaction generation from the available families works. - """ - spcA = Species().fromSMILES('[OH]') - spcs = [Species().fromSMILES('CC'), Species().fromSMILES('[CH3]')] - spcTuples = [(spcA, spc, ['H_Abstraction']) for spc in spcs] - - reactionList = list(react(*spcTuples)) - self.assertIsNotNone(reactionList) - self.assertTrue(all([isinstance(rxn, TemplateReaction) for rxn in reactionList])) +# def testReact(self): +# """ +# Test that reaction generation from the available families works. +# """ +# spcA = Species().fromSMILES('[OH]') +# spcs = [Species().fromSMILES('CC'), Species().fromSMILES('[CH3]')] +# spcTuples = [(spcA, spc, ['H_Abstraction']) for spc in spcs] +# +# reactionList = list(react(*spcTuples)) +# self.assertIsNotNone(reactionList) +# self.assertTrue(all([isinstance(rxn, TemplateReaction) for rxn in reactionList])) def testReactMultiproc(self): """ @@ -96,15 +101,18 @@ def testReactAll(self): """ Test that the reactAll function works. """ + import rmgpy.rmg.main + rmgpy.rmg.main.maxproc = 2 spcs = [ Species().fromSMILES('CC'), Species().fromSMILES('[CH3]'), - Species().fromSMILES('[OH]') + Species().fromSMILES('[OH]'), + Species().fromSMILES('CCCCCCCCCCC') ] N = len(spcs) - rxns = reactAll(spcs, N, np.ones(N), np.ones([N,N])) + rxns = reactAll(spcs, N, np.ones(N), np.ones([N,N]), np.ones([N,N,N])) self.assertIsNotNone(rxns) self.assertTrue(all([isinstance(rxn, TemplateReaction) for rxn in rxns])) diff --git a/rmgpy/scoop_framework/util.py b/rmgpy/scoop_framework/util.py index 2221eb3178..900dd37cad 100644 --- a/rmgpy/scoop_framework/util.py +++ b/rmgpy/scoop_framework/util.py @@ -129,17 +129,17 @@ def broadcast(obj, key): warnings.warn("The option scoop is no longer supported" "and may be removed after Version: 2.4 ", DeprecationWarning) - kwargs = {key : obj} - try: - if shared.getConst(key): - logger.debug('An object with the key {} was already broadcasted.'.format(key)) - else: - shared.setConst(**kwargs) - 
except NameError: - """ - Name error will be caught when the SCOOP library is not imported properly. - """ - logger.debug('SCOOP not loaded. Not broadcasting the object {}'.format(obj)) + # kwargs = {key : obj} + # try: + # if shared.getConst(key): + # logger.debug('An object with the key {} was already broadcasted.'.format(key)) + # else: + # shared.setConst(**kwargs) + # except NameError, e: + # """ + # Name error will be caught when the SCOOP library is not imported properly. + # """ + # logger.debug('SCOOP not loaded. Not broadcasting the object {}'.format(obj)) @warnScoopStartedProperly def get(key): @@ -150,14 +150,14 @@ def get(key): warnings.warn("The option scoop is no longer supported" "and may be removed after Version: 2.4 ", DeprecationWarning) - try: - data = shared.getConst(key, timeout=1e-9) - return data - except NameError: - """ - Name error will be caught when the SCOOP library is not imported properly. - """ - logger.debug('SCOOP not loaded. Not retrieving the shared object with key {}'.format(key)) + # try: + # data = shared.getConst(key, timeout=1e-9) + # return data + # except NameError: + # """ + # Name error will be caught when the SCOOP library is not imported properly. + # """ + # logger.debug('SCOOP not loaded. 
Not retrieving the shared object with key {}'.format(key)) def map_(*args, **kwargs): warnings.warn("The option scoop is no longer supported" From 636fc1f9f212a45a3e8196f569b7cfb2679734b3 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 30 May 2019 18:44:32 -0400 Subject: [PATCH 10/28] Refactor family splitting and some code style changes Simplify code for family splitting Rename functions in react module using underscores Remove reactPdep functions and use react_species instead --- rmgpy/rmg/model.py | 6 +- rmgpy/rmg/modelTest.py | 6 +- rmgpy/rmg/pdep.py | 4 +- rmgpy/rmg/react.py | 259 ++++++++++------------------------------- rmgpy/rmg/reactTest.py | 6 +- rmgpy/rmg/rmgTest.py | 8 +- 6 files changed, 74 insertions(+), 215 deletions(-) diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index f5f81ec67f..4f430a50e7 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -62,7 +62,7 @@ from rmgpy.data.rmg import getDB import rmgpy.data.rmg -from .react import reactAll +from .react import react_all from rmgpy.data.kinetics.common import ensure_independent_atom_ids, find_degenerate_reactions from pdep import PDepReaction, PDepNetwork @@ -632,8 +632,8 @@ def enlarge(self, newObject=None, reactEdge=False, else: # We are reacting the edge - rxns = reactAll(self.core.species, numOldCoreSpecies, - unimolecularReact, bimolecularReact, trimolecularReact=trimolecularReact) + rxns = react_all(self.core.species, numOldCoreSpecies, + unimolecularReact, bimolecularReact, trimolecularReact=trimolecularReact) # Get new species and save in spcs spcs_tmp = [] diff --git a/rmgpy/rmg/modelTest.py b/rmgpy/rmg/modelTest.py index 233a8ba19c..3c60ca07fc 100644 --- a/rmgpy/rmg/modelTest.py +++ b/rmgpy/rmg/modelTest.py @@ -139,10 +139,10 @@ class item: spcA = Species().fromSMILES('[OH]') spcs = [Species().fromSMILES('CC'), Species().fromSMILES('[CH3]')] - spcTuples = [(spcA, spc, ['H_Abstraction']) for spc in spcs] + spcTuples = [((spcA, spc), ['H_Abstraction']) for spc in spcs] rxns = 
list(react(*spcTuples)) - rxns += list(react(*[(spcs[0], spcs[1], ['H_Abstraction'])])) + rxns += list(react(*[((spcs[0], spcs[1]), ['H_Abstraction'])])) for rxn in rxns: cerm.makeNewReaction(rxn) @@ -242,7 +242,7 @@ def testMakeNewReaction(self): spcA = Species().fromSMILES('[OH]') spcs = [Species().fromSMILES('CC'), Species().fromSMILES('[CH3]')] - spcTuples = [(spcA, spc, ['H_Abstraction']) for spc in spcs] + spcTuples = [((spcA, spc), ['H_Abstraction']) for spc in spcs] rxns = list(react(*spcTuples)) diff --git a/rmgpy/rmg/pdep.py b/rmgpy/rmg/pdep.py index 30bd40dd59..fe5cef42a3 100644 --- a/rmgpy/rmg/pdep.py +++ b/rmgpy/rmg/pdep.py @@ -44,7 +44,7 @@ from rmgpy.constants import R from rmgpy.pdep import Conformer, Configuration -from rmgpy.rmg.react import reactPdep +from rmgpy.rmg.react import react_species from rmgpy.exceptions import PressureDependenceError, NetworkError from rmgpy.data.kinetics.library import LibraryReaction @@ -300,7 +300,7 @@ def exploreIsomer(self, isomer): # Don't find reactions involving the new species as bimolecular # reactants or products with other core species (e.g. A + B <---> products) - newReactions = reactPdep((isomer,)) + newReactions = react_species((isomer,)) return newReactions diff --git a/rmgpy/rmg/react.py b/rmgpy/rmg/react.py index d586adb852..fe3d15195e 100644 --- a/rmgpy/rmg/react.py +++ b/rmgpy/rmg/react.py @@ -41,6 +41,7 @@ from rmgpy.data.rmg import getDB from multiprocessing import Pool + def react(*spc_tuples): """ Generate reactions between the species in the @@ -92,40 +93,37 @@ def react(*spc_tuples): # This method chops the iterable into a number of chunks which it # submits to the process pool as separate tasks. 
if procnum == 1: - reactions = map( - reactSpecies, - spc_tuples) + reactions = map(_react_species_star, spc_tuples) else: p = Pool(processes=procnum) - reactions = p.map( - reactSpecies, - spc_tuples) - + reactions = p.map(_react_species_star, spc_tuples) + p.close() p.join() - return itertools.chain.from_iterable(reactions) -def reactSpecies(species_tuple_tmp): +def _react_species_star(args): + """Wrapper to unpack zipped arguments for use with map""" + return react_species(*args) + + +def react_species(species_tuple, only_families=None): """ Given a tuple of Species objects, generates all possible reactions from the loaded reaction families and combines degenerate reactions. """ - species_tuple = species_tuple_tmp[0:-1] - own_families = species_tuple_tmp[-1] - species_tuple = tuple([spc.copy(deep=True) for spc in species_tuple]) - reactions = getDB('kinetics').generate_reactions_from_families(species_tuple, only_families=own_families) + reactions = getDB('kinetics').generate_reactions_from_families(species_tuple, only_families=only_families) return reactions -def reactAll(core_spc_list, numOldCoreSpecies, unimolecularReact, bimolecularReact, trimolecularReact=None): +def react_all(core_spc_list, numOldCoreSpecies, unimolecularReact, bimolecularReact, trimolecularReact=None): """ Reacts the core species list via uni-, bi-, and trimolecular reactions and splits reaction families per task for improved load balancing in parallel runs. @@ -133,198 +131,59 @@ def reactAll(core_spc_list, numOldCoreSpecies, unimolecularReact, bimolecularRea from rmgpy.rmg.main import maxproc - # Load kineticsFamilies to be added to reactant tuple to allow for improved load balancing - # in parallel jobs. 
- split_listOrig = [] - split_list_tmp = [] - for key in getDB('kinetics').families: - split_listOrig.append(key) - split_list_tmp.append(key) + # Select reactive species that can undergo unimolecular reactions: + spc_tuples = [(core_spc_list[i],) + for i in xrange(numOldCoreSpecies) if (unimolecularReact[i] and core_spc_list[i].reactive)] - if maxproc == 1: - # Select reactive species that can undergo unimolecular reactions: - spc_tuplestmp = [(core_spc_list[i], split_listOrig) - for i in xrange(numOldCoreSpecies) if (unimolecularReact[i] and core_spc_list[i].reactive)] - - for i in xrange(numOldCoreSpecies): - for j in xrange(i, numOldCoreSpecies): - # Find reactions involving the species that are bimolecular. - # This includes a species reacting with itself (if its own concentration is high enough). - if bimolecularReact[i,j]: - if core_spc_list[i].reactive and core_spc_list[j].reactive: - spc_tuplestmp.append((core_spc_list[i], core_spc_list[j], split_listOrig)) - - if trimolecularReact is not None: - for i in xrange(numOldCoreSpecies): - for j in xrange(i, numOldCoreSpecies): - for k in xrange(j, numOldCoreSpecies): - # Find reactions involving the species that are trimolecular. - if trimolecularReact[i,j,k]: - if core_spc_list[i].reactive and core_spc_list[j].reactive and core_spc_list[k].reactive: - spc_tuplestmp.append((core_spc_list[i], core_spc_list[j], core_spc_list[k], split_listOrig)) - else: - # Select reactive species that can undergo unimolecular reactions: - spc_tuples = [(core_spc_list[i],) - for i in xrange(numOldCoreSpecies) if (unimolecularReact[i] and core_spc_list[i].reactive)] - + for i in xrange(numOldCoreSpecies): + for j in xrange(i, numOldCoreSpecies): + # Find reactions involving the species that are bimolecular. + # This includes a species reacting with itself (if its own concentration is high enough). 
+ if bimolecularReact[i, j]: + if core_spc_list[i].reactive and core_spc_list[j].reactive: + spc_tuples.append((core_spc_list[i], core_spc_list[j])) + + if trimolecularReact is not None: for i in xrange(numOldCoreSpecies): for j in xrange(i, numOldCoreSpecies): - # Find reactions involving the species that are bimolecular. - # This includes a species reacting with itself (if its own concentration is high enough). - if bimolecularReact[i,j]: - if core_spc_list[i].reactive and core_spc_list[j].reactive: - spc_tuples.append((core_spc_list[i], core_spc_list[j])) - - if trimolecularReact is not None: - for i in xrange(numOldCoreSpecies): - for j in xrange(i, numOldCoreSpecies): - for k in xrange(j, numOldCoreSpecies): - # Find reactions involving the species that are trimolecular. - if trimolecularReact[i,j,k]: - if core_spc_list[i].reactive and core_spc_list[j].reactive and core_spc_list[k].reactive: - spc_tuples.append((core_spc_list[i], core_spc_list[j], core_spc_list[k])) - + for k in xrange(j, numOldCoreSpecies): + # Find reactions involving the species that are trimolecular. + if trimolecularReact[i, j, k]: + if core_spc_list[i].reactive and core_spc_list[j].reactive and core_spc_list[k].reactive: + spc_tuples.append((core_spc_list[i], core_spc_list[j], core_spc_list[k])) + if maxproc == 1: + # React all families like normal (provide empty argument for only_families) + spc_fam_tuples = zip(spc_tuples) + else: # Identify and split families that are prone to generate many reactions into sublists. 
+ family_list = getDB('kinetics').families.keys() + major_families = [ + 'H_Abstraction', 'R_Recombination', 'Intra_Disproportionation', 'Intra_RH_Add_Endocyclic', + 'Singlet_Carbene_Intra_Disproportionation', 'Intra_ene_reaction', 'Disproportionation', + '1,4_Linear_birad_scission', 'R_Addition_MultipleBond', '2+2_cycloaddition_Cd', 'Diels_alder_addition', + 'Intra_RH_Add_Exocyclic', 'Intra_Retro_Diels_alder_bicyclic', 'Intra_2+2_cycloaddition_Cd', + 'Birad_recombination', 'Intra_Diels_alder_monocyclic', '1,4_Cyclic_birad_scission', '1,2_Insertion_carbene', + ] + split_list = [] - for i in range(len(split_list_tmp)): - if split_list_tmp[i] == 'H_Abstraction': - split_list_tmp[i] = [] - split_list.append(['H_Abstraction']) - elif split_list_tmp[i] == 'R_Recombination': - split_list_tmp[i] = [] - split_list.append(['R_Recombination']) - elif split_list_tmp[i] == 'Intra_Disproportionation': - split_list_tmp[i] = [] - split_list.append(['Intra_Disproportionation']) - elif split_list_tmp[i] == 'Intra_RH_Add_Endocyclic': - split_list_tmp[i] = [] - split_list.append(['Intra_RH_Add_Endocyclic']) - elif split_list_tmp[i] == 'Singlet_Carbene_Intra_Disproportionation': - split_list_tmp[i] = [] - split_list.append(['Singlet_Carbene_Intra_Disproportionation']) - elif split_list_tmp[i] == 'Intra_ene_reaction': - split_list_tmp[i] = [] - split_list.append(['Intra_ene_reaction']) - elif split_list_tmp[i] == 'Disproportionation': - split_list_tmp[i] = [] - split_list.append(['Disproportionation']) - elif split_list_tmp[i] == '1,4_Linear_birad_scission': - split_list_tmp[i] = [] - split_list.append(['1,4_Linear_birad_scission']) - elif split_list_tmp[i] == 'R_Addition_MultipleBond': - split_list_tmp[i] = [] - split_list.append(['R_Addition_MultipleBond']) - elif split_list_tmp[i] == '2+2_cycloaddition_Cd': - split_list_tmp[i] = [] - split_list.append(['2+2_cycloaddition_Cd']) - elif split_list_tmp[i] == 'Diels_alder_addition': - split_list_tmp[i] = [] - 
split_list.append(['Diels_alder_addition']) - elif split_list_tmp[i] == 'Intra_RH_Add_Exocyclic': - split_list_tmp[i] = [] - split_list.append(['Intra_RH_Add_Exocyclic']) - elif split_list_tmp[i] == 'Intra_Retro_Diels_alder_bicyclic': - split_list_tmp[i] = [] - split_list.append(['Intra_Retro_Diels_alder_bicyclic']) - elif split_list_tmp[i] == 'Intra_2+2_cycloaddition_Cd': - split_list_tmp[i] = [] - split_list.append(['Intra_2+2_cycloaddition_Cd']) - elif split_list_tmp[i] == 'Birad_recombination': - split_list_tmp[i] = [] - split_list.append(['Birad_recombination']) - elif split_list_tmp[i] == 'Intra_Diels_alder_monocyclic': - split_list_tmp[i] = [] - split_list.append(['Intra_Diels_alder_monocyclic']) - elif split_list_tmp[i] == '1,4_Cyclic_birad_scission': - split_list_tmp[i] = [] - split_list.append(['1,4_Cyclic_birad_scission']) - elif split_list_tmp[i] == '1,2_Insertion_carbene': - split_list_tmp[i] = [] - split_list.append(['1,2_Insertion_carbene']) - - # Remove empty lists from remaining split_list_tmp. It now contains only - # families that are not mentioned above. - split_list.append(filter(None, split_list_tmp)) - - # Only employ family splitting for reactants that have a larger number than nAFS. - nAFS = 10 - - spc_tuplestmp = [] - # Append reaction families to reactant tuple. 
- for tmpj in spc_tuples: - if len(tmpj) == 1: - if len(str(tmpj[0])) > nAFS: - for tmpl in split_list: - tmpk = list(tmpj) - tmpk.append(tmpl) - spc_tuplestmp.append(tuple(tmpk)) - else: - tmpk = list(tmpj) - tmpk.append(split_listOrig) - spc_tuplestmp.append(tuple(tmpk)) - elif len(tmpj) == 2: - if (len(str(tmpj[0])) > nAFS - ) or (len(str(tmpj[1])) > nAFS): - for tmpl in split_list: - tmpk = list(tmpj) - tmpk.append(tmpl) - spc_tuplestmp.append(tuple(tmpk)) - else: - tmpk = list(tmpj) - tmpk.append(split_listOrig) - spc_tuplestmp.append(tuple(tmpk)) + leftovers = [] + for fam in family_list: + if fam in major_families: + split_list.append([fam]) else: - if (len(str(tmpj[0])) > nAFS - ) or (len(str(tmpj[1])) > nAFS - ) or (len(str(tmpj[2])) > nAFS): - for tmpl in split_list: - tmpk = list(tmpj) - tmpk.append(tmpl) - spc_tuplestmp.append(tuple(tmpk)) - else: - tmpk = list(tmpj) - tmpk.append(split_listOrig) - spc_tuplestmp.append(tuple(tmpk)) - - rxns = list(react(*spc_tuplestmp)) - - return rxns - -def reactPdep(*spc_tuples): - """ - Generate reactions between the species in the - list of species tuples for all the reaction families available. - - For each tuple of one or more Species objects [(spc1,), (spc2, spc3), ...] - the following is done: - - A list of tuples is created for each resonance isomer of the species. - Each tuple consists of (Molecule, index) with the index the species index of the Species object. - - Possible combinations between the first spc in the tuple, and the second species in the tuple - is obtained by taking the combinatorial product of the two generated [(Molecule, index)] lists. - - Returns a flat generator object containing the generated Reaction objects. 
- """ - - reactions = map( - react_species_pdep, - spc_tuples) - - return itertools.chain.from_iterable(reactions) - - -def react_species_pdep(species_tuple): - """ - Given a tuple of Species objects, generates all possible reactions - from the loaded reaction families and combines degenerate reactions. - """ - - species_tuple = tuple([spc.copy(deep=True) for spc in species_tuple]) - - reactions = getDB('kinetics').generate_reactions_from_families(species_tuple) - - return reactions + leftovers.append(fam) + split_list.append(leftovers) + + # Only employ family splitting for reactants that have a larger number than min_atoms + min_atoms = 10 + spc_fam_tuples = [] + for i, spc_tuple in enumerate(spc_tuples): + if any([len(spc.molecule[0].atoms) > min_atoms for spc in spc_tuple]): + for item in split_list: + spc_fam_tuples.append((spc_tuple, item)) + else: + spc_fam_tuples.append((spc_tuple,)) + return list(react(*spc_fam_tuples)) diff --git a/rmgpy/rmg/reactTest.py b/rmgpy/rmg/reactTest.py index d62559d723..b70ecdad0f 100644 --- a/rmgpy/rmg/reactTest.py +++ b/rmgpy/rmg/reactTest.py @@ -38,7 +38,7 @@ from rmgpy.species import Species from rmgpy.rmg.main import RMG -from rmgpy.rmg.react import react, reactAll +from rmgpy.rmg.react import react, react_all ################################################### @@ -91,7 +91,7 @@ def testReactMultiproc(self): spcA = Species().fromSMILES('[OH]') spcs = [Species().fromSMILES('CC'), Species().fromSMILES('[CH3]')] - spcTuples = [(spcA, spc, ['H_Abstraction']) for spc in spcs] + spcTuples = [((spcA, spc), ['H_Abstraction']) for spc in spcs] reactionList = list(react(*spcTuples)) self.assertIsNotNone(reactionList) @@ -112,7 +112,7 @@ def testReactAll(self): ] N = len(spcs) - rxns = reactAll(spcs, N, np.ones(N), np.ones([N,N]), np.ones([N,N,N])) + rxns = react_all(spcs, N, np.ones(N), np.ones([N, N]), np.ones([N, N, N])) self.assertIsNotNone(rxns) self.assertTrue(all([isinstance(rxn, TemplateReaction) for rxn in rxns])) diff --git 
a/rmgpy/rmg/rmgTest.py b/rmgpy/rmg/rmgTest.py index b905fbc029..c68cbc771d 100644 --- a/rmgpy/rmg/rmgTest.py +++ b/rmgpy/rmg/rmgTest.py @@ -37,7 +37,7 @@ from rmgpy import settings from rmgpy.data.rmg import RMGDatabase from rmgpy.molecule import Molecule -from rmgpy.rmg.react import react +from rmgpy.rmg.react import react_species from rmgpy.restart import saveRestartFile import rmgpy from rmgpy.data.base import ForbiddenStructures @@ -97,7 +97,7 @@ def testDeterministicReactionTemplateMatching(self): # react spc = Species().fromSMILES("O=C[C]=C") spc.generate_resonance_structures() - newReactions = react((spc,)) + newReactions = react_species((spc,)) # try to pick out the target reaction mol_H = Molecule().fromSMILES("[H]") @@ -111,7 +111,7 @@ def testDeterministicReactionTemplateMatching(self): # react again newReactions_reverse = [] - newReactions_reverse.extend(react((spc,))) + newReactions_reverse.extend(react_species((spc,))) # try to pick out the target reaction target_rxns_reverse = findTargetRxnsContaining(mol_H, mol_C3H2O, newReactions_reverse) @@ -185,7 +185,7 @@ def testRestartFileGenerationAndParsing(self): self.rmg.reactionModel.core.species.append(spc2) newReactions = [] - newReactions.extend(react((spc1, spc2, ['H_Abstraction']))) + newReactions.extend(react_species((spc1, spc2))) # process newly generated reactions to make sure no duplicated reactions self.rmg.reactionModel.processNewReactions(newReactions, spc2, None) From bff7c1bbbe407949ecfb97a84e0c4d7ca13b8876 Mon Sep 17 00:00:00 2001 From: Agnes Date: Thu, 30 May 2019 18:44:32 -0400 Subject: [PATCH 11/28] Function to determine procnum based on RAM. 
--- rmgpy/rmg/model.py | 26 +++------------------- rmgpy/rmg/react.py | 49 +++++++++++++++++++++++++----------------- rmgpy/rmg/reactTest.py | 2 +- 3 files changed, 33 insertions(+), 44 deletions(-) diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index 4f430a50e7..c36683a38e 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -62,7 +62,7 @@ from rmgpy.data.rmg import getDB import rmgpy.data.rmg -from .react import react_all +from .react import react_all, determine_procnum_from_RAM from rmgpy.data.kinetics.common import ensure_independent_atom_ids, find_degenerate_reactions from pdep import PDepReaction, PDepNetwork @@ -669,28 +669,8 @@ def enlarge(self, newObject=None, reactEdge=False, if appendReactant: spcs.append(spc) - # Calculate quantum thermo in parallel - from rmgpy.rmg.main import maxproc - - # Get available RAM (GB)and procnum dependent on OS - if platform.startswith('linux'): - # linux - memoryavailable = psutil.virtual_memory().free / (1000.0 ** 3) - memoryuse = psutil.Process(os.getpid()).memory_info()[0]/(1000.0 ** 3) - tmp = divmod(memoryavailable, memoryuse) - tmp2 = min(maxproc, tmp[0]) - procnum = max(1, int(tmp2)) - elif platform == "darwin": - # OS X - memoryavailable = psutil.virtual_memory().available/(1000.0 ** 3) - memoryuse = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/(1000.0 ** 3) - tmp = divmod(memoryavailable, memoryuse) - tmp2 = min(maxproc, tmp[0]) - procnum = max(1, int(tmp2)) - else: - # Everything else - procnum = 1 - + procnum = determine_procnum_from_RAM() + # Execute multiprocessing map. It blocks until the result is ready. # This method chops the iterable into a number of chunks which it # submits to the process pool as separate tasks. 
diff --git a/rmgpy/rmg/react.py b/rmgpy/rmg/react.py index fe3d15195e..be1721360a 100644 --- a/rmgpy/rmg/react.py +++ b/rmgpy/rmg/react.py @@ -41,27 +41,14 @@ from rmgpy.data.rmg import getDB from multiprocessing import Pool +################################################################################ -def react(*spc_tuples): +def determine_procnum_from_RAM(): """ - Generate reactions between the species in the - list of species tuples for all the reaction families available. - - For each tuple of one or more Species objects [(spc1,), (spc2, spc3), ...] - the following is done: - - A list of tuples is created for each resonance isomer of the species. - Each tuple consists of (Molecule, index) with the index the species index of the Species object. - - Possible combinations between the first spc in the tuple, and the second species in the tuple - is obtained by taking the combinatorial product of the two generated [(Molecule, index)] lists. - - Returns a flat generator object containing the generated Reaction objects. + Get available RAM (GB)and procnum dependent on OS. """ - from rmgpy.rmg.main import maxproc - # Get available RAM (GB)and procnum dependent on OS. 
if platform.startswith('linux'): # linux memory_available = psutil.virtual_memory().free / (1000.0 ** 3) @@ -69,7 +56,7 @@ def react(*spc_tuples): tmp = divmod(memory_available, memory_use) tmp2 = min(maxproc, tmp[0]) procnum = max(1, int(tmp2)) - if maxproc == 1: + if procnum == 1: logging.info('For reaction generation {0} process is used.'.format(procnum)) else: logging.info('For reaction generation {0} processes are used.'.format(procnum)) @@ -80,7 +67,7 @@ def react(*spc_tuples): tmp = divmod(memory_available, memory_use) tmp2 = min(maxproc, tmp[0]) procnum = max(1, int(tmp2)) - if maxproc == 1: + if procnum == 1: logging.info('For reaction generation {0} process is used.'.format(procnum)) else: logging.info('For reaction generation {0} processes are used.'.format(procnum)) @@ -89,6 +76,28 @@ def react(*spc_tuples): procnum = 1 logging.info('For reaction generation {0} process is used.'.format(procnum)) + # Return the maximal number of processes for multiprocessing + return procnum + +def react(*spc_tuples): + """ + Generate reactions between the species in the + list of species tuples for all the reaction families available. + + For each tuple of one or more Species objects [(spc1,), (spc2, spc3), ...] + the following is done: + + A list of tuples is created for each resonance isomer of the species. + Each tuple consists of (Molecule, index) with the index the species index of the Species object. + + Possible combinations between the first spc in the tuple, and the second species in the tuple + is obtained by taking the combinatorial product of the two generated [(Molecule, index)] lists. + + Returns a flat generator object containing the generated Reaction objects. + """ + + procnum = determine_procnum_from_RAM() + # Execute multiprocessing map. It blocks until the result is ready. # This method chops the iterable into a number of chunks which it # submits to the process pool as separate tasks. 
@@ -129,7 +138,7 @@ def react_all(core_spc_list, numOldCoreSpecies, unimolecularReact, bimolecularRe reactions and splits reaction families per task for improved load balancing in parallel runs. """ - from rmgpy.rmg.main import maxproc + procnum = determine_procnum_from_RAM() # Select reactive species that can undergo unimolecular reactions: spc_tuples = [(core_spc_list[i],) @@ -152,7 +161,7 @@ def react_all(core_spc_list, numOldCoreSpecies, unimolecularReact, bimolecularRe if core_spc_list[i].reactive and core_spc_list[j].reactive and core_spc_list[k].reactive: spc_tuples.append((core_spc_list[i], core_spc_list[j], core_spc_list[k])) - if maxproc == 1: + if procnum == 1: # React all families like normal (provide empty argument for only_families) spc_fam_tuples = zip(spc_tuples) else: diff --git a/rmgpy/rmg/reactTest.py b/rmgpy/rmg/reactTest.py index b70ecdad0f..48972d0243 100644 --- a/rmgpy/rmg/reactTest.py +++ b/rmgpy/rmg/reactTest.py @@ -47,7 +47,7 @@ '1,4_Linear_birad_scission','R_Addition_MultipleBond','2+2_cycloaddition_Cd','Diels_alder_addition', 'Intra_RH_Add_Exocyclic','Intra_Retro_Diels_alder_bicyclic','Intra_2+2_cycloaddition_Cd', 'Birad_recombination','Intra_Diels_alder_monocyclic','1,4_Cyclic_birad_scission', - '1,2_Insertion_carbene'] + '1,2_Insertion_carbene','1,2_Insertion_CO'] class TestReact(unittest.TestCase): From 519dcded918bbbda86fb2bb18f06633aed9da050 Mon Sep 17 00:00:00 2001 From: Agnes Date: Thu, 30 May 2019 18:44:32 -0400 Subject: [PATCH 12/28] Minor changes to logging in rmg.react --- rmgpy/rmg/react.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/rmgpy/rmg/react.py b/rmgpy/rmg/react.py index be1721360a..5c233f74e8 100644 --- a/rmgpy/rmg/react.py +++ b/rmgpy/rmg/react.py @@ -47,6 +47,7 @@ def determine_procnum_from_RAM(): """ Get available RAM (GB)and procnum dependent on OS.
""" + from rmgpy.rmg.main import maxproc if platform.startswith('linux'): @@ -56,10 +57,6 @@ def determine_procnum_from_RAM(): tmp = divmod(memory_available, memory_use) tmp2 = min(maxproc, tmp[0]) procnum = max(1, int(tmp2)) - if procnum == 1: - logging.info('For reaction generation {0} process is used.'.format(procnum)) - else: - logging.info('For reaction generation {0} processes are used.'.format(procnum)) elif platform == "darwin": # OS X memory_available = psutil.virtual_memory().available/(1000.0 ** 3) @@ -67,14 +64,9 @@ def determine_procnum_from_RAM(): tmp = divmod(memory_available, memory_use) tmp2 = min(maxproc, tmp[0]) procnum = max(1, int(tmp2)) - if procnum == 1: - logging.info('For reaction generation {0} process is used.'.format(procnum)) - else: - logging.info('For reaction generation {0} processes are used.'.format(procnum)) else: # Everything else procnum = 1 - logging.info('For reaction generation {0} process is used.'.format(procnum)) # Return the maximal number of processes for multiprocessing return procnum @@ -102,12 +94,12 @@ def react(*spc_tuples): # This method chops the iterable into a number of chunks which it # submits to the process pool as separate tasks. 
if procnum == 1: + logging.info('For reaction generation {0} process is used.'.format(procnum)) reactions = map(_react_species_star, spc_tuples) else: + logging.info('For reaction generation {0} processes are used.'.format(procnum)) p = Pool(processes=procnum) - reactions = p.map(_react_species_star, spc_tuples) - p.close() p.join() From cf1fb49d8c740a33e0f13d0716c81a109c524e9e Mon Sep 17 00:00:00 2001 From: Agnes Date: Thu, 30 May 2019 18:44:32 -0400 Subject: [PATCH 13/28] Relocate maxproc check in RMG.initialize --- rmgpy/rmg/main.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/rmgpy/rmg/main.py b/rmgpy/rmg/main.py index 67ee812672..181d875354 100644 --- a/rmgpy/rmg/main.py +++ b/rmgpy/rmg/main.py @@ -460,6 +460,17 @@ def initialize(self, **kwargs): except KeyError: self.kineticsdatastore = False + global maxproc + try: + maxproc = kwargs['maxproc'] + except KeyError: + pass + + if maxproc > psutil.cpu_count(): + raise ValueError("""Invalid input for user defined maximum number of processes {0}; + should be an integer and smaller or equal to your available number of + processes {1}""".format(maxproc, psutil.cpu_count())) + # Load databases self.loadDatabase() @@ -488,17 +499,6 @@ def initialize(self, **kwargs): raise ValueError('Invalid format for wall time {0}; should be DD:HH:MM:SS.'.format(self.wallTime)) self.wallTime = int(data[-1]) + 60 * int(data[-2]) + 3600 * int(data[-3]) + 86400 * int(data[-4]) - global maxproc - try: - maxproc = kwargs['maxproc'] - except KeyError: - pass - - if maxproc > psutil.cpu_count(): - raise ValueError("""Invalid input for user defined maximum number of processes {0}; - should be an integer and smaller or equal to your available number of - processes {1}""".format(maxproc, psutil.cpu_count())) - # Initialize reaction model if restart: self.initializeRestartRun(os.path.join(self.outputDirectory,'restart.pkl')) From e1b744e1cf3c51a0ab84a78c989ea0c559f04c2d Mon Sep 17 00:00:00 2001 
From: Max Liu Date: Thu, 30 May 2019 18:44:32 -0400 Subject: [PATCH 14/28] Alternate approach to QMTP parallelization --- rmgpy/rmg/model.py | 177 ++++++++++++++++++++------------------------- 1 file changed, 80 insertions(+), 97 deletions(-) diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index c36683a38e..9d09cde2a8 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -69,32 +69,6 @@ ################################################################################ -def calculate_thermo_parallel(spc): - """ - If quantumMechanics is turned on in the input file species thermo data is calculated - in this function. - """ - - from rmgpy.rmg.input import getInput - - try: - quantumMechanics = getInput('quantumMechanics') - except Exception: - logging.debug('Quantum Mechanics DB could not be found.') - quantumMechanics = None - - spc.generate_resonance_structures() - original_molecule = spc.molecule[0] - - if quantumMechanics.settings.onlyCyclics and not original_molecule.isCyclic(): - pass - else: - if original_molecule.getRadicalCount() > quantumMechanics.settings.maxRadicalNumber: - pass - else: - logging.info('Not too many radicals: do a direct QM calculation.') - thermo0 = quantumMechanics.getThermoData(original_molecule) # returns None if it fails - class ReactionModel: """ Represent a generic reaction model. 
A reaction model consists of `species`, @@ -298,7 +272,7 @@ def checkForExistingSpecies(self, molecule): # At this point we can conclude that the species is new return None - def makeNewSpecies(self, object, label='', reactive=True, checkForExisting=True): + def makeNewSpecies(self, object, label='', reactive=True, checkForExisting=True, generateThermo=True): """ Formally create a new species from the specified `object`, which can be either a :class:`Molecule` object or an :class:`rmgpy.species.Species` @@ -338,25 +312,11 @@ def makeNewSpecies(self, object, label='', reactive=True, checkForExisting=True) spec.generate_resonance_structures() spec.molecularWeight = Quantity(spec.molecule[0].getMolecularWeight()*1000.,"amu") - if not spec.thermo: - submit(spec,self.solventName) - - if spec.label == '': - if spec.thermo and spec.thermo.label != '': #check if thermo libraries have a name for it - logging.info('Species with SMILES of {0} named {1} based on thermo library name'.format(molecule.toSMILES().replace('/','').replace('\\',''),spec.thermo.label)) - spec.label = spec.thermo.label - label = spec.label - else: - # Use SMILES as default format for label - # However, SMILES can contain slashes (to describe the - # stereochemistry around double bonds); since RMG doesn't - # distinguish cis and trans isomers, we'll just strip these out - # so that we can use the label in file paths - label = molecule.toSMILES().replace('/','').replace('\\','') - - logging.debug('Creating new species {0}'.format(label)) - - spec.generateEnergyTransferModel() + if generateThermo: + self.generateThermo(spec) + + logging.debug('Creating new species {0}'.format(spec.label)) + formula = molecule.getFormula() if formula in self.speciesDict: self.speciesDict[formula].append(spec) @@ -454,7 +414,7 @@ def checkForExistingReaction(self, rxn): return False, None - def makeNewReaction(self, forward, checkExisting=True): + def makeNewReaction(self, forward, checkExisting=True, generateThermo=True): 
""" Make a new reaction given a :class:`Reaction` object `forward`. The reaction is added to the global list of reactions. @@ -470,8 +430,8 @@ def makeNewReaction(self, forward, checkExisting=True): """ # Determine the proper species objects for all reactants and products - reactants = [self.makeNewSpecies(reactant)[0] for reactant in forward.reactants] - products = [self.makeNewSpecies(product)[0] for product in forward.products ] + reactants = [self.makeNewSpecies(reactant, generateThermo=generateThermo)[0] for reactant in forward.reactants] + products = [self.makeNewSpecies(product, generateThermo=generateThermo)[0] for product in forward.products ] if forward.specificCollider is not None: forward.specificCollider = self.makeNewSpecies(forward.specificCollider)[0] @@ -596,7 +556,7 @@ def enlarge(self, newObject=None, reactEdge=False, pdepNetwork, newSpecies = newObject newReactions.extend(pdepNetwork.exploreIsomer(newSpecies)) - self.processNewReactions(newReactions, newSpecies, pdepNetwork) + self.processNewReactions(newReactions, newSpecies, pdepNetwork, generateThermo=False) else: raise TypeError('Unable to use object {0} to enlarge reaction model; expecting an object of class rmg.model.Species or rmg.model.PDepNetwork, not {1}'.format(newObject, newObject.__class__)) @@ -617,7 +577,7 @@ def enlarge(self, newObject=None, reactEdge=False, if len(products) == 1 and products[0] == species: newReactions = network.exploreIsomer(species) - self.processNewReactions(newReactions, species, network) + self.processNewReactions(newReactions, species, network, generateThermo=False) network.updateConfigurations(self) index = 0 break @@ -636,57 +596,26 @@ def enlarge(self, newObject=None, reactEdge=False, unimolecularReact, bimolecularReact, trimolecularReact=trimolecularReact) # Get new species and save in spcs - spcs_tmp = [] + spcs = [] + spcs_list = [] for rxn in rxns: - spcs_tmp.extend(rxn.reactants) - spcs_tmp.extend(rxn.products) - - spcs = spcs_tmp - - from 
rmgpy.rmg.input import getInput - try: - quantumMechanics = getInput('quantumMechanics') - except Exception: - logging.debug('Quantum Mechanics DB could not be found.') - quantumMechanics = None - - if not quantumMechanics: - pass - else: - if not spcs_tmp: - spcs = spcs_tmp - else: - # Generate unique list of species to be submitted to QM thermo calculation - # intilize list - spcs=[spcs_tmp[0]] - for counter, spc in enumerate (spcs_tmp): - for counter2, val in enumerate (spcs): - if (spc.molecule[0].toSMILES() != val.molecule[0].toSMILES()): - appendReactant = True - else: - appendReactant = False - break - if appendReactant: - spcs.append(spc) - - procnum = determine_procnum_from_RAM() - - # Execute multiprocessing map. It blocks until the result is ready. - # This method chops the iterable into a number of chunks which it - # submits to the process pool as separate tasks. - p = Pool(processes=procnum) - p.map(calculate_thermo_parallel,spcs) - p.close() - p.join() - - ensure_independent_atom_ids(spcs, resonance=True) + spcs.extend(rxn.reactants) + spcs.extend(rxn.products) for rxn, spc in zip(rxns, spcs): - self.processNewReactions([rxn], spc) + self.processNewReactions([rxn], spc, generateThermo=False) ################################################################ # Begin processing the new species and reactions + # Determine number of parallel processes. 
+ procnum = determine_procnum_from_RAM() + + # Generate thermo for new species + if self.newSpeciesList: + logging.info('Generating thermo for new species...') + self.applyThermoToSpecies(procnum) + # Generate kinetics of new reactions if self.newReactionList: logging.info('Generating kinetics for new reactions...') @@ -796,7 +725,7 @@ def clearSurfaceAdjustments(self): self.newSurfaceSpcsLoss = set() self.newSurfaceRxnsLoss = set() - def processNewReactions(self, newReactions, newSpecies, pdepNetwork=None): + def processNewReactions(self, newReactions, newSpecies, pdepNetwork=None, generateThermo=True): """ Process a list of newly-generated reactions involving the new core species or explored isomer `newSpecies` in network `pdepNetwork`. @@ -804,7 +733,7 @@ def processNewReactions(self, newReactions, newSpecies, pdepNetwork=None): Makes a reaction and decides where to put it: core, edge, or PDepNetwork. """ for rxn in newReactions: - rxn, isNew = self.makeNewReaction(rxn) + rxn, isNew = self.makeNewReaction(rxn, generateThermo=generateThermo) if rxn is None: # Skip this reaction because there was something wrong with it continue @@ -875,6 +804,60 @@ def processNewReactions(self, newReactions, newSpecies, pdepNetwork=None): if not numpy.isinf(self.toleranceThermoKeepSpeciesInEdge) and spcs != []: #do thermodynamic filtering self.thermoFilterSpecies(spcs) + def applyThermoToSpecies(self, procnum): + """ + Generate thermo for species. QM calculations are parallelized if requested. + """ + from rmgpy.rmg.input import getInput + quantumMechanics = getInput('quantumMechanics') + + if quantumMechanics: + # Generate a list of molecules. 
+ mol_list = [] + for spc in self.newSpeciesList: + if spc.molecule[0].getRadicalCount() > quantumMechanics.settings.maxRadicalNumber: + for molecule in spc.molecule: + if quantumMechanics.settings.onlyCyclics and molecule.isCyclic(): + saturated_mol = molecule.copy(deep=True) + saturated_mol.saturate_radicals() + if saturated_mol not in mol_list: + mol_list.append(saturated_mol) + else: + if quantumMechanics.settings.onlyCyclics and spc.molecule[0].isCyclic(): + if spc.molecule[0] not in mol_list: + mol_list.append(spc.molecule[0]) + if procnum == 1: + logging.info('Writing QM files with {0} process.'.format(procnum)) + map(quantumMechanics.getThermoData, mol_list) + else: + logging.info('Writing QM files with {0} processes.'.format(procnum)) + p = Pool(processes=procnum) + p.map(quantumMechanics.getThermoData, mol_list) + p.close() + p.join() + + # Serial thermo calculation for other methods + map(self.generateThermo, self.newSpeciesList) + + def generateThermo(self, spc): + """ + Generate thermo for species. 
+ """ + if not spc.thermo: + submit(spc, self.solventName) + if spc.thermo and spc.thermo.label != '': #check if thermo libraries have a name for it + logging.info('Species with SMILES of {0} named {1} based on thermo library name'.format(spc.molecule[0].toSMILES().replace('/','').replace('\\',''), spc.thermo.label)) + spc.label = spc.thermo.label + else: + # Use SMILES as default format for label + # However, SMILES can contain slashes (to describe the + # stereochemistry around double bonds); since RMG doesn't + # distinguish cis and trans isomers, we'll just strip these out + # so that we can use the label in file paths + spc.label = spc.molecule[0].toSMILES().replace('/','').replace('\\','') + + spc.generateEnergyTransferModel() + def applyKineticsToReaction(self, reaction): """ retrieve the best kinetics for the reaction and apply it towards the forward From 3994346e9a3304440d0d07bb2a3fd86b20df6f2e Mon Sep 17 00:00:00 2001 From: Agnes Date: Thu, 30 May 2019 18:44:32 -0400 Subject: [PATCH 15/28] Add QMTP parallelization when loading training reactions --- rmgpy/data/kinetics/family.py | 85 +++++++++++++++++++++++++++++++ rmgpy/data/kinetics/familyTest.py | 32 +++++++++++- 2 files changed, 116 insertions(+), 1 deletion(-) diff --git a/rmgpy/data/kinetics/family.py b/rmgpy/data/kinetics/family.py index b511493143..6c15a1bf4c 100644 --- a/rmgpy/data/kinetics/family.py +++ b/rmgpy/data/kinetics/family.py @@ -40,6 +40,7 @@ from copy import deepcopy from collections import OrderedDict from sklearn.model_selection import KFold +from multiprocessing import Pool from rmgpy.constraints import failsSpeciesConstraints from rmgpy.data.base import Database, Entry, LogicNode, LogicOr, ForbiddenStructures,\ @@ -372,6 +373,65 @@ def applyReverse(self, struct, unique=True): ################################################################################ +def generate_QMfiles(spcs, quantumMechanics, procnum): + """ + If quantumMechanics is turned on in the input file the QM files 
are written here in parallel. + Later, thermo is calculated for one species at a time in self.processNewReactions() by looking up the + values in the QM files. + """ + # Generating a list of molecules. + mol_list = [] + for spc in spcs: + if spc.molecule[0].getRadicalCount() > quantumMechanics.settings.maxRadicalNumber: + for molecule in spc.molecule: + if quantumMechanics.settings.onlyCyclics and molecule.isCyclic(): + mol_list.append(molecule) + else: + if quantumMechanics.settings.onlyCyclics and spc.molecule[0].isCyclic(): + mol_list.append(spc.molecule[0]) + + if mol_list: + # Generate a unique molecule list to avoid race conditions when writing the QMTP files in parallel. + for i, mol_QMTP in enumerate(mol_list): + if mol_QMTP: + for j in range(i+1, len(mol_list)): + mol2_QMTP = mol_list[j] + if mol2_QMTP and mol_QMTP.isIsomorphic(mol2_QMTP): + mol_list[j] = [] + mol_list = filter(None, mol_list) + + # Zip arguments for use in map. + mol_list_arg = [] + for mol in mol_list: + mol_list_arg.append((mol, quantumMechanics)) + + if mol_list_arg: + + # Execute multiprocessing map. It blocks until the result is ready. + # This method chops the iterable into a number of chunks which it + # submits to the process pool as separate tasks. + if procnum == 1: + logging.info('Writing QM files with {0} process.'.format(procnum)) + map(_write_QMfiles_star, mol_list_arg) + else: + logging.info('Writing QM files with {0} processes.'.format(procnum)) + p = Pool(processes=procnum) + p.map(_write_QMfiles_star, mol_list_arg) + p.close() + p.join() + +def _write_QMfiles_star(args): + """Wrapper to unpack zipped arguments for use with map""" + return write_QMfiles(*args) + +def write_QMfiles(mol, quantumMechanics): + """ + If quantumMechanics is turned on in the input file the QM files are written here in parallel. + Later, thermo is calculated for one species at a time in self.processNewReactions() by looking up the + values in the QM files. 
+ """ + quantumMechanics.getThermoData(mol) + class KineticsFamily(Database): """ @@ -1155,6 +1215,31 @@ def addKineticsRulesFromTrainingSet(self, thermoDatabase=None,trainIndices=None) # trainingSet=True used later to does not allow species to match a liquid phase library and get corrected thermo which will affect reverse rate calculation item = Reaction(reactants=[Species(molecule=[m.molecule[0].copy(deep=True)], label=m.label) for m in entry.item.reactants], products=[Species(molecule=[m.molecule[0].copy(deep=True)], label=m.label) for m in entry.item.products]) + + # Determine number of parallel processes. + from rmgpy.rmg.react import determine_procnum_from_RAM + procnum = determine_procnum_from_RAM() + + if procnum > 1: + # If QMTP and multiprocessing write QMTP files here in parallel. + QMTP_list = [] + QMTP_list.extend(item.reactants) + QMTP_list.extend(item.products) + if QMTP_list: + from rmgpy.rmg.input import getInput + quantumMechanics = getInput('quantumMechanics') + if quantumMechanics: + # Generate unique species list to avoid race conditions when writing the QMTP files in parallel. 
+ for i, spc_QMTP in enumerate(QMTP_list): + if spc_QMTP: + spc_QMTP.generate_resonance_structures() + for j in range(i+1, len(QMTP_list)): + spc2_QMTP = QMTP_list[j] + if spc2_QMTP and spc_QMTP.isIsomorphic(spc2_QMTP): + QMTP_list[j] = [] + QMTP_list = filter(None, QMTP_list) + generate_QMfiles(QMTP_list, quantumMechanics, procnum) + for reactant in item.reactants: reactant.generate_resonance_structures() reactant.thermo = thermoDatabase.getThermoData(reactant, trainingSet=True) diff --git a/rmgpy/data/kinetics/familyTest.py b/rmgpy/data/kinetics/familyTest.py index 004eb32ef5..0ff114b39b 100644 --- a/rmgpy/data/kinetics/familyTest.py +++ b/rmgpy/data/kinetics/familyTest.py @@ -39,7 +39,7 @@ from rmgpy import settings from rmgpy.data.thermo import ThermoDatabase from rmgpy.data.kinetics.database import KineticsDatabase -from rmgpy.data.kinetics.family import TemplateReaction +from rmgpy.data.kinetics.family import TemplateReaction, generate_QMfiles from rmgpy.data.rmg import RMGDatabase from rmgpy.molecule import Molecule from rmgpy.species import Species @@ -930,3 +930,33 @@ def test_reactant_num_mismatch_2(self): # self.assertEquals(len(reactionList), 14) reactionList = self.database.kinetics.families['Surface_Dissociation_vdW'].generateReactions(reactants) self.assertEquals(len(reactionList), 0) + +def test_generate_QMfiles(): + """Test that generate_QMfiles() works""" + + from rmgpy.qm.main import QMCalculator + quantumMechanics = QMCalculator(software = 'mopac', + method = 'pm3', + fileStore = 'QMfiles', + scratchDirectory = './', + onlyCyclics = True, + maxRadicalNumber = 0, + ) + + spc1 = Species().fromSMILES('c1ccccc1') + spc2 = Species().fromSMILES('CC1C=CC=CC=1') + QMTP_list = [spc1, spc2] + + procnum = 2 + + # Generate unique species list to avoid race conditions when writing the QMTP files in parallel. 
+ for i, spc_QMTP in enumerate(QMTP_list): + if spc_QMTP: + spc_QMTP.generate_resonance_structures() + for j in range(i+1, len(QMTP_list)): + spc2_QMTP = QMTP_list[j] + if spc2_QMTP and spc_QMTP.isIsomorphic(spc2_QMTP): + QMTP_list[j] = [] + QMTP_list = filter(None, QMTP_list) + generate_QMfiles(QMTP_list, quantumMechanics, procnum) + From 4e5ce61d9eeaa7e7422ad3a9c7ed4036f004b9ce Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 30 May 2019 18:44:32 -0400 Subject: [PATCH 16/28] For Species, regenerate resonance structures if atom IDs are invalid --- rmgpy/species.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rmgpy/species.py b/rmgpy/species.py index c9239236fe..15b9d51675 100644 --- a/rmgpy/species.py +++ b/rmgpy/species.py @@ -219,7 +219,7 @@ def generate_resonance_structures(self, keep_isomorphic=True, filter_structures= `molecule` is already greater than one, it is assumed that all of the resonance structures have already been generated. """ - if len(self.molecule) == 1: + if len(self.molecule) == 1 or not self.molecule[0].atomIDValid(): if not self.molecule[0].atomIDValid(): self.molecule[0].assignAtomIDs() self.molecule = self.molecule[0].generate_resonance_structures(keep_isomorphic=keep_isomorphic, From 3f3ada2e9baf3e8aba42896e56bde56b0d42769b Mon Sep 17 00:00:00 2001 From: Agnes Date: Thu, 30 May 2019 18:44:32 -0400 Subject: [PATCH 17/28] Restore retrieve species to keep pdep functional. 
--- rmgpy/rmg/model.py | 21 ++++++++++++++------- rmgpy/rmg/modelTest.py | 1 - rmgpy/rmg/reactTest.py | 13 ------------- 3 files changed, 14 insertions(+), 21 deletions(-) diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index 9d09cde2a8..c3c1548f4f 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -595,15 +595,10 @@ def enlarge(self, newObject=None, reactEdge=False, rxns = react_all(self.core.species, numOldCoreSpecies, unimolecularReact, bimolecularReact, trimolecularReact=trimolecularReact) - # Get new species and save in spcs - spcs = [] - spcs_list = [] - for rxn in rxns: - spcs.extend(rxn.reactants) - spcs.extend(rxn.products) + spcs = [self.retrieve_species(rxn) for rxn in rxns] for rxn, spc in zip(rxns, spcs): - self.processNewReactions([rxn], spc, generateThermo=False) + self.processNewReactions([rxn], spc, generateThermo=False) ################################################################ # Begin processing the new species and reactions @@ -1904,6 +1899,18 @@ def getSpecies(self, obj): return spc return obj + def retrieve_species(self, rxn): + """ + Searches for the first reactant or product in the reaction that is + a core species, which was used to generate the reaction in the first + place. Reactants or products not represented in the core will be + a newly-generated structure. 
+ """ + for obj in itertools.chain(rxn.reactants, rxn.products): + for spc in self.core.species: + if obj.isIsomorphic(spc): + return spc + raise Exception("No core species were found in either reactants or products of {0}!".format(rxn)) def generateReactionKey(rxn, useProducts=False): """ diff --git a/rmgpy/rmg/modelTest.py b/rmgpy/rmg/modelTest.py index 3c60ca07fc..8442324c2a 100644 --- a/rmgpy/rmg/modelTest.py +++ b/rmgpy/rmg/modelTest.py @@ -389,7 +389,6 @@ def testThermoFilterDown(self): self.assertEquals(len(difset),1) #should be one because we thermo filtered down to one edge species - def test_checkForExistingReaction_eliminates_identical_reactions(self): """ Test that checkForExistingReaction catches identical reactions. diff --git a/rmgpy/rmg/reactTest.py b/rmgpy/rmg/reactTest.py index 48972d0243..3254b42629 100644 --- a/rmgpy/rmg/reactTest.py +++ b/rmgpy/rmg/reactTest.py @@ -70,18 +70,6 @@ def setUp(self): reactionLibraries=[] ) -# def testReact(self): -# """ -# Test that reaction generation from the available families works. -# """ -# spcA = Species().fromSMILES('[OH]') -# spcs = [Species().fromSMILES('CC'), Species().fromSMILES('[CH3]')] -# spcTuples = [(spcA, spc, ['H_Abstraction']) for spc in spcs] -# -# reactionList = list(react(*spcTuples)) -# self.assertIsNotNone(reactionList) -# self.assertTrue(all([isinstance(rxn, TemplateReaction) for rxn in reactionList])) - def testReactMultiproc(self): """ Test that reaction generation from the available families works with python multiprocessing. @@ -116,7 +104,6 @@ def testReactAll(self): self.assertIsNotNone(rxns) self.assertTrue(all([isinstance(rxn, TemplateReaction) for rxn in rxns])) - def tearDown(self): """ Reset the loaded database From 490ad1484683d34026d0d0404e8dcfeb2c6649bd Mon Sep 17 00:00:00 2001 From: Agnes Date: Thu, 30 May 2019 18:44:32 -0400 Subject: [PATCH 18/28] Moved thermo pruning to happen directly after thermo data calculation. Allows thermo pruning with parallel QMTP. 
--- rmgpy/rmg/model.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index c3c1548f4f..9fe353d36d 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -611,6 +611,10 @@ def enlarge(self, newObject=None, reactEdge=False, logging.info('Generating thermo for new species...') self.applyThermoToSpecies(procnum) + # Do thermodynamic filtering + if not numpy.isinf(self.toleranceThermoKeepSpeciesInEdge) and self.newSpeciesList != []: + self.thermoFilterSpecies(self.newSpeciesList) + # Generate kinetics of new reactions if self.newReactionList: logging.info('Generating kinetics for new reactions...') @@ -732,7 +736,6 @@ def processNewReactions(self, newReactions, newSpecies, pdepNetwork=None, genera if rxn is None: # Skip this reaction because there was something wrong with it continue - spcs = [] if isNew: # We've made a new reaction, so make sure the species involved # are in the core or edge @@ -743,13 +746,11 @@ def processNewReactions(self, newReactions, newSpecies, pdepNetwork=None, genera if spec not in self.core.species: allSpeciesInCore = False if spec not in self.edge.species: - spcs.append(spec) self.addSpeciesToEdge(spec) for spec in rxn.products: if spec not in self.core.species: allSpeciesInCore = False if spec not in self.edge.species: - spcs.append(spec) self.addSpeciesToEdge(spec) isomerAtoms = sum([len(spec.molecule[0].atoms) for spec in rxn.reactants]) @@ -795,10 +796,7 @@ def processNewReactions(self, newReactions, newSpecies, pdepNetwork=None, genera self.core.reactions.remove(rxn) if rxn in self.edge.reactions: self.edge.reactions.remove(rxn) - - if not numpy.isinf(self.toleranceThermoKeepSpeciesInEdge) and spcs != []: #do thermodynamic filtering - self.thermoFilterSpecies(spcs) - + def applyThermoToSpecies(self, procnum): """ Generate thermo for species. QM calculations are parallelized if requested. 
From 318e0743b6ad88c1c9762a983bd6033d3bd039aa Mon Sep 17 00:00:00 2001 From: Agnes Date: Thu, 30 May 2019 18:44:32 -0400 Subject: [PATCH 19/28] Fix pickle error for QMTP parallel. --- rmgpy/rmg/model.py | 59 +++++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index 9fe353d36d..ebc82de932 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -40,8 +40,8 @@ import resource import psutil -from sys import platform +from sys import platform from multiprocessing import Pool from rmgpy.display import display @@ -68,6 +68,15 @@ from pdep import PDepReaction, PDepNetwork ################################################################################ +def _write_QMfiles_star(args): + """Wrapper to unpack zipped arguments for use with map""" + return write_QMfiles(*args) + +def write_QMfiles(mol, quantumMechanics): + """ + If quantumMechanics is turned on thermo is calculated in parallel here. + """ + quantumMechanics.getThermoData(mol) class ReactionModel: """ @@ -808,26 +817,34 @@ def applyThermoToSpecies(self, procnum): # Generate a list of molecules. 
mol_list = [] for spc in self.newSpeciesList: - if spc.molecule[0].getRadicalCount() > quantumMechanics.settings.maxRadicalNumber: - for molecule in spc.molecule: - if quantumMechanics.settings.onlyCyclics and molecule.isCyclic(): - saturated_mol = molecule.copy(deep=True) - saturated_mol.saturate_radicals() - if saturated_mol not in mol_list: - mol_list.append(saturated_mol) - else: - if quantumMechanics.settings.onlyCyclics and spc.molecule[0].isCyclic(): - if spc.molecule[0] not in mol_list: - mol_list.append(spc.molecule[0]) - if procnum == 1: - logging.info('Writing QM files with {0} process.'.format(procnum)) - map(quantumMechanics.getThermoData, mol_list) - else: - logging.info('Writing QM files with {0} processes.'.format(procnum)) - p = Pool(processes=procnum) - p.map(quantumMechanics.getThermoData, mol_list) - p.close() - p.join() + if not spc.thermo: + if spc.molecule[0].getRadicalCount() > quantumMechanics.settings.maxRadicalNumber: + for molecule in spc.molecule: + if quantumMechanics.settings.onlyCyclics and molecule.isCyclic(): + saturated_mol = molecule.copy(deep=True) + saturated_mol.saturate_radicals() + if saturated_mol not in mol_list: + mol_list.append(saturated_mol) + else: + if quantumMechanics.settings.onlyCyclics and spc.molecule[0].isCyclic(): + if spc.molecule[0] not in mol_list: + mol_list.append(spc.molecule[0]) + if mol_list: + # Zip arguments for use in map. 
+ mol_list_arg = [] + for mol in mol_list: + mol_list_arg.append((mol, quantumMechanics)) + + if procnum == 1: + logging.info('Writing QM files with {0} process.'.format(procnum)) + #map(quantumMechanics.getThermoData, mol_list) + map(_write_QMfiles_star, mol_list_arg) + elif procnum > 1: + logging.info('Writing QM files with {0} processes.'.format(procnum)) + p = Pool(processes=procnum) + p.map(_write_QMfiles_star, mol_list_arg) + p.close() + p.join() # Serial thermo calculation for other methods map(self.generateThermo, self.newSpeciesList) From d59da68bb87348b9e931e57b1b0c358d011a34d8 Mon Sep 17 00:00:00 2001 From: Agnes Date: Thu, 30 May 2019 18:44:32 -0400 Subject: [PATCH 20/28] Update arkane/explorerTest.py. --- arkane/explorerTest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arkane/explorerTest.py b/arkane/explorerTest.py index 9245f49b6c..f95bd15309 100644 --- a/arkane/explorerTest.py +++ b/arkane/explorerTest.py @@ -76,7 +76,7 @@ def test_reactions(self): """ test that the right number of reactions are in output network """ - self.assertEqual(len(self.explorerjob.networks[0].pathReactions), 6) + self.assertEqual(len(self.explorerjob.networks[0].pathReactions), 5) def test_isomers(self): """ From a5085ddc9ba76f3f587bb6e885216b32fe860c7f Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 30 May 2019 18:44:32 -0400 Subject: [PATCH 21/28] Auto-format spacing in qm.mainTest --- rmgpy/qm/mainTest.py | 448 ++++++++++++++++++++++--------------------- 1 file changed, 225 insertions(+), 223 deletions(-) diff --git a/rmgpy/qm/mainTest.py b/rmgpy/qm/mainTest.py index 085c8630c2..dc1990eac7 100644 --- a/rmgpy/qm/mainTest.py +++ b/rmgpy/qm/mainTest.py @@ -40,25 +40,26 @@ from rmgpy.qm.gaussian import Gaussian from rmgpy.qm.mopac import Mopac + class TestQMSettings(unittest.TestCase): """ Contains unit tests for the QMSettings class. """ - + def setUp(self): """ A function run before each unit test in this class. 
""" - RMGpy_path = os.path.normpath(os.path.join(getPath(),'..')) - - self.settings1 = QMSettings(software = 'mopac', - method = 'pm3', - fileStore = os.path.join(RMGpy_path, 'testing', 'qm', 'QMfiles'), - scratchDirectory = None, - onlyCyclics = False, - maxRadicalNumber = 0, - ) - + RMGpy_path = os.path.normpath(os.path.join(getPath(), '..')) + + self.settings1 = QMSettings(software='mopac', + method='pm3', + fileStore=os.path.join(RMGpy_path, 'testing', 'qm', 'QMfiles'), + scratchDirectory=None, + onlyCyclics=False, + maxRadicalNumber=0, + ) + self.settings2 = QMSettings() def testCheckAllSet(self): @@ -69,223 +70,224 @@ def testCheckAllSet(self): self.settings1.checkAllSet() except AssertionError: self.fail("checkAllSet() raised unexpected AssertionError.") - + with self.assertRaises(AssertionError): self.settings2.checkAllSet() + class TestQMCalculator(unittest.TestCase): - """ - Contains unit tests for the QMSettings class. - """ - - mopExecutablePath = Mopac.executablePath - if not os.path.exists(mopExecutablePath): - NO_MOPAC = NO_LICENCE = True - else: - NO_MOPAC = False - process = subprocess.Popen(mopExecutablePath, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - stdut, stderr = process.communicate("\n") - NO_LICENCE = 'To install the MOPAC license' in stderr - - gaussExecutablePath = Gaussian.executablePath - NO_GAUSSIAN = not os.path.exists(gaussExecutablePath) - - - def setUp(self): - """ - A function run before each unit test in this class. 
- """ - RMGpy_path = os.path.normpath(os.path.join(getPath(),'..')) - - fileStore = os.path.join(RMGpy_path, 'testing', 'qm', 'QMfiles') - - self.mop1 = QMCalculator(software = 'mopac', - method = 'pm3', - fileStore = fileStore - ) - - self.mop2 = QMCalculator(software = 'mopac', - method = 'pm6', - ) - - self.mop3 = QMCalculator(software = 'mopac', - method = 'pm7', - fileStore = fileStore - ) - - self.mop4 = QMCalculator(software = 'mopac', - method = 'pm8', - fileStore = fileStore - ) - - self.gauss1 = QMCalculator(software = 'gaussian', - method = 'pm3', - ) - - self.gauss2 = QMCalculator(software = 'gaussian', - method = 'pm6', - fileStore = fileStore - ) - - self.gauss3 = QMCalculator(software = 'gaussian', - method = 'pm7', - fileStore = fileStore - ) - - self.molpro1 = QMCalculator(software = 'molpro', - method = 'mp2', - fileStore = fileStore - ) - - self.qmmol1 = QMCalculator(fileStore=fileStore) - - self.qmmol2 = QMCalculator(fileStore=fileStore) - - def testSetDefaultOutputDirectory(self): - """ - Test that setDefaultOutputDirectory() works correctly. 
- """ - self.assertIsNotNone(self.mop1.settings.fileStore) - self.assertIsNotNone(self.mop3.settings.fileStore) - self.assertIsNotNone(self.gauss2.settings.fileStore) - - self.assertIsNone(self.mop2.settings.fileStore) - self.assertIsNone(self.gauss1.settings.fileStore) - - self.assertIsNone(self.mop1.settings.scratchDirectory) - self.assertIsNone(self.mop2.settings.scratchDirectory) - self.assertIsNone(self.mop3.settings.scratchDirectory) - self.assertIsNone(self.gauss1.settings.scratchDirectory) - self.assertIsNone(self.gauss2.settings.scratchDirectory) - - # Now set the default directories for those not set - outputDirectory = os.path.join(self.mop1.settings.fileStore, '..','..') - self.mop1.setDefaultOutputDirectory(outputDirectory) - self.mop2.setDefaultOutputDirectory(outputDirectory) - self.mop3.setDefaultOutputDirectory(outputDirectory) - self.gauss1.setDefaultOutputDirectory(outputDirectory) - self.gauss2.setDefaultOutputDirectory(outputDirectory) - - self.assertIsNotNone(self.mop1.settings.fileStore) - self.assertIsNotNone(self.mop2.settings.fileStore) - self.assertIsNotNone(self.mop3.settings.fileStore) - self.assertIsNotNone(self.gauss1.settings.fileStore) - self.assertIsNotNone(self.gauss2.settings.fileStore) - self.assertIsNotNone(self.mop1.settings.scratchDirectory) - self.assertIsNotNone(self.mop2.settings.scratchDirectory) - self.assertIsNotNone(self.mop3.settings.scratchDirectory) - self.assertIsNotNone(self.gauss1.settings.scratchDirectory) - self.assertIsNotNone(self.gauss2.settings.scratchDirectory) - - def testInitialize(self): - """ - Test that initialize() works correctly. 
- """ - - # Now set the default directories for those not set - outputDirectory = os.path.join(self.mop1.settings.fileStore, '..', '..') - self.mop1.setDefaultOutputDirectory(outputDirectory) - self.mop2.setDefaultOutputDirectory(outputDirectory) - self.mop3.setDefaultOutputDirectory(outputDirectory) - self.gauss1.setDefaultOutputDirectory(outputDirectory) - self.gauss2.setDefaultOutputDirectory(outputDirectory) - - try: - self.mop1.initialize() - self.mop2.initialize() - self.mop3.initialize() - self.gauss1.initialize() - self.gauss2.initialize() - except AssertionError: - self.fail("initialize() raised unexpected AssertionError.") - except Exception: - self.fail("initialize() raised Exception. Output file paths not correctly set.") - - def testGetThermoData(self): - """ - Test that getThermoData() fails when expected. - """ - outputDirectory = os.path.join(self.mop4.settings.fileStore, '..', '..') - self.mop4.setDefaultOutputDirectory(outputDirectory) - self.gauss3.setDefaultOutputDirectory(outputDirectory) - self.molpro1.setDefaultOutputDirectory(outputDirectory) - - mol = Molecule().fromSMILES('C1=CC=C2C=CC=CC2=C1') - - with self.assertRaises(Exception): - self.mop4.getThermoData(mol) - self.gauss3.getThermoData(mol) - self.molpro1.getThermoData(mol) - - @unittest.skipIf(NO_MOPAC, "MOPAC not found. Try resetting your environment variables if you want to use it.") - @unittest.skipIf(NO_LICENCE, "MOPAC license not installed. Run mopac for instructions") - def testGetThermoDataMopac(self): - """ - Test that Mocpac getThermoData() works correctly. 
- """ - outputDirectory = os.path.join(self.mop1.settings.fileStore, '..', '..') - self.mop1.setDefaultOutputDirectory(outputDirectory) - self.mop2.setDefaultOutputDirectory(outputDirectory) - self.mop3.setDefaultOutputDirectory(outputDirectory) - - mol = Molecule().fromSMILES('C1=CC=C2C=CC=CC2=C1') - - for directory in (self.mop1.settings.fileStore, self.mop1.settings.scratchDirectory): - shutil.rmtree(directory, ignore_errors=True) - - for directory in (self.mop2.settings.fileStore, self.mop2.settings.scratchDirectory): - shutil.rmtree(directory, ignore_errors=True) - - for directory in (self.mop3.settings.fileStore, self.mop3.settings.scratchDirectory): - shutil.rmtree(directory, ignore_errors=True) - - thermo1 = self.mop1.getThermoData(mol) - thermo2 = self.mop2.getThermoData(mol) - thermo3 = self.mop3.getThermoData(mol) - - self.assertTrue(thermo1.comment.startswith('QM MopacMolPM3')) - self.assertTrue(thermo2.comment.startswith('QM MopacMolPM6')) - self.assertTrue(thermo3.comment.startswith('QM MopacMolPM7')) - - self.assertAlmostEqual(thermo1.H298.value_si, 169708.0608, 1) # to 1 decimal place - self.assertAlmostEqual(thermo1.S298.value_si, 334.5007584, 1) # to 1 decimal place - self.assertAlmostEqual(thermo2.H298.value_si, 167704.4270, 1) # to 1 decimal place - self.assertAlmostEqual(thermo2.S298.value_si, 338.0999241, 1) # to 1 decimal place - self.assertAlmostEqual(thermo3.H298.value_si, 166168.8571, 1) # to 1 decimal place - self.assertAlmostEqual(thermo3.S298.value_si, 336.3330406, 1) # to 1 decimal place - - @unittest.skipIf(NO_GAUSSIAN, "Gaussian not found. Try resetting your environment variables if you want to use it.") - def testGetThermoDataGaussian(self): - """ - Test that Gaussian getThermoData() works correctly. 
- """ - outputDirectory = os.path.join(self.mop1.settings.fileStore, '..', '..') - self.gauss1.setDefaultOutputDirectory(outputDirectory) - self.gauss2.setDefaultOutputDirectory(outputDirectory) - - mol = Molecule().fromSMILES('C1=CC=C2C=CC=CC2=C1') - - for directory in (self.gauss1.settings.fileStore, self.gauss1.settings.scratchDirectory): - shutil.rmtree(directory, ignore_errors=True) - - for directory in (self.gauss1.settings.fileStore, self.gauss2.settings.scratchDirectory): - shutil.rmtree(directory, ignore_errors=True) - - thermo1 = self.gauss1.getThermoData(mol) - thermo2 = self.gauss2.getThermoData(mol) - - self.assertTrue(thermo1.comment.startswith('QM GaussianMolPM3')) - self.assertTrue(thermo2.comment.startswith('QM GaussianMolPM6')) - - self.assertAlmostEqual(thermo1.H298.value_si, 169908.3376, 0) # to 1 decimal place - self.assertAlmostEqual(thermo1.S298.value_si, 335.5438748, 0) # to 1 decimal place - self.assertAlmostEqual(thermo2.H298.value_si, 169326.2504, 0) # to 1 decimal place - self.assertAlmostEqual(thermo2.S298.value_si, 338.2696063, 0) # to 1 decimal place + """ + Contains unit tests for the QMSettings class. + """ + + mopExecutablePath = Mopac.executablePath + if not os.path.exists(mopExecutablePath): + NO_MOPAC = NO_LICENCE = True + else: + NO_MOPAC = False + process = subprocess.Popen(mopExecutablePath, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdut, stderr = process.communicate("\n") + NO_LICENCE = 'To install the MOPAC license' in stderr + + gaussExecutablePath = Gaussian.executablePath + NO_GAUSSIAN = not os.path.exists(gaussExecutablePath) + + def setUp(self): + """ + A function run before each unit test in this class. 
+ """ + RMGpy_path = os.path.normpath(os.path.join(getPath(), '..')) + + fileStore = os.path.join(RMGpy_path, 'testing', 'qm', 'QMfiles') + + self.mop1 = QMCalculator(software='mopac', + method='pm3', + fileStore=fileStore + ) + + self.mop2 = QMCalculator(software='mopac', + method='pm6', + ) + + self.mop3 = QMCalculator(software='mopac', + method='pm7', + fileStore=fileStore + ) + + self.mop4 = QMCalculator(software='mopac', + method='pm8', + fileStore=fileStore + ) + + self.gauss1 = QMCalculator(software='gaussian', + method='pm3', + ) + + self.gauss2 = QMCalculator(software='gaussian', + method='pm6', + fileStore=fileStore + ) + + self.gauss3 = QMCalculator(software='gaussian', + method='pm7', + fileStore=fileStore + ) + + self.molpro1 = QMCalculator(software='molpro', + method='mp2', + fileStore=fileStore + ) + + self.qmmol1 = QMCalculator(fileStore=fileStore) + + self.qmmol2 = QMCalculator(fileStore=fileStore) + + def testSetDefaultOutputDirectory(self): + """ + Test that setDefaultOutputDirectory() works correctly. 
+ """ + self.assertIsNotNone(self.mop1.settings.fileStore) + self.assertIsNotNone(self.mop3.settings.fileStore) + self.assertIsNotNone(self.gauss2.settings.fileStore) + + self.assertIsNone(self.mop2.settings.fileStore) + self.assertIsNone(self.gauss1.settings.fileStore) + + self.assertIsNone(self.mop1.settings.scratchDirectory) + self.assertIsNone(self.mop2.settings.scratchDirectory) + self.assertIsNone(self.mop3.settings.scratchDirectory) + self.assertIsNone(self.gauss1.settings.scratchDirectory) + self.assertIsNone(self.gauss2.settings.scratchDirectory) + + # Now set the default directories for those not set + outputDirectory = os.path.join(self.mop1.settings.fileStore, '..', '..') + self.mop1.setDefaultOutputDirectory(outputDirectory) + self.mop2.setDefaultOutputDirectory(outputDirectory) + self.mop3.setDefaultOutputDirectory(outputDirectory) + self.gauss1.setDefaultOutputDirectory(outputDirectory) + self.gauss2.setDefaultOutputDirectory(outputDirectory) + + self.assertIsNotNone(self.mop1.settings.fileStore) + self.assertIsNotNone(self.mop2.settings.fileStore) + self.assertIsNotNone(self.mop3.settings.fileStore) + self.assertIsNotNone(self.gauss1.settings.fileStore) + self.assertIsNotNone(self.gauss2.settings.fileStore) + self.assertIsNotNone(self.mop1.settings.scratchDirectory) + self.assertIsNotNone(self.mop2.settings.scratchDirectory) + self.assertIsNotNone(self.mop3.settings.scratchDirectory) + self.assertIsNotNone(self.gauss1.settings.scratchDirectory) + self.assertIsNotNone(self.gauss2.settings.scratchDirectory) + + def testInitialize(self): + """ + Test that initialize() works correctly. 
+ """ + + # Now set the default directories for those not set + outputDirectory = os.path.join(self.mop1.settings.fileStore, '..', '..') + self.mop1.setDefaultOutputDirectory(outputDirectory) + self.mop2.setDefaultOutputDirectory(outputDirectory) + self.mop3.setDefaultOutputDirectory(outputDirectory) + self.gauss1.setDefaultOutputDirectory(outputDirectory) + self.gauss2.setDefaultOutputDirectory(outputDirectory) + + try: + self.mop1.initialize() + self.mop2.initialize() + self.mop3.initialize() + self.gauss1.initialize() + self.gauss2.initialize() + except AssertionError: + self.fail("initialize() raised unexpected AssertionError.") + except Exception: + self.fail("initialize() raised Exception. Output file paths not correctly set.") + + def testGetThermoData(self): + """ + Test that getThermoData() fails when expected. + """ + outputDirectory = os.path.join(self.mop4.settings.fileStore, '..', '..') + self.mop4.setDefaultOutputDirectory(outputDirectory) + self.gauss3.setDefaultOutputDirectory(outputDirectory) + self.molpro1.setDefaultOutputDirectory(outputDirectory) + + mol = Molecule().fromSMILES('C1=CC=C2C=CC=CC2=C1') + + with self.assertRaises(Exception): + self.mop4.getThermoData(mol) + self.gauss3.getThermoData(mol) + self.molpro1.getThermoData(mol) + + @unittest.skipIf(NO_MOPAC, "MOPAC not found. Try resetting your environment variables if you want to use it.") + @unittest.skipIf(NO_LICENCE, "MOPAC license not installed. Run mopac for instructions") + def testGetThermoDataMopac(self): + """ + Test that Mocpac getThermoData() works correctly. 
+ """ + outputDirectory = os.path.join(self.mop1.settings.fileStore, '..', '..') + self.mop1.setDefaultOutputDirectory(outputDirectory) + self.mop2.setDefaultOutputDirectory(outputDirectory) + self.mop3.setDefaultOutputDirectory(outputDirectory) + + mol = Molecule().fromSMILES('C1=CC=C2C=CC=CC2=C1') + + for directory in (self.mop1.settings.fileStore, self.mop1.settings.scratchDirectory): + shutil.rmtree(directory, ignore_errors=True) + + for directory in (self.mop2.settings.fileStore, self.mop2.settings.scratchDirectory): + shutil.rmtree(directory, ignore_errors=True) + + for directory in (self.mop3.settings.fileStore, self.mop3.settings.scratchDirectory): + shutil.rmtree(directory, ignore_errors=True) + + thermo1 = self.mop1.getThermoData(mol) + thermo2 = self.mop2.getThermoData(mol) + thermo3 = self.mop3.getThermoData(mol) + + self.assertTrue(thermo1.comment.startswith('QM MopacMolPM3')) + self.assertTrue(thermo2.comment.startswith('QM MopacMolPM6')) + self.assertTrue(thermo3.comment.startswith('QM MopacMolPM7')) + + self.assertAlmostEqual(thermo1.H298.value_si, 169708.0608, 1) # to 1 decimal place + self.assertAlmostEqual(thermo1.S298.value_si, 334.5007584, 1) # to 1 decimal place + self.assertAlmostEqual(thermo2.H298.value_si, 167704.4270, 1) # to 1 decimal place + self.assertAlmostEqual(thermo2.S298.value_si, 338.0999241, 1) # to 1 decimal place + self.assertAlmostEqual(thermo3.H298.value_si, 166168.8571, 1) # to 1 decimal place + self.assertAlmostEqual(thermo3.S298.value_si, 336.3330406, 1) # to 1 decimal place + + @unittest.skipIf(NO_GAUSSIAN, "Gaussian not found. Try resetting your environment variables if you want to use it.") + def testGetThermoDataGaussian(self): + """ + Test that Gaussian getThermoData() works correctly. 
+ """ + outputDirectory = os.path.join(self.mop1.settings.fileStore, '..', '..') + self.gauss1.setDefaultOutputDirectory(outputDirectory) + self.gauss2.setDefaultOutputDirectory(outputDirectory) + + mol = Molecule().fromSMILES('C1=CC=C2C=CC=CC2=C1') + + for directory in (self.gauss1.settings.fileStore, self.gauss1.settings.scratchDirectory): + shutil.rmtree(directory, ignore_errors=True) + + for directory in (self.gauss1.settings.fileStore, self.gauss2.settings.scratchDirectory): + shutil.rmtree(directory, ignore_errors=True) + + thermo1 = self.gauss1.getThermoData(mol) + thermo2 = self.gauss2.getThermoData(mol) + + self.assertTrue(thermo1.comment.startswith('QM GaussianMolPM3')) + self.assertTrue(thermo2.comment.startswith('QM GaussianMolPM6')) + + self.assertAlmostEqual(thermo1.H298.value_si, 169908.3376, 0) # to 1 decimal place + self.assertAlmostEqual(thermo1.S298.value_si, 335.5438748, 0) # to 1 decimal place + self.assertAlmostEqual(thermo2.H298.value_si, 169326.2504, 0) # to 1 decimal place + self.assertAlmostEqual(thermo2.S298.value_si, 338.2696063, 0) # to 1 decimal place + ################################################################################ if __name__ == '__main__': - unittest.main( testRunner = unittest.TextTestRunner(verbosity=2) ) + unittest.main(testRunner=unittest.TextTestRunner(verbosity=2)) From 05856079758e677b4864679563153e979806bbd7 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 30 May 2019 18:44:32 -0400 Subject: [PATCH 22/28] Make generate_QMfiles a method of QMCalculator --- rmgpy/data/kinetics/family.py | 82 +++-------------------------------- rmgpy/qm/main.py | 47 +++++++++++++++++++- rmgpy/rmg/model.py | 41 +----------------- 3 files changed, 52 insertions(+), 118 deletions(-) diff --git a/rmgpy/data/kinetics/family.py b/rmgpy/data/kinetics/family.py index 6c15a1bf4c..63869e171a 100644 --- a/rmgpy/data/kinetics/family.py +++ b/rmgpy/data/kinetics/family.py @@ -40,7 +40,6 @@ from copy import deepcopy from collections import 
OrderedDict from sklearn.model_selection import KFold -from multiprocessing import Pool from rmgpy.constraints import failsSpeciesConstraints from rmgpy.data.base import Database, Entry, LogicNode, LogicOr, ForbiddenStructures,\ @@ -373,64 +372,6 @@ def applyReverse(self, struct, unique=True): ################################################################################ -def generate_QMfiles(spcs, quantumMechanics, procnum): - """ - If quantumMechanics is turned on in the input file the QM files are written here in parallel. - Later, thermo is calculated for one species at a time in self.processNewReactions() by looking up the - values in the QM files. - """ - # Generating a list of molecules. - mol_list = [] - for spc in spcs: - if spc.molecule[0].getRadicalCount() > quantumMechanics.settings.maxRadicalNumber: - for molecule in spc.molecule: - if quantumMechanics.settings.onlyCyclics and molecule.isCyclic(): - mol_list.append(molecule) - else: - if quantumMechanics.settings.onlyCyclics and spc.molecule[0].isCyclic(): - mol_list.append(spc.molecule[0]) - - if mol_list: - # Generate a unique molecule list to avoid race conditions when writing the QMTP files in parallel. - for i, mol_QMTP in enumerate(mol_list): - if mol_QMTP: - for j in range(i+1, len(mol_list)): - mol2_QMTP = mol_list[j] - if mol2_QMTP and mol_QMTP.isIsomorphic(mol2_QMTP): - mol_list[j] = [] - mol_list = filter(None, mol_list) - - # Zip arguments for use in map. - mol_list_arg = [] - for mol in mol_list: - mol_list_arg.append((mol, quantumMechanics)) - - if mol_list_arg: - - # Execute multiprocessing map. It blocks until the result is ready. - # This method chops the iterable into a number of chunks which it - # submits to the process pool as separate tasks. 
- if procnum == 1: - logging.info('Writing QM files with {0} process.'.format(procnum)) - map(_write_QMfiles_star, mol_list_arg) - else: - logging.info('Writing QM files with {0} processes.'.format(procnum)) - p = Pool(processes=procnum) - p.map(_write_QMfiles_star, mol_list_arg) - p.close() - p.join() - -def _write_QMfiles_star(args): - """Wrapper to unpack zipped arguments for use with map""" - return write_QMfiles(*args) - -def write_QMfiles(mol, quantumMechanics): - """ - If quantumMechanics is turned on in the input file the QM files are written here in parallel. - Later, thermo is calculated for one species at a time in self.processNewReactions() by looking up the - values in the QM files. - """ - quantumMechanics.getThermoData(mol) class KineticsFamily(Database): @@ -1222,24 +1163,11 @@ def addKineticsRulesFromTrainingSet(self, thermoDatabase=None,trainIndices=None) if procnum > 1: # If QMTP and multiprocessing write QMTP files here in parallel. - QMTP_list = [] - QMTP_list.extend(item.reactants) - QMTP_list.extend(item.products) - if QMTP_list: - from rmgpy.rmg.input import getInput - quantumMechanics = getInput('quantumMechanics') - if quantumMechanics: - # Generate unique species list to avoid race conditions when writing the QMTP files in parallel. 
- for i, spc_QMTP in enumerate(QMTP_list): - if spc_QMTP: - spc_QMTP.generate_resonance_structures() - for j in range(i+1, len(QMTP_list)): - spc2_QMTP = QMTP_list[j] - if spc2_QMTP and spc_QMTP.isIsomorphic(spc2_QMTP): - QMTP_list[j] = [] - QMTP_list = filter(None, QMTP_list) - generate_QMfiles(QMTP_list, quantumMechanics, procnum) - + from rmgpy.rmg.input import getInput + quantumMechanics = getInput('quantumMechanics') + if quantumMechanics: + quantumMechanics.runJobs(item.reactants+item.products, procnum=procnum) + for reactant in item.reactants: reactant.generate_resonance_structures() reactant.thermo = thermoDatabase.getThermoData(reactant, trainingSet=True) diff --git a/rmgpy/qm/main.py b/rmgpy/qm/main.py index 5a66f5a2a9..7dd0cb4504 100644 --- a/rmgpy/qm/main.py +++ b/rmgpy/qm/main.py @@ -29,6 +29,7 @@ ############################################################################### import os +from multiprocessing import Pool import logging @@ -36,6 +37,7 @@ import rmgpy.qm.gaussian from rmgpy.data.thermo import ThermoLibrary + class QMSettings(): """ A minimal class to store settings related to quantum mechanics calculations. @@ -226,7 +228,50 @@ def getThermoData(self, molecule): else: raise Exception("Unknown QM software '{0}'".format(self.settings.software)) return thermo0 - + + def runJobs(self, spc_list, procnum=1): + """ + Run QM jobs for the provided species list (in parallel if requested). + """ + mol_list = [] + for spc in spc_list: + if spc.molecule[0].getRadicalCount() > self.settings.maxRadicalNumber: + for molecule in spc.molecule: + if self.settings.onlyCyclics and molecule.isCyclic(): + saturated_mol = molecule.copy(deep=True) + saturated_mol.saturate_radicals() + if saturated_mol not in mol_list: + mol_list.append(saturated_mol) + else: + if self.settings.onlyCyclics and spc.molecule[0].isCyclic(): + if spc.molecule[0] not in mol_list: + mol_list.append(spc.molecule[0]) + if mol_list: + # Zip arguments for use in map. 
+ qm_arg_list = [(self, mol) for mol in mol_list] + + if procnum == 1: + logging.info('Writing QM files with {0} process.'.format(procnum)) + map(_write_QMfiles_star, qm_arg_list) + elif procnum > 1: + logging.info('Writing QM files with {0} processes.'.format(procnum)) + p = Pool(processes=procnum) + p.map(_write_QMfiles_star, qm_arg_list) + p.close() + p.join() + + +def _write_QMfiles_star(args): + """Wrapper to unpack zipped arguments for use with map""" + return _write_QMfiles(*args) + + +def _write_QMfiles(quantumMechanics, mol): + """ + If quantumMechanics is turned on thermo is calculated in parallel here. + """ + quantumMechanics.getThermoData(mol) + def save(rmg): # Save the QM thermo to a library if QM was turned on diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index ebc82de932..afb98249e8 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -68,15 +68,6 @@ from pdep import PDepReaction, PDepNetwork ################################################################################ -def _write_QMfiles_star(args): - """Wrapper to unpack zipped arguments for use with map""" - return write_QMfiles(*args) - -def write_QMfiles(mol, quantumMechanics): - """ - If quantumMechanics is turned on thermo is calculated in parallel here. - """ - quantumMechanics.getThermoData(mol) class ReactionModel: """ @@ -814,37 +805,7 @@ def applyThermoToSpecies(self, procnum): quantumMechanics = getInput('quantumMechanics') if quantumMechanics: - # Generate a list of molecules. 
- mol_list = [] - for spc in self.newSpeciesList: - if not spc.thermo: - if spc.molecule[0].getRadicalCount() > quantumMechanics.settings.maxRadicalNumber: - for molecule in spc.molecule: - if quantumMechanics.settings.onlyCyclics and molecule.isCyclic(): - saturated_mol = molecule.copy(deep=True) - saturated_mol.saturate_radicals() - if saturated_mol not in mol_list: - mol_list.append(saturated_mol) - else: - if quantumMechanics.settings.onlyCyclics and spc.molecule[0].isCyclic(): - if spc.molecule[0] not in mol_list: - mol_list.append(spc.molecule[0]) - if mol_list: - # Zip arguments for use in map. - mol_list_arg = [] - for mol in mol_list: - mol_list_arg.append((mol, quantumMechanics)) - - if procnum == 1: - logging.info('Writing QM files with {0} process.'.format(procnum)) - #map(quantumMechanics.getThermoData, mol_list) - map(_write_QMfiles_star, mol_list_arg) - elif procnum > 1: - logging.info('Writing QM files with {0} processes.'.format(procnum)) - p = Pool(processes=procnum) - p.map(_write_QMfiles_star, mol_list_arg) - p.close() - p.join() + quantumMechanics.runJobs(self.newSpeciesList, procnum=procnum) # Serial thermo calculation for other methods map(self.generateThermo, self.newSpeciesList) From fcda6b6413769c58a957e491ed9d1190450168e0 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 30 May 2019 18:44:32 -0400 Subject: [PATCH 23/28] Move generate_QMfiles unit test to qm.mainTest --- rmgpy/data/kinetics/familyTest.py | 32 +----------------------- rmgpy/qm/mainTest.py | 41 ++++++++++++++++++++++--------- 2 files changed, 31 insertions(+), 42 deletions(-) diff --git a/rmgpy/data/kinetics/familyTest.py b/rmgpy/data/kinetics/familyTest.py index 0ff114b39b..004eb32ef5 100644 --- a/rmgpy/data/kinetics/familyTest.py +++ b/rmgpy/data/kinetics/familyTest.py @@ -39,7 +39,7 @@ from rmgpy import settings from rmgpy.data.thermo import ThermoDatabase from rmgpy.data.kinetics.database import KineticsDatabase -from rmgpy.data.kinetics.family import TemplateReaction, 
generate_QMfiles +from rmgpy.data.kinetics.family import TemplateReaction from rmgpy.data.rmg import RMGDatabase from rmgpy.molecule import Molecule from rmgpy.species import Species @@ -930,33 +930,3 @@ def test_reactant_num_mismatch_2(self): # self.assertEquals(len(reactionList), 14) reactionList = self.database.kinetics.families['Surface_Dissociation_vdW'].generateReactions(reactants) self.assertEquals(len(reactionList), 0) - -def test_generate_QMfiles(): - """Test that generate_QMfiles() works""" - - from rmgpy.qm.main import QMCalculator - quantumMechanics = QMCalculator(software = 'mopac', - method = 'pm3', - fileStore = 'QMfiles', - scratchDirectory = './', - onlyCyclics = True, - maxRadicalNumber = 0, - ) - - spc1 = Species().fromSMILES('c1ccccc1') - spc2 = Species().fromSMILES('CC1C=CC=CC=1') - QMTP_list = [spc1, spc2] - - procnum = 2 - - # Generate unique species list to avoid race conditions when writing the QMTP files in parallel. - for i, spc_QMTP in enumerate(QMTP_list): - if spc_QMTP: - spc_QMTP.generate_resonance_structures() - for j in range(i+1, len(QMTP_list)): - spc2_QMTP = QMTP_list[j] - if spc2_QMTP and spc_QMTP.isIsomorphic(spc2_QMTP): - QMTP_list[j] = [] - QMTP_list = filter(None, QMTP_list) - generate_QMfiles(QMTP_list, quantumMechanics, procnum) - diff --git a/rmgpy/qm/mainTest.py b/rmgpy/qm/mainTest.py index dc1990eac7..dfbc732fa8 100644 --- a/rmgpy/qm/mainTest.py +++ b/rmgpy/qm/mainTest.py @@ -36,6 +36,7 @@ from rmgpy import getPath from rmgpy.qm.main import QMSettings, QMCalculator from rmgpy.molecule import Molecule +from rmgpy.species import Species from rmgpy.qm.gaussian import Gaussian from rmgpy.qm.mopac import Mopac @@ -99,13 +100,12 @@ def setUp(self): """ A function run before each unit test in this class. 
""" - RMGpy_path = os.path.normpath(os.path.join(getPath(), '..')) - - fileStore = os.path.join(RMGpy_path, 'testing', 'qm', 'QMfiles') + rmg_path = os.path.normpath(os.path.join(getPath(), '..')) + self.fileStore = os.path.join(rmg_path, 'testing', 'qm', 'QMfiles') self.mop1 = QMCalculator(software='mopac', method='pm3', - fileStore=fileStore + fileStore=self.fileStore ) self.mop2 = QMCalculator(software='mopac', @@ -114,12 +114,12 @@ def setUp(self): self.mop3 = QMCalculator(software='mopac', method='pm7', - fileStore=fileStore + fileStore=self.fileStore ) self.mop4 = QMCalculator(software='mopac', method='pm8', - fileStore=fileStore + fileStore=self.fileStore ) self.gauss1 = QMCalculator(software='gaussian', @@ -128,22 +128,22 @@ def setUp(self): self.gauss2 = QMCalculator(software='gaussian', method='pm6', - fileStore=fileStore + fileStore=self.fileStore ) self.gauss3 = QMCalculator(software='gaussian', method='pm7', - fileStore=fileStore + fileStore=self.fileStore ) self.molpro1 = QMCalculator(software='molpro', method='mp2', - fileStore=fileStore + fileStore=self.fileStore ) - self.qmmol1 = QMCalculator(fileStore=fileStore) + self.qmmol1 = QMCalculator(fileStore=self.fileStore) - self.qmmol2 = QMCalculator(fileStore=fileStore) + self.qmmol2 = QMCalculator(fileStore=self.fileStore) def testSetDefaultOutputDirectory(self): """ @@ -286,6 +286,25 @@ def testGetThermoDataGaussian(self): self.assertAlmostEqual(thermo2.H298.value_si, 169326.2504, 0) # to 1 decimal place self.assertAlmostEqual(thermo2.S298.value_si, 338.2696063, 0) # to 1 decimal place + @unittest.skipIf(NO_MOPAC, "MOPAC not found. Try resetting your environment variables if you want to use it.") + @unittest.skipIf(NO_LICENCE, "MOPAC license not installed. 
Run mopac for instructions") + def testRunJobs(self): + """Test that runJobs() works properly.""" + qm = QMCalculator(software='mopac', + method='pm3', + fileStore=self.fileStore, + onlyCyclics=True, + maxRadicalNumber=0, + ) + outputDirectory = os.path.join(qm.settings.fileStore, '..', '..') + qm.setDefaultOutputDirectory(outputDirectory) + + spc1 = Species().fromSMILES('c1ccccc1') + spc2 = Species().fromSMILES('CC1C=CC=CC=1') + spcList = [spc1, spc2] + + qm.runJobs(spcList, procnum=1) + ################################################################################ From 80b288a48cfb91401a8463e72b1b9abc781503c5 Mon Sep 17 00:00:00 2001 From: Agnes Date: Thu, 30 May 2019 18:44:32 -0400 Subject: [PATCH 24/28] Moving determine_procnum_from_RAM() to rmgpy.rmg.main. --- rmgpy/data/kinetics/family.py | 8 +++--- rmgpy/rmg/main.py | 34 +++++++++++++++++++------ rmgpy/rmg/model.py | 11 ++++---- rmgpy/rmg/react.py | 47 +++-------------------------------- rmgpy/rmg/reactTest.py | 3 ++- 5 files changed, 43 insertions(+), 60 deletions(-) diff --git a/rmgpy/data/kinetics/family.py b/rmgpy/data/kinetics/family.py index 63869e171a..7a78694543 100644 --- a/rmgpy/data/kinetics/family.py +++ b/rmgpy/data/kinetics/family.py @@ -1072,6 +1072,10 @@ def addKineticsRulesFromTrainingSet(self, thermoDatabase=None,trainIndices=None) logging.info('Must be because you turned off the training depository.') return + # Determine number of parallel processes. 
+ from rmgpy.rmg.main import determine_procnum_from_RAM + procnum = determine_procnum_from_RAM() + tentries = depository.entries index = max([e.index for e in self.rules.getEntries()] or [0]) + 1 @@ -1157,10 +1161,6 @@ def addKineticsRulesFromTrainingSet(self, thermoDatabase=None,trainIndices=None) item = Reaction(reactants=[Species(molecule=[m.molecule[0].copy(deep=True)], label=m.label) for m in entry.item.reactants], products=[Species(molecule=[m.molecule[0].copy(deep=True)], label=m.label) for m in entry.item.products]) - # Determine number of parallel processes. - from rmgpy.rmg.react import determine_procnum_from_RAM - procnum = determine_procnum_from_RAM() - if procnum > 1: # If QMTP and multiprocessing write QMTP files here in parallel. from rmgpy.rmg.input import getInput diff --git a/rmgpy/rmg/main.py b/rmgpy/rmg/main.py index 181d875354..595ead0c15 100644 --- a/rmgpy/rmg/main.py +++ b/rmgpy/rmg/main.py @@ -39,6 +39,8 @@ import logging import os import shutil +import resource +import psutil import numpy as np import gc @@ -442,13 +444,6 @@ def initialize(self, **kwargs): if len(self.modelSettingsList) > 0: self.filterReactions = self.modelSettingsList[0].filterReactions - # See if memory profiling package is available - try: - import psutil - except ImportError: - logging.info('Optional package dependency "psutil" not found; memory profiling information will not be saved.') - - # Make output subdirectories util.makeOutputSubdirectory(self.outputDirectory, 'pdep') util.makeOutputSubdirectory(self.outputDirectory, 'solver') @@ -1738,6 +1733,31 @@ def readMeaningfulLineJava(self, f): ################################################################################ +def determine_procnum_from_RAM(): + """ + Get available RAM (GB)and procnum dependent on OS. 
+ """ + if sys.platform.startswith('linux'): + # linux + memory_available = psutil.virtual_memory().free / (1000.0 ** 3) + memory_use = psutil.Process(os.getpid()).memory_info()[0]/(1000.0 ** 3) + tmp = divmod(memory_available, memory_use) + tmp2 = min(maxproc, tmp[0]) + procnum = max(1, int(tmp2)) + elif sys.platform == "darwin": + # OS X + memory_available = psutil.virtual_memory().available/(1000.0 ** 3) + memory_use = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/(1000.0 ** 3) + tmp = divmod(memory_available, memory_use) + tmp2 = min(maxproc, tmp[0]) + procnum = max(1, int(tmp2)) + else: + # Everything else + procnum = 1 + + # Return the maximal number of processes for multiprocessing + return procnum + def initializeLog(verbose, log_file_name): """ Set up a logger for RMG to use to print output to stdout. The diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index afb98249e8..b310be6fa1 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -62,7 +62,7 @@ from rmgpy.data.rmg import getDB import rmgpy.data.rmg -from .react import react_all, determine_procnum_from_RAM +from .react import react_all from rmgpy.data.kinetics.common import ensure_independent_atom_ids, find_degenerate_reactions from pdep import PDepReaction, PDepNetwork @@ -530,6 +530,10 @@ def enlarge(self, newObject=None, reactEdge=False, reactionsMovedFromEdge = [] self.newReactionList = []; self.newSpeciesList = [] + # Determine number of parallel processes. 
+ from rmgpy.rmg.main import determine_procnum_from_RAM + procnum = determine_procnum_from_RAM() + if reactEdge is False: # We are adding core species newReactions = [] @@ -593,7 +597,7 @@ def enlarge(self, newObject=None, reactEdge=False, else: # We are reacting the edge rxns = react_all(self.core.species, numOldCoreSpecies, - unimolecularReact, bimolecularReact, trimolecularReact=trimolecularReact) + unimolecularReact, bimolecularReact, trimolecularReact=trimolecularReact, procnum=procnum) spcs = [self.retrieve_species(rxn) for rxn in rxns] @@ -602,9 +606,6 @@ def enlarge(self, newObject=None, reactEdge=False, ################################################################ # Begin processing the new species and reactions - - # Determine number of parallel processes. - procnum = determine_procnum_from_RAM() # Generate thermo for new species if self.newSpeciesList: diff --git a/rmgpy/rmg/react.py b/rmgpy/rmg/react.py index 5c233f74e8..db901702ca 100644 --- a/rmgpy/rmg/react.py +++ b/rmgpy/rmg/react.py @@ -33,45 +33,12 @@ """ import itertools import logging -import resource -import psutil -import os -from sys import platform from rmgpy.data.rmg import getDB from multiprocessing import Pool ################################################################################ - -def determine_procnum_from_RAM(): - """ - Get available RAM (GB)and procnum dependent on OS. 
- """ - - from rmgpy.rmg.main import maxproc - - if platform.startswith('linux'): - # linux - memory_available = psutil.virtual_memory().free / (1000.0 ** 3) - memory_use = psutil.Process(os.getpid()).memory_info()[0]/(1000.0 ** 3) - tmp = divmod(memory_available, memory_use) - tmp2 = min(maxproc, tmp[0]) - procnum = max(1, int(tmp2)) - elif platform == "darwin": - # OS X - memory_available = psutil.virtual_memory().available/(1000.0 ** 3) - memory_use = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/(1000.0 ** 3) - tmp = divmod(memory_available, memory_use) - tmp2 = min(maxproc, tmp[0]) - procnum = max(1, int(tmp2)) - else: - # Everything else - procnum = 1 - - # Return the maximal number of processes for multiprocessing - return procnum - -def react(*spc_tuples): +def react(spc_tuples, procnum=1): """ Generate reactions between the species in the list of species tuples for all the reaction families available. @@ -87,9 +54,6 @@ def react(*spc_tuples): Returns a flat generator object containing the generated Reaction objects. """ - - procnum = determine_procnum_from_RAM() - # Execute multiprocessing map. It blocks until the result is ready. # This method chops the iterable into a number of chunks which it # submits to the process pool as separate tasks. @@ -124,14 +88,11 @@ def react_species(species_tuple, only_families=None): return reactions -def react_all(core_spc_list, numOldCoreSpecies, unimolecularReact, bimolecularReact, trimolecularReact=None): +def react_all(core_spc_list, numOldCoreSpecies, unimolecularReact, bimolecularReact, trimolecularReact=None, procnum=1): """ Reacts the core species list via uni-, bi-, and trimolecular reactions and splits reaction families per task for improved load balancing in parallel runs. 
""" - - procnum = determine_procnum_from_RAM() - # Select reactive species that can undergo unimolecular reactions: spc_tuples = [(core_spc_list[i],) for i in xrange(numOldCoreSpecies) if (unimolecularReact[i] and core_spc_list[i].reactive)] @@ -184,7 +145,7 @@ def react_all(core_spc_list, numOldCoreSpecies, unimolecularReact, bimolecularRe for item in split_list: spc_fam_tuples.append((spc_tuple, item)) else: - spc_fam_tuples.append((spc_tuple,)) + spc_fam_tuples.append((spc_tuple, )) - return list(react(*spc_fam_tuples)) + return list(react(spc_fam_tuples, procnum)) diff --git a/rmgpy/rmg/reactTest.py b/rmgpy/rmg/reactTest.py index 3254b42629..e293285d28 100644 --- a/rmgpy/rmg/reactTest.py +++ b/rmgpy/rmg/reactTest.py @@ -91,6 +91,7 @@ def testReactAll(self): """ import rmgpy.rmg.main rmgpy.rmg.main.maxproc = 2 + procnum = 2 spcs = [ Species().fromSMILES('CC'), @@ -100,7 +101,7 @@ def testReactAll(self): ] N = len(spcs) - rxns = react_all(spcs, N, np.ones(N), np.ones([N, N]), np.ones([N, N, N])) + rxns = react_all(spcs, N, np.ones(N), np.ones([N, N]), np.ones([N, N, N]), procnum) self.assertIsNotNone(rxns) self.assertTrue(all([isinstance(rxn, TemplateReaction) for rxn in rxns])) From cd22300e25c5a389d69ece91cd6d1a7dad833991 Mon Sep 17 00:00:00 2001 From: Agnes Date: Thu, 30 May 2019 18:44:32 -0400 Subject: [PATCH 25/28] Update tests for moving determine_procnum_from_RAM() to rmgpy.rmg.main. 
--- rmgpy/rmg/modelTest.py | 8 +++++--- rmgpy/rmg/parreactTest.py | 3 ++- rmgpy/rmg/reactTest.py | 3 ++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/rmgpy/rmg/modelTest.py b/rmgpy/rmg/modelTest.py index 8442324c2a..b81c606584 100644 --- a/rmgpy/rmg/modelTest.py +++ b/rmgpy/rmg/modelTest.py @@ -134,6 +134,7 @@ class item: P.value_si = 101000.0 rsys.T = T rsys.P = P + procnum = 2 cerm = CoreEdgeReactionModel() @@ -141,8 +142,8 @@ class item: spcs = [Species().fromSMILES('CC'), Species().fromSMILES('[CH3]')] spcTuples = [((spcA, spc), ['H_Abstraction']) for spc in spcs] - rxns = list(react(*spcTuples)) - rxns += list(react(*[((spcs[0], spcs[1]), ['H_Abstraction'])])) + rxns = list(react(spcTuples, procnum)) + rxns += list(react([((spcs[0], spcs[1]), ['H_Abstraction'])], procnum)) for rxn in rxns: cerm.makeNewReaction(rxn) @@ -240,11 +241,12 @@ def testMakeNewReaction(self): Test that CoreEdgeReactionModel.makeNewReaction method correctly works. """ + procnum = 2 spcA = Species().fromSMILES('[OH]') spcs = [Species().fromSMILES('CC'), Species().fromSMILES('[CH3]')] spcTuples = [((spcA, spc), ['H_Abstraction']) for spc in spcs] - rxns = list(react(*spcTuples)) + rxns = list(react(spcTuples, procnum)) cerm = CoreEdgeReactionModel() diff --git a/rmgpy/rmg/parreactTest.py b/rmgpy/rmg/parreactTest.py index 2b9e8257d2..a6b360ca41 100644 --- a/rmgpy/rmg/parreactTest.py +++ b/rmgpy/rmg/parreactTest.py @@ -91,8 +91,9 @@ def generate(): spcA = Species().fromSMILES('[OH]') spcs = [Species().fromSMILES('CC'), Species().fromSMILES('[CH3]')] spcTuples = [(spcA, spc) for spc in spcs] + procnum = 2 - reactionList = list(react(*spcTuples)) + reactionList = list(react(spcTuples, procnum)) if not reactionList: return False diff --git a/rmgpy/rmg/reactTest.py b/rmgpy/rmg/reactTest.py index e293285d28..a6a7839740 100644 --- a/rmgpy/rmg/reactTest.py +++ b/rmgpy/rmg/reactTest.py @@ -76,12 +76,13 @@ def testReactMultiproc(self): """ import rmgpy.rmg.main 
rmgpy.rmg.main.maxproc = 2 + procnum = 2 spcA = Species().fromSMILES('[OH]') spcs = [Species().fromSMILES('CC'), Species().fromSMILES('[CH3]')] spcTuples = [((spcA, spc), ['H_Abstraction']) for spc in spcs] - reactionList = list(react(*spcTuples)) + reactionList = list(react(spcTuples, procnum)) self.assertIsNotNone(reactionList) self.assertTrue(all([isinstance(rxn, TemplateReaction) for rxn in reactionList])) From 189d5c999bf64f31c1a3aeeac4465fa23c96feee Mon Sep 17 00:00:00 2001 From: Agnes Date: Thu, 30 May 2019 18:44:32 -0400 Subject: [PATCH 26/28] Changed available number of processes to available number of processors. --- rmgpy/rmg/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rmgpy/rmg/main.py b/rmgpy/rmg/main.py index 595ead0c15..615c27eaff 100644 --- a/rmgpy/rmg/main.py +++ b/rmgpy/rmg/main.py @@ -464,7 +464,7 @@ def initialize(self, **kwargs): if maxproc > psutil.cpu_count(): raise ValueError("""Invalid input for user defined maximum number of processes {0}; should be an integer and smaller or equal to your available number of - processes {1}""".format(maxproc, psutil.cpu_count())) + processors {1}""".format(maxproc, psutil.cpu_count())) # Load databases self.loadDatabase() From 7318b37a4bde16a05fe752708839b8aa83d1c6b1 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 30 May 2019 18:44:32 -0400 Subject: [PATCH 27/28] Refactor species labeling based on thermo label Add explicit `rename` argument to generateThermo which is only true when called from enlarge. Thus, only new species without thermo are renamed, which prevents initial species and bath gases from getting accidentally renamed. 
--- rmgpy/rmg/model.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index b310be6fa1..8187362050 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -315,6 +315,10 @@ def makeNewSpecies(self, object, label='', reactive=True, checkForExisting=True, if generateThermo: self.generateThermo(spec) + # If the species still does not have a label, set initial label as the SMILES + # This may change later after getting thermo in self.generateThermo() + if not spec.label: + spec.label = spec.SMILES logging.debug('Creating new species {0}'.format(spec.label)) formula = molecule.getFormula() @@ -809,24 +813,19 @@ def applyThermoToSpecies(self, procnum): quantumMechanics.runJobs(self.newSpeciesList, procnum=procnum) # Serial thermo calculation for other methods - map(self.generateThermo, self.newSpeciesList) + for spc in self.newSpeciesList: + self.generateThermo(spc, rename=True) - def generateThermo(self, spc): + def generateThermo(self, spc, rename=False): """ Generate thermo for species. 
""" if not spc.thermo: submit(spc, self.solventName) - if spc.thermo and spc.thermo.label != '': #check if thermo libraries have a name for it - logging.info('Species with SMILES of {0} named {1} based on thermo library name'.format(spc.molecule[0].toSMILES().replace('/','').replace('\\',''), spc.thermo.label)) + + if rename and spc.thermo and spc.thermo.label != '': # check if thermo libraries have a name for it + logging.info('Species {0} renamed {1} based on thermo library name'.format(spc.label, spc.thermo.label)) spc.label = spc.thermo.label - else: - # Use SMILES as default format for label - # However, SMILES can contain slashes (to describe the - # stereochemistry around double bonds); since RMG doesn't - # distinguish cis and trans isomers, we'll just strip these out - # so that we can use the label in file paths - spc.label = spc.molecule[0].toSMILES().replace('/','').replace('\\','') spc.generateEnergyTransferModel() From f749f491ca0ea7b6b37a69a6537906366880c150 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 30 May 2019 19:13:22 -0400 Subject: [PATCH 28/28] Improvements to reactTest Reduce number of families being tested Make separate tests for serial and parallel processing --- rmgpy/rmg/reactTest.py | 73 ++++++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 20 deletions(-) diff --git a/rmgpy/rmg/reactTest.py b/rmgpy/rmg/reactTest.py index a6a7839740..e64c4452db 100644 --- a/rmgpy/rmg/reactTest.py +++ b/rmgpy/rmg/reactTest.py @@ -42,12 +42,8 @@ ################################################### -TESTFAMILY = ['H_Abstraction','R_Recombination','Intra_Disproportionation','Intra_RH_Add_Endocyclic', - 'Singlet_Carbene_Intra_Disproportionation','Intra_ene_reaction','Disproportionation', - '1,4_Linear_birad_scission','R_Addition_MultipleBond','2+2_cycloaddition_Cd','Diels_alder_addition', - 'Intra_RH_Add_Exocyclic','Intra_Retro_Diels_alder_bicyclic','Intra_2+2_cycloaddition_Cd', - 
'Birad_recombination','Intra_Diels_alder_monocyclic','1,4_Cyclic_birad_scission', - '1,2_Insertion_carbene','1,2_Insertion_CO'] +TESTFAMILIES = ['H_Abstraction', 'R_Recombination', 'Disproportionation', 'R_Addition_MultipleBond'] + class TestReact(unittest.TestCase): @@ -66,45 +62,81 @@ def setUp(self): self.rmg.database.loadForbiddenStructures(os.path.join(path, 'forbiddenStructures.py')) # kinetics family loading self.rmg.database.loadKinetics(os.path.join(path, 'kinetics'), - kineticsFamilies=TESTFAMILY, + kineticsFamilies=TESTFAMILIES, reactionLibraries=[] ) - def testReactMultiproc(self): + def testReact(self): """ - Test that reaction generation from the available families works with python multiprocessing. + Test that the ``react`` function works in serial + """ + procnum = 1 + + spc_a = Species().fromSMILES('[OH]') + spcs = [Species().fromSMILES('CC'), Species().fromSMILES('[CH3]')] + spc_tuples = [((spc_a, spc), ['H_Abstraction']) for spc in spcs] + + reaction_list = list(react(spc_tuples, procnum)) + self.assertIsNotNone(reaction_list) + self.assertEqual(len(reaction_list), 3) + self.assertTrue(all([isinstance(rxn, TemplateReaction) for rxn in reaction_list])) + + def testReactParallel(self): + """ + Test that the ``react`` function works in parallel using Python multiprocessing """ import rmgpy.rmg.main rmgpy.rmg.main.maxproc = 2 procnum = 2 - spcA = Species().fromSMILES('[OH]') + spc_a = Species().fromSMILES('[OH]') spcs = [Species().fromSMILES('CC'), Species().fromSMILES('[CH3]')] - spcTuples = [((spcA, spc), ['H_Abstraction']) for spc in spcs] + spc_tuples = [((spc_a, spc), ['H_Abstraction']) for spc in spcs] - reactionList = list(react(spcTuples, procnum)) - self.assertIsNotNone(reactionList) - self.assertTrue(all([isinstance(rxn, TemplateReaction) for rxn in reactionList])) + reaction_list = list(react(spc_tuples, procnum)) + self.assertIsNotNone(reaction_list) + self.assertEqual(len(reaction_list), 3) + self.assertTrue(all([isinstance(rxn, 
TemplateReaction) for rxn in reaction_list])) def testReactAll(self): """ - Test that the reactAll function works. + Test that the ``react_all`` function works in serial + """ + procnum = 1 + + spcs = [ + Species().fromSMILES('C=C'), + Species().fromSMILES('[CH3]'), + Species().fromSMILES('[OH]'), + Species().fromSMILES('CCCCCCCCCCC') + ] + + n = len(spcs) + reaction_list = react_all(spcs, n, np.ones(n), np.ones([n, n]), np.ones([n, n, n]), procnum) + self.assertIsNotNone(reaction_list) + self.assertEqual(len(reaction_list), 44) + self.assertTrue(all([isinstance(rxn, TemplateReaction) for rxn in reaction_list])) + + def testReactAllParallel(self): + """ + Test that the ``react_all`` function works in parallel using Python multiprocessing """ import rmgpy.rmg.main rmgpy.rmg.main.maxproc = 2 procnum = 2 spcs = [ - Species().fromSMILES('CC'), + Species().fromSMILES('C=C'), Species().fromSMILES('[CH3]'), Species().fromSMILES('[OH]'), Species().fromSMILES('CCCCCCCCCCC') ] - N = len(spcs) - rxns = react_all(spcs, N, np.ones(N), np.ones([N, N]), np.ones([N, N, N]), procnum) - self.assertIsNotNone(rxns) - self.assertTrue(all([isinstance(rxn, TemplateReaction) for rxn in rxns])) + n = len(spcs) + reaction_list = react_all(spcs, n, np.ones(n), np.ones([n, n]), np.ones([n, n, n]), procnum) + self.assertIsNotNone(reaction_list) + self.assertEqual(len(reaction_list), 44) + self.assertTrue(all([isinstance(rxn, TemplateReaction) for rxn in reaction_list])) def tearDown(self): """ @@ -113,5 +145,6 @@ def tearDown(self): import rmgpy.data.rmg rmgpy.data.rmg.database = None + if __name__ == '__main__': unittest.main()