From 8e3e27be6dcd9e5ebaede92a386ecfbf7a6b33e3 Mon Sep 17 00:00:00 2001
From: Max Liu <mjliu@mit.edu>
Date: Fri, 29 Mar 2019 16:13:47 -0400
Subject: [PATCH] Refactor family splitting and some code style changes

Simplify code for family splitting
Rename functions in react module using underscores
Remove reactPdep functions and use react_species instead
---
 rmgpy/rmg/model.py     |   6 +-
 rmgpy/rmg/modelTest.py |   6 +-
 rmgpy/rmg/pdep.py      |   4 +-
 rmgpy/rmg/react.py     | 254 +++++++++--------------------------------
 rmgpy/rmg/reactTest.py |   6 +-
 rmgpy/rmg/rmgTest.py   |   8 +-
 6 files changed, 71 insertions(+), 213 deletions(-)

diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py
index 2a5ba41900c..b67c9bf0466 100644
--- a/rmgpy/rmg/model.py
+++ b/rmgpy/rmg/model.py
@@ -62,7 +62,7 @@
 from rmgpy.data.rmg import getDB
         
 import rmgpy.data.rmg
-from .react import reactAll
+from .react import react_all
 from rmgpy.data.kinetics.common import ensure_independent_atom_ids, find_degenerate_reactions
 
 from pdep import PDepReaction, PDepNetwork
@@ -693,8 +693,8 @@ def enlarge(self, newObject=None, reactEdge=False,
 
         else:
             # We are reacting the edge
-            rxns = reactAll(self.core.species, numOldCoreSpecies,
-                            unimolecularReact, bimolecularReact, trimolecularReact=trimolecularReact)
+            rxns = react_all(self.core.species, numOldCoreSpecies,
+                             unimolecularReact, bimolecularReact, trimolecularReact=trimolecularReact)
 
             # Get new species and save in spcs
             spcs_tmp = []
diff --git a/rmgpy/rmg/modelTest.py b/rmgpy/rmg/modelTest.py
index 1d945f0d585..4d16995129c 100644
--- a/rmgpy/rmg/modelTest.py
+++ b/rmgpy/rmg/modelTest.py
@@ -139,10 +139,10 @@ class item:
         
         spcA = Species().fromSMILES('[OH]')
         spcs = [Species().fromSMILES('CC'), Species().fromSMILES('[CH3]')]
-        spcTuples = [(spcA, spc, ['H_Abstraction']) for spc in spcs]
+        spcTuples = [((spcA, spc), ['H_Abstraction']) for spc in spcs]
         
         rxns = list(react(*spcTuples))
-        rxns += list(react(*[(spcs[0], spcs[1], ['H_Abstraction'])]))
+        rxns += list(react(*[((spcs[0], spcs[1]), ['H_Abstraction'])]))
         
         for rxn in rxns:
             cerm.makeNewReaction(rxn)
@@ -243,7 +243,7 @@ def testMakeNewReaction(self):
 
         spcA = Species().fromSMILES('[OH]')
         spcs = [Species().fromSMILES('CC'), Species().fromSMILES('[CH3]')]
-        spcTuples = [(spcA, spc, ['H_Abstraction']) for spc in spcs]
+        spcTuples = [((spcA, spc), ['H_Abstraction']) for spc in spcs]
 
         rxns = list(react(*spcTuples))
 
diff --git a/rmgpy/rmg/pdep.py b/rmgpy/rmg/pdep.py
index 30bd40dd594..fe5cef42a34 100644
--- a/rmgpy/rmg/pdep.py
+++ b/rmgpy/rmg/pdep.py
@@ -44,7 +44,7 @@
 from rmgpy.constants import R
 
 from rmgpy.pdep import Conformer, Configuration
-from rmgpy.rmg.react import reactPdep
+from rmgpy.rmg.react import react_species
 from rmgpy.exceptions import PressureDependenceError, NetworkError
 from rmgpy.data.kinetics.library import LibraryReaction
 
@@ -300,7 +300,7 @@ def exploreIsomer(self, isomer):
         # Don't find reactions involving the new species as bimolecular
         # reactants or products with other core species (e.g. A + B <---> products)
 
-        newReactions = reactPdep((isomer,))
+        newReactions = react_species((isomer,))
         
         return newReactions
 
diff --git a/rmgpy/rmg/react.py b/rmgpy/rmg/react.py
index d586adb852c..09678dea5c1 100644
--- a/rmgpy/rmg/react.py
+++ b/rmgpy/rmg/react.py
@@ -41,6 +41,7 @@
 from rmgpy.data.rmg import getDB
 from multiprocessing import Pool
 
+
 def react(*spc_tuples):
     """
     Generate reactions between the species in the
@@ -92,40 +93,37 @@ def react(*spc_tuples):
     # This method chops the iterable into a number of chunks which it
     # submits to the process pool as separate tasks.
     if procnum == 1:
-        reactions = map(
-                    reactSpecies,
-                    spc_tuples)
+        reactions = map(_react_species_star, spc_tuples)
     else:
         p = Pool(processes=procnum)
         
-        reactions = p.map(
-                    reactSpecies,
-	            spc_tuples)
-	
+        reactions = p.map(_react_species_star, spc_tuples)
+
         p.close()
         p.join()
 
-
     return itertools.chain.from_iterable(reactions)
 
 
-def reactSpecies(species_tuple_tmp):
+def _react_species_star(args):
+    """Wrapper to unpack zipped arguments for use with map"""
+    return react_species(*args)
+
+
+def react_species(species_tuple, only_families=None):
     """
     Given a tuple of Species objects, generates all possible reactions
     from the loaded reaction families and combines degenerate reactions.
     """
 
-    species_tuple = species_tuple_tmp[0:-1]
-    own_families = species_tuple_tmp[-1]
-
     species_tuple = tuple([spc.copy(deep=True) for spc in species_tuple])
 
-    reactions = getDB('kinetics').generate_reactions_from_families(species_tuple, only_families=own_families)
+    reactions = getDB('kinetics').generate_reactions_from_families(species_tuple, only_families=only_families)
 
     return reactions
 
 
-def reactAll(core_spc_list, numOldCoreSpecies, unimolecularReact, bimolecularReact, trimolecularReact=None):
+def react_all(core_spc_list, numOldCoreSpecies, unimolecularReact, bimolecularReact, trimolecularReact=None):
     """
     Reacts the core species list via uni-, bi-, and trimolecular
     reactions and splits reaction families per task for improved load balancing in parallel runs.
@@ -133,198 +131,58 @@ def reactAll(core_spc_list, numOldCoreSpecies, unimolecularReact, bimolecularRea
 
     from rmgpy.rmg.main import maxproc
 
-    # Load kineticsFamilies to be added to reactant tuple to allow for improved load balancing 
-    # in parallel jobs.
-    split_listOrig = []
-    split_list_tmp = []
-    for key in getDB('kinetics').families:
-        split_listOrig.append(key)
-        split_list_tmp.append(key)
+    # Select reactive species that can undergo unimolecular reactions:
+    spc_tuples = [(core_spc_list[i],)
+                  for i in xrange(numOldCoreSpecies) if (unimolecularReact[i] and core_spc_list[i].reactive)]
 
-    if maxproc == 1:
-        # Select reactive species that can undergo unimolecular reactions:
-        spc_tuplestmp = [(core_spc_list[i], split_listOrig)
-         for i in xrange(numOldCoreSpecies) if (unimolecularReact[i] and core_spc_list[i].reactive)]
-    
-        for i in xrange(numOldCoreSpecies):
-            for j in xrange(i, numOldCoreSpecies):
-                # Find reactions involving the species that are bimolecular.
-                # This includes a species reacting with itself (if its own concentration is high enough).
-                if bimolecularReact[i,j]:
-                    if core_spc_list[i].reactive and core_spc_list[j].reactive:
-                        spc_tuplestmp.append((core_spc_list[i], core_spc_list[j], split_listOrig))
-    
-        if trimolecularReact is not None:
-            for i in xrange(numOldCoreSpecies):
-                for j in xrange(i, numOldCoreSpecies):
-                    for k in xrange(j, numOldCoreSpecies):
-                        # Find reactions involving the species that are trimolecular.
-                        if trimolecularReact[i,j,k]:
-                            if core_spc_list[i].reactive and core_spc_list[j].reactive and core_spc_list[k].reactive:
-                                spc_tuplestmp.append((core_spc_list[i], core_spc_list[j], core_spc_list[k], split_listOrig))
-    else:
-        # Select reactive species that can undergo unimolecular reactions:
-        spc_tuples = [(core_spc_list[i],)
-         for i in xrange(numOldCoreSpecies) if (unimolecularReact[i] and core_spc_list[i].reactive)]
-    
+    for i in xrange(numOldCoreSpecies):
+        for j in xrange(i, numOldCoreSpecies):
+            # Find reactions involving the species that are bimolecular.
+            # This includes a species reacting with itself (if its own concentration is high enough).
+            if bimolecularReact[i, j]:
+                if core_spc_list[i].reactive and core_spc_list[j].reactive:
+                    spc_tuples.append((core_spc_list[i], core_spc_list[j]))
+
+    if trimolecularReact is not None:
         for i in xrange(numOldCoreSpecies):
             for j in xrange(i, numOldCoreSpecies):
-                # Find reactions involving the species that are bimolecular.
-                # This includes a species reacting with itself (if its own concentration is high enough).
-                if bimolecularReact[i,j]:
-                    if core_spc_list[i].reactive and core_spc_list[j].reactive:
-                        spc_tuples.append((core_spc_list[i], core_spc_list[j]))
-    
-        if trimolecularReact is not None:
-            for i in xrange(numOldCoreSpecies):
-                for j in xrange(i, numOldCoreSpecies):
-                    for k in xrange(j, numOldCoreSpecies):
-                        # Find reactions involving the species that are trimolecular.
-                        if trimolecularReact[i,j,k]:
-                            if core_spc_list[i].reactive and core_spc_list[j].reactive and core_spc_list[k].reactive:
-                                spc_tuples.append((core_spc_list[i], core_spc_list[j], core_spc_list[k]))
-
+                for k in xrange(j, numOldCoreSpecies):
+                    # Find reactions involving the species that are trimolecular.
+                    if trimolecularReact[i, j, k]:
+                        if core_spc_list[i].reactive and core_spc_list[j].reactive and core_spc_list[k].reactive:
+                            spc_tuples.append((core_spc_list[i], core_spc_list[j], core_spc_list[k]))
 
+    if maxproc == 1:
+        # React with all families as usual: wrap each species tuple in a 1-tuple so only_families keeps its default
+        spc_tuples = zip(spc_tuples)
+    else:
         # Identify and split families that are prone to generate many reactions into sublists.
+        family_list = getDB('kinetics').families.keys()
+        major_families = [
+            'H_Abstraction', 'R_Recombination', 'Intra_Disproportionation', 'Intra_RH_Add_Endocyclic',
+            'Singlet_Carbene_Intra_Disproportionation', 'Intra_ene_reaction', 'Disproportionation',
+            '1,4_Linear_birad_scission', 'R_Addition_MultipleBond', '2+2_cycloaddition_Cd', 'Diels_alder_addition',
+            'Intra_RH_Add_Exocyclic', 'Intra_Retro_Diels_alder_bicyclic', 'Intra_2+2_cycloaddition_Cd',
+            'Birad_recombination', 'Intra_Diels_alder_monocyclic', '1,4_Cyclic_birad_scission', '1,2_Insertion_carbene',
+        ]
+
         split_list = []
-        for i in range(len(split_list_tmp)):
-            if split_list_tmp[i] == 'H_Abstraction':
-                split_list_tmp[i] = []
-                split_list.append(['H_Abstraction'])
-            elif split_list_tmp[i] == 'R_Recombination':
-                split_list_tmp[i] = []
-                split_list.append(['R_Recombination'])
-            elif split_list_tmp[i] == 'Intra_Disproportionation':
-                split_list_tmp[i] = []
-                split_list.append(['Intra_Disproportionation'])
-            elif split_list_tmp[i] == 'Intra_RH_Add_Endocyclic':
-                split_list_tmp[i] = []
-                split_list.append(['Intra_RH_Add_Endocyclic'])
-            elif split_list_tmp[i] == 'Singlet_Carbene_Intra_Disproportionation':
-                split_list_tmp[i] = []
-                split_list.append(['Singlet_Carbene_Intra_Disproportionation'])
-            elif split_list_tmp[i] == 'Intra_ene_reaction':
-                split_list_tmp[i] = []
-                split_list.append(['Intra_ene_reaction'])
-            elif split_list_tmp[i] == 'Disproportionation':
-                split_list_tmp[i] = []
-                split_list.append(['Disproportionation'])
-            elif split_list_tmp[i] == '1,4_Linear_birad_scission':
-                split_list_tmp[i] = []
-                split_list.append(['1,4_Linear_birad_scission'])
-            elif split_list_tmp[i] == 'R_Addition_MultipleBond':
-                split_list_tmp[i] = []
-                split_list.append(['R_Addition_MultipleBond'])
-            elif split_list_tmp[i] == '2+2_cycloaddition_Cd':
-                split_list_tmp[i] = []
-                split_list.append(['2+2_cycloaddition_Cd'])
-            elif split_list_tmp[i] == 'Diels_alder_addition':
-                split_list_tmp[i] = []
-                split_list.append(['Diels_alder_addition'])
-            elif split_list_tmp[i] == 'Intra_RH_Add_Exocyclic':
-                split_list_tmp[i] = []
-                split_list.append(['Intra_RH_Add_Exocyclic'])
-            elif split_list_tmp[i] == 'Intra_Retro_Diels_alder_bicyclic':
-                split_list_tmp[i] = []
-                split_list.append(['Intra_Retro_Diels_alder_bicyclic'])
-            elif split_list_tmp[i] == 'Intra_2+2_cycloaddition_Cd':
-                split_list_tmp[i] = []
-                split_list.append(['Intra_2+2_cycloaddition_Cd'])
-            elif split_list_tmp[i] == 'Birad_recombination':
-                split_list_tmp[i] = []
-                split_list.append(['Birad_recombination'])
-            elif split_list_tmp[i] == 'Intra_Diels_alder_monocyclic':
-                split_list_tmp[i] = []
-                split_list.append(['Intra_Diels_alder_monocyclic'])
-            elif split_list_tmp[i] == '1,4_Cyclic_birad_scission':
-                split_list_tmp[i] = []
-                split_list.append(['1,4_Cyclic_birad_scission'])
-            elif split_list_tmp[i] == '1,2_Insertion_carbene':
-                split_list_tmp[i] = []
-                split_list.append(['1,2_Insertion_carbene'])
-    
-        # Remove empty lists from remaining split_list_tmp. It now contains only
-        # families that are not mentioned above.
-        split_list.append(filter(None, split_list_tmp))
-    
-        # Only employ family splitting for reactants that have a larger number than nAFS.
-        nAFS = 10
-    
-        spc_tuplestmp = []
-        # Append reaction families to reactant tuple.
-        for tmpj in spc_tuples:
-            if len(tmpj) == 1:
-                if len(str(tmpj[0])) > nAFS:
-                    for tmpl in split_list: 
-                        tmpk = list(tmpj)
-                        tmpk.append(tmpl)
-                        spc_tuplestmp.append(tuple(tmpk))
-                else:
-                    tmpk = list(tmpj)
-                    tmpk.append(split_listOrig)
-                    spc_tuplestmp.append(tuple(tmpk))
-            elif len(tmpj) == 2:
-                if (len(str(tmpj[0])) > nAFS
-                   ) or (len(str(tmpj[1])) > nAFS):
-                    for tmpl in split_list:
-                        tmpk = list(tmpj)
-                        tmpk.append(tmpl)
-                        spc_tuplestmp.append(tuple(tmpk))
-                else:
-                    tmpk = list(tmpj)
-                    tmpk.append(split_listOrig)
-                    spc_tuplestmp.append(tuple(tmpk))
+        leftovers = []
+        for fam in family_list:
+            if fam in major_families:
+                split_list.append([fam])
             else:
-                if (len(str(tmpj[0])) > nAFS
-                   ) or (len(str(tmpj[1])) > nAFS
-                        ) or (len(str(tmpj[2])) > nAFS):
-                    for tmpl in split_list:
-                        tmpk = list(tmpj)
-                        tmpk.append(tmpl)
-                        spc_tuplestmp.append(tuple(tmpk))
-                else:
-                    tmpk = list(tmpj)
-                    tmpk.append(split_listOrig)
-                    spc_tuplestmp.append(tuple(tmpk))
-
-    rxns = list(react(*spc_tuplestmp))
-
-    return rxns
-
-def reactPdep(*spc_tuples):
-    """
-    Generate reactions between the species in the
-    list of species tuples for all the reaction families available.
+                leftovers.append(fam)
+        split_list.append(leftovers)
 
-    For each tuple of one or more Species objects [(spc1,), (spc2, spc3), ...]
-    the following is done:
+        # Only employ family splitting when at least one reactant has more than min_atoms atoms
+        min_atoms = 10
 
-    A list of tuples is created for each resonance isomer of the species.
-    Each tuple consists of (Molecule, index) with the index the species index of the Species object.
-
-    Possible combinations between the first spc in the tuple, and the second species in the tuple
-    is obtained by taking the combinatorial product of the two generated [(Molecule, index)] lists.
-
-    Returns a flat generator object containing the generated Reaction objects.
-    """
-
-    reactions = map(
-                react_species_pdep,
-                spc_tuples)
-
-    return itertools.chain.from_iterable(reactions)
-
-
-def react_species_pdep(species_tuple):
-    """
-    Given a tuple of Species objects, generates all possible reactions
-    from the loaded reaction families and combines degenerate reactions.
-    """
-
-    species_tuple = tuple([spc.copy(deep=True) for spc in species_tuple])
-
-    reactions = getDB('kinetics').generate_reactions_from_families(species_tuple)
-
-    return reactions
+        for i, spc_tuple in enumerate(spc_tuples):
+            if any([len(spc.molecule[0].atoms) > min_atoms for spc in spc_tuple]):
+                spc_tuples[i] = (spc_tuple, split_list)
+            else:
+                spc_tuples[i] = (spc_tuple,)
 
+    return list(react(*spc_tuples))
 
diff --git a/rmgpy/rmg/reactTest.py b/rmgpy/rmg/reactTest.py
index d62559d7230..b70ecdad0fd 100644
--- a/rmgpy/rmg/reactTest.py
+++ b/rmgpy/rmg/reactTest.py
@@ -38,7 +38,7 @@
 from rmgpy.species import Species
 
 from rmgpy.rmg.main import RMG
-from rmgpy.rmg.react import react, reactAll
+from rmgpy.rmg.react import react, react_all
 
 ###################################################
 
@@ -91,7 +91,7 @@ def testReactMultiproc(self):
 
         spcA = Species().fromSMILES('[OH]')
         spcs = [Species().fromSMILES('CC'), Species().fromSMILES('[CH3]')]
-        spcTuples = [(spcA, spc, ['H_Abstraction']) for spc in spcs]
+        spcTuples = [((spcA, spc), ['H_Abstraction']) for spc in spcs]
 
         reactionList = list(react(*spcTuples))
         self.assertIsNotNone(reactionList)
@@ -112,7 +112,7 @@ def testReactAll(self):
                 ]
 
         N = len(spcs)
-        rxns = reactAll(spcs, N, np.ones(N), np.ones([N,N]), np.ones([N,N,N]))
+        rxns = react_all(spcs, N, np.ones(N), np.ones([N, N]), np.ones([N, N, N]))
         self.assertIsNotNone(rxns)
         self.assertTrue(all([isinstance(rxn, TemplateReaction) for rxn in rxns]))
 
diff --git a/rmgpy/rmg/rmgTest.py b/rmgpy/rmg/rmgTest.py
index 9e41e9e34dc..8b14e919f65 100644
--- a/rmgpy/rmg/rmgTest.py
+++ b/rmgpy/rmg/rmgTest.py
@@ -37,7 +37,7 @@
 from rmgpy import settings
 from rmgpy.data.rmg import RMGDatabase
 from rmgpy.molecule import Molecule
-from rmgpy.rmg.react import react
+from rmgpy.rmg.react import react_species
 from rmgpy.restart import saveRestartFile
 import rmgpy
 from rmgpy.data.base import ForbiddenStructures
@@ -97,7 +97,7 @@ def testDeterministicReactionTemplateMatching(self):
         # react
         spc = Species().fromSMILES("O=C[C]=C")
         spc.generate_resonance_structures()
-        newReactions = react((spc,))
+        newReactions = react_species((spc,))
 
         # try to pick out the target reaction 
         mol_H = Molecule().fromSMILES("[H]")
@@ -111,7 +111,7 @@ def testDeterministicReactionTemplateMatching(self):
 
         # react again
         newReactions_reverse = []
-        newReactions_reverse.extend(react((spc,)))
+        newReactions_reverse.extend(react_species((spc,)))
 
         # try to pick out the target reaction 
         target_rxns_reverse = findTargetRxnsContaining(mol_H, mol_C3H2O, newReactions_reverse)
@@ -185,7 +185,7 @@ def testRestartFileGenerationAndParsing(self):
         self.rmg.reactionModel.core.species.append(spc2)
 
         newReactions = []
-        newReactions.extend(react((spc1, spc2, ['H_Abstraction'])))
+        newReactions.extend(react_species((spc1, spc2)))
 
         # process newly generated reactions to make sure no duplicated reactions
         self.rmg.reactionModel.processNewReactions(newReactions, spc2, None)