Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve group additivity ranking for aromatic species #1731

Merged
merged 4 commits into from
Sep 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions rmgpy/data/thermo.py
Original file line number Diff line number Diff line change
Expand Up @@ -1785,24 +1785,29 @@ def prioritize_thermo(self, species, thermo_data_list):
if species.molecule[0].is_cyclic():
# Special treatment for cyclic compounds
entries = []
for thermo in thermo_data_list:
for i, thermo in enumerate(thermo_data_list):
ring_groups, polycyclic_groups = self.get_ring_groups_from_comments(thermo)

# Use rank as a metric for prioritizing thermo.
# The smaller the rank, the better.
sum_rank = np.sum(
[3 if entry.rank is None else entry.rank for entry in ring_groups + polycyclic_groups])
entries.append((thermo, sum_rank))

# Sort first by rank, then by enthalpy at 298 K
entries = sorted(entries, key=lambda entry: (entry[1], entry[0].get_enthalpy(298.)))
indices = [thermo_data_list.index(entry[0]) for entry in entries]
# Also use number of aromatic rings as a metric, more aromatic rings is better
# Group values are generally fitted to the most aromatic resonance structure
num_arom_rings = species.molecule[i].count_aromatic_rings()

entries.append((i, thermo, sum_rank, -num_arom_rings))

# Sort first by number of aromatic rings, then rank, then by enthalpy at 298 K
entries.sort(key=lambda entry: (entry[3], entry[2], entry[1].get_enthalpy(298.)))
indices = [entry[0] for entry in entries]
else:
# For noncyclics, default to original algorithm of ordering thermo based on the most stable enthalpy
H298 = np.array([t.get_enthalpy(298.) for t in thermo_data_list])
indices = H298.argsort()
indices = np.array([i for i in indices if species.molecule[i].reactive] +
[i for i in indices if not species.molecule[i].reactive])
indices = H298.argsort().tolist()
# Sort indices again by the Molecule.reactive flag
indices.sort(key=lambda index: species.molecule[index].reactive, reverse=True)
else:
indices = [0]
return indices
Expand Down
9 changes: 9 additions & 0 deletions rmgpy/data/thermoTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,15 @@ def test_thermo_for_mixed_reactive_and_nonreactive_molecules(self):
self.assertTrue(thermo_gav2.get_enthalpy(298) > thermo_gav1.get_enthalpy(298),
msg="Did not select the reactive molecule for thermo")

def test_thermo_for_aromatic_radicals(self):
"""Test that we use the most aromatic resonance structure for thermo estimation"""
spec = Species(smiles='C=[C]c1ccc2ccccc2c1') # vinylnaphthalene radical
spec.generate_resonance_structures()
thermo_gav = self.database.get_thermo_data_from_groups(spec)
self.assertAlmostEqual(thermo_gav.H298.value_si / 4184, 107, delta=1)
self.assertIn('group additivity', thermo_gav.comment, 'Thermo not found from GAV, test purpose not fulfilled.')
self.assertIn('polycyclic(s2_6_6_naphthalene)', thermo_gav.comment)


class TestThermoAccuracy(unittest.TestCase):
"""
Expand Down
2 changes: 2 additions & 0 deletions rmgpy/molecule/molecule.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,8 @@ cdef class Molecule(Graph):

cpdef identify_ring_membership(self)

cpdef int count_aromatic_rings(self)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you change the commit message to lower_case_with_underscores to be consistent with the code?
(same comment for the Minor code improvements to ThermoDatabase.prioritizeThermo commit)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!


cpdef tuple get_aromatic_rings(self, list rings=?)

cpdef list get_deterministic_sssr(self)
Expand Down
21 changes: 21 additions & 0 deletions rmgpy/molecule/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -2218,6 +2218,27 @@ def identify_ring_membership(self):
atom.props['inRing'] = True
break

def count_aromatic_rings(self):
"""
Count the number of aromatic rings in the current molecule, as determined by the benzene bond type.
This is purely dependent on representation and is unrelated to the actual aromaticity of the molecule.

Returns an integer corresponding to the number or aromatic rings.
"""
cython.declare(rings=list, count=int, ring=list, bonds=list, bond=Bond)
rings = self.get_relevant_cycles()
count = 0
for ring in rings:
if len(ring) != 6:
# We currently only recognize 6-membered rings as being aromatic
continue

bonds = self.get_edges_in_cycle(ring)
if all([bond.is_benzene() for bond in bonds]):
count += 1

return count

def get_aromatic_rings(self, rings=None):
"""
Returns all aromatic rings as a list of atoms and a list of bonds.
Expand Down
7 changes: 7 additions & 0 deletions rmgpy/molecule/moleculeTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2650,6 +2650,13 @@ def test_enumerate_bonds(self):
self.assertEqual(bonds['H-O'], 2)
self.assertEqual(bonds['H~O'], 2)

def test_count_aromatic_rings(self):
"""Test that we can count aromatic rings correctly."""
mol = Molecule(smiles='c12ccccc1cccc2')
out = mol.generate_resonance_structures()
result = [m.count_aromatic_rings() for m in out]

self.assertEqual(result, [2, 1, 0])

################################################################################

Expand Down