Skip to content

Commit

Permalink
Merge pull request #10 from basf/add-inorganic-filter
Browse files Browse the repository at this point in the history
Add inorganic filter
  • Loading branch information
JenniferHem authored May 15, 2024
2 parents a35458a + 40cfb0d commit 006cd00
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 19 deletions.
8 changes: 7 additions & 1 deletion molpipeline/mol2mol/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
"""Init the module for mol2mol pipeline elements."""

from molpipeline.mol2mol.filter import ElementFilter, EmptyMoleculeFilter, MixtureFilter
from molpipeline.mol2mol.filter import (
ElementFilter,
EmptyMoleculeFilter,
InorganicsFilter,
MixtureFilter,
)
from molpipeline.mol2mol.reaction import MolToMolReaction
from molpipeline.mol2mol.scaffolds import MakeScaffoldGeneric, MurckoScaffold
from molpipeline.mol2mol.standardization import (
Expand Down Expand Up @@ -35,4 +40,5 @@
"SaltRemover",
"SolventRemover",
"Uncharger",
"InorganicsFilter",
)
84 changes: 67 additions & 17 deletions molpipeline/mol2mol/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,22 @@
class ElementFilter(_MolToMolPipelineElement):
"""ElementFilter which removes molecules containing chemical elements other than specified."""

DEFAULT_ALLOWED_ELEMENT_NUMBERS = [
1,
5,
6,
7,
8,
9,
14,
15,
16,
17,
34,
35,
53,
]

def __init__(
self,
allowed_element_numbers: Optional[list[int]] = None,
Expand All @@ -44,21 +60,7 @@ def __init__(
"""
super().__init__(name=name, n_jobs=n_jobs, uuid=uuid)
if allowed_element_numbers is None:
allowed_element_numbers = [
1,
5,
6,
7,
8,
9,
14,
15,
16,
17,
34,
35,
53,
]
allowed_element_numbers = self.DEFAULT_ALLOWED_ELEMENT_NUMBERS
if not isinstance(allowed_element_numbers, set):
self.allowed_element_numbers = set(allowed_element_numbers)
else:
Expand Down Expand Up @@ -149,7 +151,7 @@ def __int__(
uuid: str, optional (default: None)
Unique identifier of the pipeline element.
"""
super().__init__(name=name, n_jobs=n_jobs)
super().__init__(name=name, n_jobs=n_jobs, uuid=uuid)

def pretransform_single(self, value: RDKitMol) -> OptionalMol:
"""Invalidate molecule containing multiple fragments.
Expand Down Expand Up @@ -195,7 +197,7 @@ def __init__(
uuid: str, optional (default: None)
Unique identifier of the pipeline element.
"""
super().__init__(name=name, n_jobs=n_jobs)
super().__init__(name=name, n_jobs=n_jobs, uuid=uuid)

def pretransform_single(self, value: RDKitMol) -> OptionalMol:
"""Invalidate empty molecule.
Expand All @@ -213,3 +215,51 @@ def pretransform_single(self, value: RDKitMol) -> OptionalMol:
if value.GetNumAtoms() == 0:
return InvalidInstance(self.uuid, "Molecule contains no atoms.", self.name)
return value


class InorganicsFilter(_MolToMolPipelineElement):
    """Filter which invalidates molecules that are not organic.

    A molecule is invalidated if it contains no carbon atom, or if its SMILES
    is one of the carbon-containing species listed in ``CARBON_INORGANICS``.
    """

    # CO2 and CO contain carbon but are not organic.
    # The entries are compared verbatim with the output of Chem.MolToSmiles,
    # so they must be written exactly as that function emits them.
    CARBON_INORGANICS = ["O=C=O", "[C-]#[O+]"]

    def __init__(
        self,
        name: str = "InorganicsFilter",
        n_jobs: int = 1,
        uuid: Optional[str] = None,
    ) -> None:
        """Initialize InorganicsFilter.

        Parameters
        ----------
        name: str, optional (default: "InorganicsFilter")
            Name of the pipeline element.
        n_jobs: int, optional (default: 1)
            Number of parallel jobs to use.
        uuid: str, optional (default: None)
            Unique identifier of the pipeline element.
        """
        super().__init__(name=name, n_jobs=n_jobs, uuid=uuid)

    def pretransform_single(self, value: RDKitMol) -> OptionalMol:
        """Invalidate molecules which are not organic.

        Parameters
        ----------
        value: RDKitMol
            Molecule to check.

        Returns
        -------
        OptionalMol
            The unchanged molecule if it contains carbon and is not listed in
            ``CARBON_INORGANICS``, else InvalidInstance.
        """
        has_carbon = any(atom.GetAtomicNum() == 6 for atom in value.GetAtoms())
        if not has_carbon:
            return InvalidInstance(
                self.uuid, "Molecule contains no organic atoms.", self.name
            )
        # Carbon is present; still reject the explicitly listed inorganic
        # carbon species (no debug output is written while doing so).
        smiles = Chem.MolToSmiles(value)
        if smiles in self.CARBON_INORGANICS:
            return InvalidInstance(self.uuid, "Molecule is not organic.", self.name)
        return value
49 changes: 48 additions & 1 deletion tests/test_elements/test_mol2mol/test_mol2mol_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from molpipeline import ErrorFilter, FilterReinserter, Pipeline
from molpipeline.any2mol import SmilesToMol
from molpipeline.mol2any import MolToSmiles
from molpipeline.mol2mol import ElementFilter, MixtureFilter
from molpipeline.mol2mol import ElementFilter, InorganicsFilter, MixtureFilter

# pylint: disable=duplicate-code # test case molecules are allowed to be duplicated
SMILES_ANTIMONY = "[SbH6+3]"
Expand Down Expand Up @@ -97,6 +97,53 @@ def test_invalidate_mixtures(self) -> None:
mols_processed = pipeline.fit_transform(mol_list)
self.assertEqual(expected_invalidated_mol_list, mols_processed)

def test_inorganic_filter(self) -> None:
    """Test if inorganic molecules are removed by the InorganicsFilter.

    Molecules without any carbon atom, as well as the species listed in
    InorganicsFilter.CARBON_INORGANICS, are expected to be filtered out.

    Returns
    -------
    None
    """
    smiles2mol = SmilesToMol()
    inorganics_filter = InorganicsFilter()
    mol2smiles = MolToSmiles()
    error_filter = ErrorFilter.from_element_list(
        [smiles2mol, inorganics_filter, mol2smiles]
    )
    pipeline = Pipeline(
        [
            ("Smiles2Mol", smiles2mol),
            # Step name matches the element it actually holds.
            ("InorganicsFilter", inorganics_filter),
            ("Mol2Smiles", mol2smiles),
            ("ErrorFilter", error_filter),
        ],
    )

    # Only the antimony species is expected to be removed from this input.
    filtered_smiles = pipeline.fit_transform(
        [
            SMILES_ANTIMONY,
            SMILES_BENZENE,
            SMILES_CHLOROBENZENE,
            SMILES_METAL_AU,
            SMILES_CL_BR,
        ]
    )
    self.assertEqual(
        filtered_smiles,
        [SMILES_BENZENE, SMILES_CHLOROBENZENE, SMILES_METAL_AU, SMILES_CL_BR],
    )

    # CO2 and CO (the latter written with a different atom order than the
    # entry in CARBON_INORGANICS) must both be removed.
    filtered_inorganics = pipeline.fit_transform(["O=C=O", "[O+]#[C-]"])
    self.assertEqual(
        filtered_inorganics,
        [],
    )

    # The reference list itself must be filtered out completely.
    filtered_inorganics = pipeline.fit_transform(InorganicsFilter.CARBON_INORGANICS)
    self.assertEqual(
        filtered_inorganics,
        [],
    )


if __name__ == "__main__":
unittest.main()

0 comments on commit 006cd00

Please sign in to comment.