Skip to content

Commit

Permalink
notebooks: beautify advanced_02 (#50)
Browse files Browse the repository at this point in the history
- Fix typos, remove wrong type hints,
      add more explanation
  • Loading branch information
JochenSiegWork authored Jul 11, 2024
1 parent 202ca22 commit 188edb2
Showing 1 changed file with 15 additions and 10 deletions.
25 changes: 15 additions & 10 deletions notebooks/advanced_02_add_custom_pipeline_elements.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"\n",
"## Example using `MolToDescriptorPipelineElement`\n",
"\n",
"The `MolToDescriptorPipelineElement` is a specification of `MolToAnyPipelineElement` adding useful functionality to the interface, like the number of features, the names of features and optional feature normalization. Analogously, the `MolToFingerprintPipelineElement` provides some useful functions for molecular fingerprint featurizes.\n",
"The `MolToDescriptorPipelineElement` is a specification of `MolToAnyPipelineElement` adding useful functionality to the interface, like the number of features, the names of features and optional feature normalization. Analogously, the `MolToFingerprintPipelineElement` provides some useful functions for molecular fingerprint featurization.\n",
"\n",
"In the following example, we demonstrate how to implement a new molecular descriptor representing the composition of a molecule using the counts of chemical element symbols. "
]
Expand All @@ -35,18 +35,15 @@
"import numpy as np\n",
"import numpy.typing as npt\n",
"\n",
"from rdkit import Chem\n",
"\n",
"from molpipeline.abstract_pipeline_elements.core import MolToAnyPipelineElement\n",
"from molpipeline.abstract_pipeline_elements.core import InvalidInstance\n",
"from molpipeline.abstract_pipeline_elements.mol2any import (\n",
" MolToDescriptorPipelineElement,\n",
")\n",
"from molpipeline.utils.molpipeline_types import AnyTransformer, RDKitMol\n",
"\n",
"\n",
"class ElementCountDescriptor(MolToDescriptorPipelineElement):\n",
" \"\"\"Element count descriptor\"\"\"\n",
" \"\"\"Element count descriptor.\"\"\"\n",
"\n",
" def __init__(\n",
" self,\n",
Expand All @@ -56,6 +53,7 @@
" n_jobs: int = 1,\n",
" uuid: str | None = None,\n",
" ) -> None:\n",
" \"\"\"Construct a new ElementCountDescriptor.\"\"\"\n",
" super().__init__(\n",
" standardizer=standardizer,\n",
" name=name,\n",
Expand All @@ -77,11 +75,8 @@
" \"\"\"Return a copy of the descriptor list.\"\"\"\n",
" return [f\"atom_count_{atom_number}\" for atom_number in self.elements_dict]\n",
"\n",
" #\n",
" def pretransform_single(\n",
" self, value: RDKitMol\n",
" ) -> npt.NDArray[np.float64] | InvalidInstance:\n",
" \"\"\"Transform an RDKit molecule to the element count feature vector\"\"\"\n",
" def pretransform_single(self, value: RDKitMol) -> npt.NDArray[np.float64]:\n",
" \"\"\"Transform an RDKit molecule to the element count feature vector.\"\"\"\n",
" feature_vector = np.zeros(len(self.elements_dict))\n",
" for atom in value.GetAtoms():\n",
" atomic_number = atom.GetAtomicNum()\n",
Expand All @@ -108,13 +103,23 @@
}
],
"source": [
"from rdkit import Chem\n",
"\n",
"# let's create a new ElementCountDescriptor counting carbon, nitrogen, oxygen and fluor atoms in the molecule\n",
"counter = ElementCountDescriptor(elements_to_count=[6, 7, 8, 9])\n",
"\n",
"# let's transform the molecule to our descriptor\n",
"counter.transform([Chem.MolFromSmiles(\"CCO\")])"
]
},
{
"cell_type": "markdown",
"id": "867ce38d-ea34-4db3-8d79-b2637abacbee",
"metadata": {},
"source": [
"The resulting feature vector shows 2 carbons, 0 nitrogens, 1 oxygen and 0 fluorines."
]
},
{
"cell_type": "markdown",
"id": "caa290aa-fc19-4764-b19a-7d511d43f442",
Expand Down

0 comments on commit 188edb2

Please sign in to comment.