notebooks: beautify advanced_02 (#50)

- Fix typos, remove wrong type hints, add more explanation
basf · Jul 11, 2024 · 188edb2 · 188edb2
1 parent 202ca22
commit 188edb2
Showing 1 changed file with 15 additions and 10 deletions.
diff --git a/notebooks/advanced_02_add_custom_pipeline_elements.ipynb b/notebooks/advanced_02_add_custom_pipeline_elements.ipynb
@@ -20,7 +20,7 @@
     "\n",
     "## Example using `MolToDescriptorPipelineElement`\n",
     "\n",
-    "The `MolToDescriptorPipelineElement` is a specification of `MolToAnyPipelineElement` adding useful functionality to the interface, like the number of features, the names of features and optional feature normalization. Analogously, the `MolToFingerprintPipelineElement` provides some useful functions for molecular fingerprint featurizes.\n",
+    "The `MolToDescriptorPipelineElement` is a specialization of `MolToAnyPipelineElement` adding useful functionality to the interface, like the number of features, the names of features and optional feature normalization. Analogously, the `MolToFingerprintPipelineElement` provides some useful functions for molecular fingerprint featurization.\n",
     "\n",
     "In the following example, we demonstrate how to implement a new molecular descriptor representing the composition of a molecule using the counts of chemical element symbols. "
    ]
@@ -35,18 +35,15 @@
     "import numpy as np\n",
     "import numpy.typing as npt\n",
     "\n",
-    "from rdkit import Chem\n",
-    "\n",
     "from molpipeline.abstract_pipeline_elements.core import MolToAnyPipelineElement\n",
-    "from molpipeline.abstract_pipeline_elements.core import InvalidInstance\n",
     "from molpipeline.abstract_pipeline_elements.mol2any import (\n",
     "    MolToDescriptorPipelineElement,\n",
     ")\n",
     "from molpipeline.utils.molpipeline_types import AnyTransformer, RDKitMol\n",
     "\n",
     "\n",
     "class ElementCountDescriptor(MolToDescriptorPipelineElement):\n",
-    "    \"\"\"Element count descriptor\"\"\"\n",
+    "    \"\"\"Element count descriptor.\"\"\"\n",
     "\n",
     "    def __init__(\n",
     "        self,\n",
@@ -56,6 +53,7 @@
     "        n_jobs: int = 1,\n",
     "        uuid: str | None = None,\n",
     "    ) -> None:\n",
+    "        \"\"\"Construct a new ElementCountDescriptor.\"\"\"\n",
     "        super().__init__(\n",
     "            standardizer=standardizer,\n",
     "            name=name,\n",
@@ -77,11 +75,8 @@
     "        \"\"\"Return a copy of the descriptor list.\"\"\"\n",
     "        return [f\"atom_count_{atom_number}\" for atom_number in self.elements_dict]\n",
     "\n",
-    "    #\n",
-    "    def pretransform_single(\n",
-    "        self, value: RDKitMol\n",
-    "    ) -> npt.NDArray[np.float64] | InvalidInstance:\n",
-    "        \"\"\"Transform an RDKit molecule to the element count feature vector\"\"\"\n",
+    "    def pretransform_single(self, value: RDKitMol) -> npt.NDArray[np.float64]:\n",
+    "        \"\"\"Transform an RDKit molecule to the element count feature vector.\"\"\"\n",
     "        feature_vector = np.zeros(len(self.elements_dict))\n",
     "        for atom in value.GetAtoms():\n",
     "            atomic_number = atom.GetAtomicNum()\n",
@@ -108,13 +103,23 @@
     }
    ],
    "source": [
+    "from rdkit import Chem\n",
+    "\n",
     "# let's create a new ElementCountDescriptor counting carbon, nitrogen, oxygen and fluorine atoms in the molecule\n",
     "counter = ElementCountDescriptor(elements_to_count=[6, 7, 8, 9])\n",
     "\n",
     "# let's transform the molecule to our descriptor\n",
     "counter.transform([Chem.MolFromSmiles(\"CCO\")])"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "867ce38d-ea34-4db3-8d79-b2637abacbee",
+   "metadata": {},
+   "source": [
+    "The resulting feature vector shows 2 carbons, 0 nitrogens, 1 oxygen and 0 fluorines."
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "caa290aa-fc19-4764-b19a-7d511d43f442",