Merge pull request #240 from Steinbeck-Lab/dev-kohulan

fix: Documentation, CIP, 3D Mol #236,#237,#238 and #239
Steinbeck-Lab · Jun 26, 2023 · 22eadf9 · 22eadf9
2 parents a558776 + b637d0e
commit 22eadf9
Show file tree

Hide file tree

Showing 3 changed files with 86 additions and 40 deletions.
diff --git a/app/modules/toolkits/cdk_wrapper.py b/app/modules/toolkits/cdk_wrapper.py
@@ -232,30 +232,57 @@ def getTanimotoSimilarityCDK(smiles1: str, smiles2: str):
     Args (str,str): SMILES strings.
     Returns (float): Tanimoto similarity.
     """
-    if any(char.isspace() for char in smiles1):
-        smiles1 = smiles1.replace(" ", "+")
-    if any(char.isspace() for char in smiles2):
-        smiles2 = smiles2.replace(" ", "+")
-
     Tanimoto = JClass(cdk_base + ".similarity.Tanimoto")
     SCOB = JClass(cdk_base + ".silent.SilentChemObjectBuilder")
     SmilesParser = JClass(cdk_base + ".smiles.SmilesParser")(SCOB.getInstance())
     PubchemFingerprinter = JClass(cdk_base + ".fingerprint.PubchemFingerprinter")(
         SCOB.getInstance()
     )
-
-    # parse molecules to get IAtomContainers
-    mol1 = SmilesParser.parseSmiles(smiles1)
-    mol2 = SmilesParser.parseSmiles(smiles2)
-
-    # Generate BitSets using PubChemFingerprinter
-    fingerprint1 = PubchemFingerprinter.getBitFingerprint(mol1).asBitSet()
-    fingerprint2 = PubchemFingerprinter.getBitFingerprint(mol2).asBitSet()
-
-    # Calculate Tanimoto similarity
-    Similarity = Tanimoto.calculate(fingerprint1, fingerprint2)
-
-    return "{:.5f}".format(float(str(Similarity)))
+    CDKHydrogenAdder = JClass(cdk_base + ".tools.CDKHydrogenAdder").getInstance(
+        SCOB.getInstance()
+    )
+    AtomContainerManipulator = JClass(
+        cdk_base + ".tools.manipulator.AtomContainerManipulator"
+    )
+    Cycles = JClass(cdk_base + ".graph.Cycles")
+    ElectronDonation = JClass(cdk_base + ".aromaticity.ElectronDonation")
+    Aromaticity = JClass(cdk_base + ".aromaticity.Aromaticity")(
+        ElectronDonation.cdk(), Cycles.cdkAromaticSet()
+    )
+    try:
+        # parse molecules to get IAtomContainers
+        mol1 = SmilesParser.parseSmiles(smiles1)
+        mol2 = SmilesParser.parseSmiles(smiles2)
+    except Exception as e:
+        print(e)
+        return "Check the SMILES string for errors"
+    if mol1 and mol2:
+        # perceive atom types and configure atoms
+        AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol1)
+        AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol2)
+
+        # add Implicit Hydrogens
+        CDKHydrogenAdder.addImplicitHydrogens(mol1)
+        CDKHydrogenAdder.addImplicitHydrogens(mol2)
+
+        # convert implicit to explicit Hydrogens
+        AtomContainerManipulator.convertImplicitToExplicitHydrogens(mol1)
+        AtomContainerManipulator.convertImplicitToExplicitHydrogens(mol2)
+
+        # Apply Aromaticity
+        Aromaticity.apply(mol1)
+        Aromaticity.apply(mol2)
+
+        # Generate BitSets using PubChemFingerprinter
+        fingerprint1 = PubchemFingerprinter.getBitFingerprint(mol1).asBitSet()
+        fingerprint2 = PubchemFingerprinter.getBitFingerprint(mol2).asBitSet()
+
+        # Calculate Tanimoto similarity
+        Similarity = Tanimoto.calculate(fingerprint1, fingerprint2)
+
+        return "{:.5f}".format(float(str(Similarity)))
+    else:
+        return "Check the SMILES string for errors"
 
 
 def getCIPAnnotation(smiles: str):
@@ -288,6 +315,7 @@ def getCIPAnnotation(smiles: str):
         ):
             atom.setProperty(StandardGenerator.ANNOTATION_LABEL, "(?)")
 
+    # Iterate over bonds
     for bond in mol.bonds():
         if bond.getOrder() != IBond.Order.DOUBLE:
             continue
@@ -299,16 +327,18 @@ def getCIPAnnotation(smiles: str):
             and stereocenters.isStereocenter(begIdx)
             and stereocenters.isStereocenter(endIdx)
         ):
-            # only if not in a small ring <7
+            # Check if not in a small ring <7
             if Cycles.smallRingSize(bond, 7) == 0:
                 bond.setProperty(StandardGenerator.ANNOTATION_LABEL, "(?)")
+
     # no defined stereo?
     if not mol.stereoElements().iterator().hasNext():
         return mol
 
+    # Call the Java method
     CdkLabeller.label(mol)
 
-    # update to label appropriately for racmic and relative stereochemistry
+    # Update to label appropriately for racemic and relative stereochemistry
     for se in mol.stereoElements():
         if se.getConfigClass() == IStereoElement.TH and se.getGroupInfo() != 0:
             focus = se.getFocus()
@@ -329,28 +359,31 @@ def getCIPAnnotation(smiles: str):
                             BaseMol.CIP_LABEL_KEY, label.toString() + inv.name()
                         )
                 elif (se.getGroupInfo() & IStereoElement.GRP_REL) != 0:
-                    if label in [Descriptor.R, Descriptor.S]:
+                    if label == Descriptor.R or label == Descriptor.S:
                         focus.setProperty(BaseMol.CIP_LABEL_KEY, label.toString() + "*")
 
+    # Iterate over atoms
     for atom in mol.atoms():
         if atom.getProperty(BaseMol.CONF_INDEX) is not None:
             atom.setProperty(
                 StandardGenerator.ANNOTATION_LABEL,
                 StandardGenerator.ITALIC_DISPLAY_PREFIX
-                + str(atom.getProperty(BaseMol.CONF_INDEX)),
+                + atom.getProperty(BaseMol.CONF_INDEX).toString(),
             )
         elif atom.getProperty(BaseMol.CIP_LABEL_KEY) is not None:
             atom.setProperty(
                 StandardGenerator.ANNOTATION_LABEL,
                 StandardGenerator.ITALIC_DISPLAY_PREFIX
-                + str(atom.getProperty(BaseMol.CIP_LABEL_KEY)),
+                + atom.getProperty(BaseMol.CIP_LABEL_KEY).toString(),
             )
+
+    # Iterate over bonds
     for bond in mol.bonds():
         if bond.getProperty(BaseMol.CIP_LABEL_KEY) is not None:
             bond.setProperty(
                 StandardGenerator.ANNOTATION_LABEL,
                 StandardGenerator.ITALIC_DISPLAY_PREFIX
-                + bond.getProperty(BaseMol.CIP_LABEL_KEY),
+                + bond.getProperty(BaseMol.CIP_LABEL_KEY).toString(),
             )
 
     return mol

diff --git a/app/modules/toolkits/rdkit_wrapper.py b/app/modules/toolkits/rdkit_wrapper.py
@@ -102,14 +102,17 @@ def get3Dconformers(smiles, depict=True):
     else:
         mol = Chem.MolFromSmiles(smiles)
     if mol:
-        AllChem.Compute2DCoords(mol)
         mol = Chem.AddHs(mol)
         AllChem.EmbedMolecule(mol, randomSeed=0xF00D)
-        AllChem.MMFFOptimizeMolecule(mol, maxIters=200)
+        try:
+            AllChem.MMFFOptimizeMolecule(mol)
+        except Exception as e:
+            print(e)
+            AllChem.EmbedMolecule(mol, randomSeed=0xF00D)
         if depict:
             return Chem.MolToMolBlock(mol)
         else:
-            mol = Chem.RemoveHs(mol)
+            # mol = Chem.RemoveHs(mol)
             return Chem.MolToMolBlock(mol)
     else:
         return "Error reading SMILES string, check again."
@@ -126,15 +129,17 @@ def getTanimotoSimilarityRDKit(smiles1, smiles2):
     # create two example molecules
     mol1 = checkSMILES(smiles1)
     mol2 = checkSMILES(smiles2)
+    if mol1 and mol2:
+        # generate Morgan fingerprints for each molecule
+        fp1 = AllChem.GetMorganFingerprintAsBitVect(mol1, 2, nBits=1024)
+        fp2 = AllChem.GetMorganFingerprintAsBitVect(mol2, 2, nBits=1024)
 
-    # generate Morgan fingerprints for each molecule
-    fp1 = AllChem.GetMorganFingerprintAsBitVect(mol1, 2, nBits=1024)
-    fp2 = AllChem.GetMorganFingerprintAsBitVect(mol2, 2, nBits=1024)
-
-    # calculate the Tanimoto similarity between the fingerprints
-    similarity = DataStructs.TanimotoSimilarity(fp1, fp2)
+        # calculate the Tanimoto similarity between the fingerprints
+        similarity = DataStructs.TanimotoSimilarity(fp1, fp2)
 
-    return similarity
+        return similarity
+    else:
+        return "Check SMILES strings for Errors"
 
 
 async def getRDKitHOSECodes(smiles: str, noOfSpheres: int):

diff --git a/app/routers/chem.py b/app/routers/chem.py
@@ -213,9 +213,17 @@ async def NPlikeliness_Score(smiles: str):
 async def Tanimoto_Similarity(smiles: str, toolkit: Optional[str] = "cdk"):
     """
     Generate the Tanimoto similarity index for a given pair of SMILES strings.
+    With the default toolkit (cdk), the Tanimoto similarity is calculated using PubChem fingerprints.
+    https://cdk.github.io/cdk/2.8/docs/api/org/openscience/cdk/fingerprint/PubchemFingerprinter.html
+
+    With rdkit, the Tanimoto similarity is calculated using Morgan fingerprints with radius=2 and nBits=1024.
+    For further modifications, see the rdkit_wrapper module.
+
+    Usage: Please give a SMILES pair with "," separated. Example: api.naturalproducts.net/latest/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C
 
     - **SMILES**: required (query)
-    - **toolkit**: optional (defaults: cdk)
+    - **toolkit**: optional (default: cdk; rdkit is also supported)
+
     """
     if len(smiles.split(",")) == 2:
         try:
@@ -225,16 +233,16 @@ async def Tanimoto_Similarity(smiles: str, toolkit: Optional[str] = "cdk"):
             else:
                 Tanimoto = getTanimotoSimilarityCDK(smiles1, smiles2)
             return Tanimoto
-        except ValueError:
-            return 'Please give a SMILES pair with "," separated. (Example: api.naturalproducts.net/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C)'
+        except Exception:
+            return 'Please give a SMILES pair with "," separated. Example: api.naturalproducts.net/latest/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C'
     elif len(smiles.split(",")) > 2:
         try:
             matrix = getTanimotoSimilarity(smiles, toolkit)
             return Response(content=matrix, media_type="text/html")
-        except ValueError:
-            return 'Please give a SMILES pair with "," separated. (Example: api.naturalproducts.net/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C)'
+        except Exception:
+            return 'Please give a SMILES pair with "," separated. Example: api.naturalproducts.net/latest/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C'
     else:
-        return 'Please give a SMILES pair with "," separated. (Example: api.naturalproducts.net/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C)'
+        return 'Please give a SMILES pair with "," separated. Example: api.naturalproducts.net/latest/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C'
 
 
 @router.get("/coconut/pre-processing")