feat: Add display Tanimoto similarity matrix #131

Steinbeck-Lab · Apr 26, 2023 · 2ed0db3 · 2ed0db3
1 parent 14be128
commit 2ed0db3
Show file tree

Hide file tree

Showing 4 changed files with 78 additions and 9 deletions.
diff --git a/app/modules/alldescriptors.py b/app/modules/alldescriptors.py
@@ -1,6 +1,16 @@
 from rdkit.Chem import Descriptors, QED, Lipinski, rdMolDescriptors, rdmolops
-from app.modules.rdkitmodules import checkRo5Violations, checkSMILES
-from app.modules.cdkmodules import getCDKSDG, JClass, cdk_base, getAromaticRingCount
+from app.modules.rdkitmodules import (
+    checkRo5Violations,
+    checkSMILES,
+    getTanimotoSimilarityRDKit,
+)
+from app.modules.cdkmodules import (
+    getCDKSDG,
+    JClass,
+    cdk_base,
+    getAromaticRingCount,
+    getTanimotoSimilarityCDK,
+)
 
 
 def getAllRDKitDescriptors(smiles: str):
@@ -188,3 +198,53 @@ def getCDKRDKitcombinedDescriptors(smiles: str):
         return combinedDict
     else:
         return "Error dictionary lenth invalid"
+
+
+def getTable(tanimoto_values: list):
+    """
+    This function converts a matrix (list of rows) into
+    a html table. The header row and the first cell of
+    every data row carry the zero-based column/row index.
+    Args (list): Tanimoto similarity values, one inner list per row.
+    Returns (str): HTML table. An empty input yields a table that
+    contains only the empty corner header cell.
+    """
+    # Guard against an empty matrix: indexing the first row would raise
+    column_count = len(tanimoto_values[0]) if tanimoto_values else 0
+
+    # Collect the fragments and join once instead of repeated concatenation
+    parts = ["<table>", "<tr><th></th>"]
+    # Add header row with column indexes
+    parts.extend(f"<th>{j}</th>" for j in range(column_count))
+    parts.append("</tr>")
+    # Add data rows with row indexes
+    for i, row in enumerate(tanimoto_values):
+        parts.append(f"<tr><td>{i}</td>")
+        parts.extend(f"<td>{cell}</td>" for cell in row)
+        parts.append("</tr>")
+    parts.append("</table>")
+    return "".join(parts)
+
+
+def getTanimotoSimilarity(smileslist: str, toolkit: str = "cdk"):
+    """
+    Take a comma-separated string of SMILES, split it
+    and calculate Tanimoto similarity index
+    between every SMILES string pair and return the matrix
+    rendered as a html table.
+    Args:
+        smileslist (str): comma-separated SMILES strings.
+        toolkit (str): "rdkit" uses getTanimotoSimilarityRDKit, any
+            other value (default "cdk") uses getTanimotoSimilarityCDK.
+    Returns (str): HTML table of the Tanimoto similarity matrix,
+    as produced by getTable.
+    """
+    # Parse comma-separated SMILES, dropping whitespace around each entry
+    # so that input such as "CCO, CCC" is not passed on with a stray space
+    smiles_list = [smiles.strip() for smiles in smileslist.split(",")]
+
+    # The toolkit choice is the same for every pair, so resolve it once
+    if toolkit == "rdkit":
+        similarity = getTanimotoSimilarityRDKit
+    else:
+        similarity = getTanimotoSimilarityCDK
+
+    # Row i, column j holds the similarity of smiles_list[i] and smiles_list[j]
+    matrix = [
+        [similarity(first, second) for second in smiles_list]
+        for first in smiles_list
+    ]
+    return getTable(matrix)
diff --git a/app/modules/cdkmodules.py b/app/modules/cdkmodules.py
@@ -239,7 +239,7 @@ def getCDKDescriptors(smiles: str):
         )
 
 
-def getTanimotoSimilarity(smiles1: str, smiles2: str):
+def getTanimotoSimilarityCDK(smiles1: str, smiles2: str):
     """
     Take two SMILES strings and calculate
     Tanimoto similarity index using Pubchem

diff --git a/app/modules/rdkitmodules.py b/app/modules/rdkitmodules.py
@@ -114,7 +114,7 @@ def get3Dconformers(smiles, depict=True):
         return "Error reading SMILES string, check again."
 
 
-def getTanimoto(smiles1, smiles2):
+def getTanimotoSimilarityRDKit(smiles1, smiles2):
     """
     Take two SMILES strings and calculate
     Tanimoto similarity index using Morgan

diff --git a/app/routers/chem.py b/app/routers/chem.py
@@ -9,10 +9,11 @@
 from fastapi.responses import Response, HTMLResponse
 from app.modules.npscorer import getNPScore
 from app.modules.classyfire import classify, result
-from app.modules.cdkmodules import getCDKSDGMol, getTanimotoSimilarity
+from app.modules.cdkmodules import getCDKSDGMol, getTanimotoSimilarityCDK
 from app.modules.depict import getRDKitDepiction, getCDKDepiction
-from app.modules.rdkitmodules import get3Dconformers, getTanimoto
+from app.modules.rdkitmodules import get3Dconformers, getTanimotoSimilarityRDKit
 from app.modules.coconutdescriptors import getCOCONUTDescriptors
+from app.modules.alldescriptors import getTanimotoSimilarity
 import pandas as pd
 from fastapi.templating import Jinja2Templates
 
@@ -162,16 +163,24 @@ async def Tanimoto_Similarity(smiles: str, toolkit: Optional[str] = "cdk"):
     - **SMILES**: required (query)
     - **toolkit**: optional (defaults: cdk)
     """
-    if smiles:
+    if len(smiles.split(",")) == 2:
         try:
             smiles1, smiles2 = smiles.split(",")
             if toolkit == "rdkit":
-                Tanimoto = getTanimoto(smiles1, smiles2)
+                Tanimoto = getTanimotoSimilarityRDKit(smiles1, smiles2)
             else:
-                Tanimoto = getTanimotoSimilarity(smiles1, smiles2)
+                Tanimoto = getTanimotoSimilarityCDK(smiles1, smiles2)
             return Tanimoto
         except ValueError:
             return 'Please give a SMILES pair with "," seperated. (Example: api.naturalproducts.net/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C)'
+    elif len(smiles.split(",")) > 2:
+        try:
+            matrix = getTanimotoSimilarity(smiles, toolkit)
+            return Response(content=matrix, media_type="text/html")
+        except ValueError:
+            return 'Please give a SMILES pair with "," seperated. (Example: api.naturalproducts.net/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C)'
+    else:
+        return 'Please give a SMILES pair with "," seperated. (Example: api.naturalproducts.net/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C)'
 
 
 @router.get("/depict")