Skip to content

Commit

Permalink
Merge pull request #240 from Steinbeck-Lab/dev-kohulan
Browse files Browse the repository at this point in the history
fix: Documentation, CIP, 3D Mol #236,#237,#238 and #239
  • Loading branch information
Kohulan authored Jun 26, 2023
2 parents a558776 + b637d0e commit 22eadf9
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 40 deletions.
81 changes: 57 additions & 24 deletions app/modules/toolkits/cdk_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,30 +232,57 @@ def getTanimotoSimilarityCDK(smiles1: str, smiles2: str):
Args (str,str): SMILES strings.
Returns (float): Tanimoto similarity.
"""
if any(char.isspace() for char in smiles1):
smiles1 = smiles1.replace(" ", "+")
if any(char.isspace() for char in smiles2):
smiles2 = smiles2.replace(" ", "+")

Tanimoto = JClass(cdk_base + ".similarity.Tanimoto")
SCOB = JClass(cdk_base + ".silent.SilentChemObjectBuilder")
SmilesParser = JClass(cdk_base + ".smiles.SmilesParser")(SCOB.getInstance())
PubchemFingerprinter = JClass(cdk_base + ".fingerprint.PubchemFingerprinter")(
SCOB.getInstance()
)

# parse molecules to get IAtomContainers
mol1 = SmilesParser.parseSmiles(smiles1)
mol2 = SmilesParser.parseSmiles(smiles2)

# Generate BitSets using PubChemFingerprinter
fingerprint1 = PubchemFingerprinter.getBitFingerprint(mol1).asBitSet()
fingerprint2 = PubchemFingerprinter.getBitFingerprint(mol2).asBitSet()

# Calculate Tanimoto similarity
Similarity = Tanimoto.calculate(fingerprint1, fingerprint2)

return "{:.5f}".format(float(str(Similarity)))
CDKHydrogenAdder = JClass(cdk_base + ".tools.CDKHydrogenAdder").getInstance(
SCOB.getInstance()
)
AtomContainerManipulator = JClass(
cdk_base + ".tools.manipulator.AtomContainerManipulator"
)
Cycles = JClass(cdk_base + ".graph.Cycles")
ElectronDonation = JClass(cdk_base + ".aromaticity.ElectronDonation")
Aromaticity = JClass(cdk_base + ".aromaticity.Aromaticity")(
ElectronDonation.cdk(), Cycles.cdkAromaticSet()
)
try:
# parse molecules to get IAtomContainers
mol1 = SmilesParser.parseSmiles(smiles1)
mol2 = SmilesParser.parseSmiles(smiles2)
except Exception as e:
print(e)
return "Check the SMILES string for errors"
if mol1 and mol2:
# perceive atom types and configure atoms
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol1)
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol2)

# add Implicit Hydrogens
CDKHydrogenAdder.addImplicitHydrogens(mol1)
CDKHydrogenAdder.addImplicitHydrogens(mol2)

# convert implicit to explicit Hydrogens
AtomContainerManipulator.convertImplicitToExplicitHydrogens(mol1)
AtomContainerManipulator.convertImplicitToExplicitHydrogens(mol2)

# Apply Aromaticity
Aromaticity.apply(mol1)
Aromaticity.apply(mol2)

# Generate BitSets using PubChemFingerprinter
fingerprint1 = PubchemFingerprinter.getBitFingerprint(mol1).asBitSet()
fingerprint2 = PubchemFingerprinter.getBitFingerprint(mol2).asBitSet()

# Calculate Tanimoto similarity
Similarity = Tanimoto.calculate(fingerprint1, fingerprint2)

return "{:.5f}".format(float(str(Similarity)))
else:
return "Check the SMILES string for errors"


def getCIPAnnotation(smiles: str):
Expand Down Expand Up @@ -288,6 +315,7 @@ def getCIPAnnotation(smiles: str):
):
atom.setProperty(StandardGenerator.ANNOTATION_LABEL, "(?)")

# Iterate over bonds
for bond in mol.bonds():
if bond.getOrder() != IBond.Order.DOUBLE:
continue
Expand All @@ -299,16 +327,18 @@ def getCIPAnnotation(smiles: str):
and stereocenters.isStereocenter(begIdx)
and stereocenters.isStereocenter(endIdx)
):
# only if not in a small ring <7
# Check if not in a small ring <7
if Cycles.smallRingSize(bond, 7) == 0:
bond.setProperty(StandardGenerator.ANNOTATION_LABEL, "(?)")

# no defined stereo?
if not mol.stereoElements().iterator().hasNext():
return mol

# Call the Java method
CdkLabeller.label(mol)

# update to label appropriately for racmic and relative stereochemistry
# Update to label appropriately for racemic and relative stereochemistry
for se in mol.stereoElements():
if se.getConfigClass() == IStereoElement.TH and se.getGroupInfo() != 0:
focus = se.getFocus()
Expand All @@ -329,28 +359,31 @@ def getCIPAnnotation(smiles: str):
BaseMol.CIP_LABEL_KEY, label.toString() + inv.name()
)
elif (se.getGroupInfo() & IStereoElement.GRP_REL) != 0:
if label in [Descriptor.R, Descriptor.S]:
if label == Descriptor.R or label == Descriptor.S:
focus.setProperty(BaseMol.CIP_LABEL_KEY, label.toString() + "*")

# Iterate over atoms
for atom in mol.atoms():
if atom.getProperty(BaseMol.CONF_INDEX) is not None:
atom.setProperty(
StandardGenerator.ANNOTATION_LABEL,
StandardGenerator.ITALIC_DISPLAY_PREFIX
+ str(atom.getProperty(BaseMol.CONF_INDEX)),
+ atom.getProperty(BaseMol.CONF_INDEX).toString(),
)
elif atom.getProperty(BaseMol.CIP_LABEL_KEY) is not None:
atom.setProperty(
StandardGenerator.ANNOTATION_LABEL,
StandardGenerator.ITALIC_DISPLAY_PREFIX
+ str(atom.getProperty(BaseMol.CIP_LABEL_KEY)),
+ atom.getProperty(BaseMol.CIP_LABEL_KEY).toString(),
)

# Iterate over bonds
for bond in mol.bonds():
if bond.getProperty(BaseMol.CIP_LABEL_KEY) is not None:
bond.setProperty(
StandardGenerator.ANNOTATION_LABEL,
StandardGenerator.ITALIC_DISPLAY_PREFIX
+ bond.getProperty(BaseMol.CIP_LABEL_KEY),
+ bond.getProperty(BaseMol.CIP_LABEL_KEY).toString(),
)

return mol
Expand Down
25 changes: 15 additions & 10 deletions app/modules/toolkits/rdkit_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,14 +102,17 @@ def get3Dconformers(smiles, depict=True):
else:
mol = Chem.MolFromSmiles(smiles)
if mol:
AllChem.Compute2DCoords(mol)
mol = Chem.AddHs(mol)
AllChem.EmbedMolecule(mol, randomSeed=0xF00D)
AllChem.MMFFOptimizeMolecule(mol, maxIters=200)
try:
AllChem.MMFFOptimizeMolecule(mol)
except Exception as e:
print(e)
AllChem.EmbedMolecule(mol, randomSeed=0xF00D)
if depict:
return Chem.MolToMolBlock(mol)
else:
mol = Chem.RemoveHs(mol)
# mol = Chem.RemoveHs(mol)
return Chem.MolToMolBlock(mol)
else:
return "Error reading SMILES string, check again."
Expand All @@ -126,15 +129,17 @@ def getTanimotoSimilarityRDKit(smiles1, smiles2):
# create two example molecules
mol1 = checkSMILES(smiles1)
mol2 = checkSMILES(smiles2)
if mol1 and mol2:
# generate Morgan fingerprints for each molecule
fp1 = AllChem.GetMorganFingerprintAsBitVect(mol1, 2, nBits=1024)
fp2 = AllChem.GetMorganFingerprintAsBitVect(mol2, 2, nBits=1024)

# generate Morgan fingerprints for each molecule
fp1 = AllChem.GetMorganFingerprintAsBitVect(mol1, 2, nBits=1024)
fp2 = AllChem.GetMorganFingerprintAsBitVect(mol2, 2, nBits=1024)

# calculate the Tanimoto similarity between the fingerprints
similarity = DataStructs.TanimotoSimilarity(fp1, fp2)
# calculate the Tanimoto similarity between the fingerprints
similarity = DataStructs.TanimotoSimilarity(fp1, fp2)

return similarity
return similarity
else:
return "Check SMILES strings for Errors"


async def getRDKitHOSECodes(smiles: str, noOfSpheres: int):
Expand Down
20 changes: 14 additions & 6 deletions app/routers/chem.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,17 @@ async def NPlikeliness_Score(smiles: str):
async def Tanimoto_Similarity(smiles: str, toolkit: Optional[str] = "cdk"):
"""
Generate the Tanimoto similarity index for a given pair of SMILES strings.
With the default toolkit (cdk), the Tanimoto similarity is calculated using PubChem fingerprints.
https://cdk.github.io/cdk/2.8/docs/api/org/openscience/cdk/fingerprint/PubchemFingerprinter.html
With rdkit, the Tanimoto similarity is calculated using Morgan fingerprints with radius=2 and nBits=1024.
For further modifications, see the rdkit_wrapper module.
Usage: Please give a SMILES pair with "," separated. Example: api.naturalproducts.net/latest/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C
- **SMILES**: required (query)
- **toolkit**: optional (defaults: cdk)
- **toolkit**: optional (default: cdk; rdkit is also supported)
"""
if len(smiles.split(",")) == 2:
try:
Expand All @@ -225,16 +233,16 @@ async def Tanimoto_Similarity(smiles: str, toolkit: Optional[str] = "cdk"):
else:
Tanimoto = getTanimotoSimilarityCDK(smiles1, smiles2)
return Tanimoto
except ValueError:
return 'Please give a SMILES pair with "," separated. (Example: api.naturalproducts.net/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C)'
except Exception:
return 'Please give a SMILES pair with "," separated. Example: api.naturalproducts.net/latest/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C'
elif len(smiles.split(",")) > 2:
try:
matrix = getTanimotoSimilarity(smiles, toolkit)
return Response(content=matrix, media_type="text/html")
except ValueError:
return 'Please give a SMILES pair with "," separated. (Example: api.naturalproducts.net/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C)'
except Exception:
return 'Please give a SMILES pair with "," separated. Example: api.naturalproducts.net/latest/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C'
else:
return 'Please give a SMILES pair with "," separated. (Example: api.naturalproducts.net/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C)'
return 'Please give a SMILES pair with "," separated. Example: api.naturalproducts.net/latest/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C'


@router.get("/coconut/pre-processing")
Expand Down

0 comments on commit 22eadf9

Please sign in to comment.