Skip to content

Commit

Permalink
feat: Add CIP annotations #136
Browse files Browse the repository at this point in the history
  • Loading branch information
Kohulan committed May 16, 2023
1 parent d63ee30 commit 15406fd
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 11 deletions.
107 changes: 106 additions & 1 deletion app/modules/cdkmodules.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,25 @@
# Start the JVM once, with every Java archive this module relies on placed on
# the classpath. The archives are cached locally through pystow under the
# "STOUT-V2" key.
if not isJVMStarted():
    # Release URLs of the required Java archives.
    cdk_path = "https://github.com/cdk/cdk/releases/download/cdk-2.8/cdk-2.8.jar"
    sru_path = "https://github.com/JonasSchaub/SugarRemoval/releases/download/v1.3.2/SugarRemovalUtility-jar-with-dependencies.jar"
    centres_path = (
        "https://github.com/SiMolecule/centres/releases/download/1.0/centres.jar"
    )

    # pystow.ensure only downloads a file that is not cached yet and returns
    # its local path, so no separate existence check or hand-built path is
    # needed.
    cdkjar_path = str(pystow.ensure("STOUT-V2", url=cdk_path))
    srujar_path = str(pystow.ensure("STOUT-V2", url=sru_path))
    centresjar_path = str(pystow.ensure("STOUT-V2", url=centres_path))

    # A single start-up call: the JVM classpath is fixed at start, so all
    # three jars (including centres.jar used for CIP labelling) are passed
    # together.
    startJVM("-ea", classpath=[cdkjar_path, srujar_path, centresjar_path])

# Base package of the CDK classes loaded through JClass. Defined
# unconditionally so it is available even when the JVM was already running.
cdk_base = "org.openscience.cdk"


Expand Down Expand Up @@ -271,3 +278,101 @@ def getTanimotoSimilarityCDK(smiles1: str, smiles2: str):
Similarity = Tanimoto.calculate(fingerprint1, fingerprint2)

return "{:.5f}".format(float(str(Similarity)))


def getCIPAnnotation(smiles: str):
    """
    Annotate the stereo elements of a molecule with CIP labels.

    The molecule is built from the SMILES string with ``getCDKSDG``. Atoms and
    double bonds that CDK's ``Stereocenters`` reports as stereocentres are
    first given the placeholder annotation "(?)". If the molecule defines any
    stereo elements, the centres ``CdkLabeller`` is run and the resulting
    labels are written to ``StandardGenerator.ANNOTATION_LABEL`` (prefixed
    with ``ITALIC_DISPLAY_PREFIX``), replacing the placeholder where a label
    exists.

    Args:
        smiles (str): SMILES string of the molecule to annotate.

    Returns:
        The molecule object obtained from ``getCDKSDG`` (presumably a CDK
        IAtomContainer - confirm against ``getCDKSDG``), with annotation
        properties set on its atoms and bonds.
    """
    mol = getCDKSDG(smiles)
    centres_base = "com.simolecule.centres"
    Cycles = JClass(cdk_base + ".graph.Cycles")
    IBond = JClass(cdk_base + ".interfaces.IBond")
    IStereoElement = JClass(cdk_base + ".interfaces.IStereoElement")
    Stereocenters = JClass(cdk_base + ".stereo.Stereocenters")
    StandardGenerator = JClass(
        cdk_base + ".renderer.generators.standard.StandardGenerator"
    )

    BaseMol = JClass(centres_base + ".BaseMol")
    CdkLabeller = JClass(centres_base + ".CdkLabeller")
    Descriptor = JClass(centres_base + ".Descriptor")

    # Step 1: placeholder "(?)" on every tetracoordinate stereocentre atom.
    stereocenters = Stereocenters.of(mol)
    for atom in mol.atoms():
        atom_idx = atom.getIndex()
        if (
            stereocenters.isStereocenter(atom_idx)
            and stereocenters.elementType(atom_idx)
            == Stereocenters.Type.Tetracoordinate
        ):
            atom.setProperty(StandardGenerator.ANNOTATION_LABEL, "(?)")

    # Step 2: placeholder "(?)" on double bonds whose two ends are both
    # tricoordinate stereocentres.
    for bond in mol.bonds():
        if bond.getOrder() != IBond.Order.DOUBLE:
            continue
        beg_idx = bond.getBegin().getIndex()
        end_idx = bond.getEnd().getIndex()
        if (
            stereocenters.elementType(beg_idx) == Stereocenters.Type.Tricoordinate
            and stereocenters.elementType(end_idx) == Stereocenters.Type.Tricoordinate
            and stereocenters.isStereocenter(beg_idx)
            and stereocenters.isStereocenter(end_idx)
        ):
            # only if not in a small ring <7
            if Cycles.smallRingSize(bond, 7) == 0:
                bond.setProperty(StandardGenerator.ANNOTATION_LABEL, "(?)")

    # No defined stereo? Then there is nothing to label; return the molecule
    # with only the placeholders set above.
    if not mol.stereoElements().iterator().hasNext():
        return mol

    CdkLabeller.label(mol)

    # Step 3: update labels appropriately for racemic and relative
    # stereochemistry on tetrahedral elements that belong to a stereo group.
    for se in mol.stereoElements():
        group_info = se.getGroupInfo()
        if se.getConfigClass() != IStereoElement.TH or group_info == 0:
            continue
        focus = se.getFocus()
        label = focus.getProperty(BaseMol.CIP_LABEL_KEY)
        if not (
            isinstance(label, Descriptor)
            and label != Descriptor.ns
            and label != Descriptor.Unknown
        ):
            continue
        if (group_info & IStereoElement.GRP_RAC) != 0:
            # Racemic group: show the label followed by its inverse
            # (R -> "RS", S -> "SR"); other descriptors are left untouched.
            inv = None
            if label == Descriptor.R:
                inv = Descriptor.S
            elif label == Descriptor.S:
                inv = Descriptor.R
            if inv is not None:
                focus.setProperty(
                    BaseMol.CIP_LABEL_KEY, label.toString() + inv.name()
                )
        elif (group_info & IStereoElement.GRP_REL) != 0:
            # Relative group: mark R/S with a trailing "*".
            if label in (Descriptor.R, Descriptor.S):
                focus.setProperty(BaseMol.CIP_LABEL_KEY, label.toString() + "*")

    # Step 4: copy the labels into the annotation property. For atoms the
    # CONF_INDEX property takes precedence over the CIP label.
    for atom in mol.atoms():
        if atom.getProperty(BaseMol.CONF_INDEX) is not None:
            atom.setProperty(
                StandardGenerator.ANNOTATION_LABEL,
                StandardGenerator.ITALIC_DISPLAY_PREFIX
                + str(atom.getProperty(BaseMol.CONF_INDEX)),
            )
        elif atom.getProperty(BaseMol.CIP_LABEL_KEY) is not None:
            atom.setProperty(
                StandardGenerator.ANNOTATION_LABEL,
                StandardGenerator.ITALIC_DISPLAY_PREFIX
                + str(atom.getProperty(BaseMol.CIP_LABEL_KEY)),
            )
    for bond in mol.bonds():
        if bond.getProperty(BaseMol.CIP_LABEL_KEY) is not None:
            # str() mirrors the atom branches above: the stored label may be a
            # Java Descriptor object rather than a string, so it is converted
            # before being concatenated to the prefix.
            bond.setProperty(
                StandardGenerator.ANNOTATION_LABEL,
                StandardGenerator.ITALIC_DISPLAY_PREFIX
                + str(bond.getProperty(BaseMol.CIP_LABEL_KEY)),
            )

    return mol
13 changes: 8 additions & 5 deletions app/modules/depict.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
from rdkit import Chem
from rdkit.Chem import rdDepictor
from rdkit.Chem.Draw import rdMolDraw2D
from app.modules.cdkmodules import getCDKSDG
from app.modules.cdkmodules import getCDKSDG, getCIPAnnotation
from jpype import JClass


def getCDKDepiction(smiles: str, molSize=(512, 512), rotate=0, unicolor=False):
def getCDKDepiction(
smiles: str, molSize=(512, 512), rotate=0, CIP=True, unicolor=False
):
"""This function takes the user input SMILES and Depicts it
using the CDK Depiction Generator.
Args:
Expand All @@ -25,7 +27,6 @@ def getCDKDepiction(smiles: str, molSize=(512, 512), rotate=0, unicolor=False):
DepictionGenerator = (
JClass(cdk_base + ".depict.DepictionGenerator")()
.withSize(molSize[0], molSize[1])
.withAtomValues()
.withParam(StandardGenerator.StrokeRatio.class_, 1.0)
.withAnnotationColor(Color.BLACK)
.withParam(StandardGenerator.AtomColor.class_, UniColor(Color.BLACK))
Expand All @@ -37,15 +38,17 @@ def getCDKDepiction(smiles: str, molSize=(512, 512), rotate=0, unicolor=False):
JClass(cdk_base + ".depict.DepictionGenerator")()
.withAtomColors(CDK2DAtomColors)
.withSize(molSize[0], molSize[1])
.withAtomValues()
.withParam(StandardGenerator.StrokeRatio.class_, 1.0)
.withFillToFit()
.withBackgroundColor(Color.WHITE)
)
if any(char.isspace() for char in smiles):
smiles = smiles.replace(" ", "+")

moleculeSDG = getCDKSDG(smiles)
if CIP:
moleculeSDG = getCIPAnnotation(smiles)
else:
moleculeSDG = getCDKSDG(smiles)

if moleculeSDG:

Expand Down
9 changes: 4 additions & 5 deletions app/routers/chem.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,7 @@ async def CDK2D_Coordinates(smiles: str):
if smiles:
mol = Chem.MolFromSmiles(smiles)
if mol:
return Response(
content=getCDKSDGMol(smiles).replace("$$$$\n", ""),
media_type="text/plain",
)
return getCDKSDGMol(smiles)
else:
return "Error reading SMILES string, check again."

Expand Down Expand Up @@ -193,6 +190,8 @@ async def Depict2D_molecule(
width: Optional[int] = 512,
height: Optional[int] = 512,
rotate: Optional[int] = 0,
CIP: Optional[bool] = True,
unicolor: Optional[bool] = False,
):
"""
Generate 2D Depictions using CDK or RDKit using given parameters.
Expand All @@ -206,7 +205,7 @@ async def Depict2D_molecule(
if generator:
if generator == "cdksdg":
return Response(
content=getCDKDepiction(smiles, [width, height], rotate),
content=getCDKDepiction(smiles, [width, height], rotate, CIP, unicolor),
media_type="image/svg+xml",
)
else:
Expand Down

0 comments on commit 15406fd

Please sign in to comment.