diff --git a/Dockerfile b/Dockerfile index d18b2ac..b1059ad 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,22 +1,20 @@ -FROM debian:buster-20230227-slim AS cheminf-python-ms +FROM continuumio/miniconda3 AS cheminf-python-ms + +ENV PYTHON_VERSION=3.10 +ENV RDKIT_VERSION=2023.03.1 ARG RELEASE_VERSION ENV RELEASE_VERSION=${RELEASE_VERSION} # Install runtime dependencies -RUN apt-get update \ - && apt-get upgrade -y \ - && apt-get install -y --no-install-recommends \ - python3 \ - python3-pip \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - RUN apt-get update && \ apt-get install -y software-properties-common && \ apt-get update -y && \ apt-get install -y openjdk-11-jre +RUN conda install -y -c conda-forge "python>=${PYTHON_VERSION}" +RUN conda install -y -c conda-forge "rdkit>=${RDKIT_VERSION}" + RUN python3 -m pip install -U pip ENV JAVA_HOME /usr/lib/jvm/java-11-openjdk-amd64/ diff --git a/README.md b/README.md index cebc5d7..4b34d72 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ [![framework](https://img.shields.io/badge/Framework-FastAPI-blue?style)](https://fastapi.tiangolo.com/) [![FastAPI Documentation](https://img.shields.io/badge/docs-fastapi-blue)](https://api.naturalproducts.net/docs#/) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7747862.svg)](https://doi.org/10.5281/zenodo.7747862) -## Overview of Cheminformatics Micro Services +## Overview of Cheminformatics Python Microservices This set of essential and valuable microservices is designed to be accessed via API calls to support cheminformatics. Generally, it is designed to work with SMILES-based inputs and could be used to translate between different machine-readable representations, get Natural Product (NP) likeliness scores, visualize chemical structures, and generate descriptors. 
In addition, the microservices also host an instance of [STOUT](https://github.com/Kohulan/Smiles-TO-iUpac-Translator) and another instance of [DECIMER](https://github.com/Kohulan/DECIMER-Image_Transformer) (two deep learning models for IUPAC name generation and optical chemical structure recognition, respectively). @@ -23,37 +23,37 @@ This set of essential and valuable microservices is designed to be accessed via - SMILES to IUPAC name ```fastapi - https://api.naturalproducts.net/convert/iupac?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C + https://api.naturalproducts.net/v1/convert/iupac?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C ``` - SMILES to SELFIES ```fastapi - https://api.naturalproducts.net/convert/selfies?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C + https://api.naturalproducts.net/v1/convert/selfies?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C ``` - SMILES to mol (default: CDK) ```fastapi - https://api.naturalproducts.net/convert/mol?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C + https://api.naturalproducts.net/v1/convert/mol?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C ``` ```fastapi - https://api.naturalproducts.net/convert/mol?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C&generator=rdkit + https://api.naturalproducts.net/v1/convert/mol?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C&generator=rdkit ``` - Chem - Calculate Descriptors ```fastapi - https://api.naturalproducts.net/chem/descriptors?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C + https://api.naturalproducts.net/v1/chem/descriptors?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C ``` - Depict molecule (default: CDK) ```fastapi - https://api.naturalproducts.net/chem/depict?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C + https://api.naturalproducts.net/v1/chem/depict?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C ``` - Depict molecule with settings ```fastapi - https://api.naturalproducts.net/chem/depict?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C&generator=rdkit&width=256&height=256&rotate=75 + 
https://api.naturalproducts.net/v1/chem/depict?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C&generator=rdkit&width=256&height=256&rotate=75 ``` - Visualize molecule in 3D ```fastapi - https://api.naturalproducts.net/chem/depict3D?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C + https://api.naturalproducts.net/v1/chem/depict3D?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C ``` > **Note** @@ -65,10 +65,28 @@ This project is licensed under the MIT License - see the [LICENSE](https://githu ## Citation -Venkata, C., Sharma, N., & Rajan, K. (2023). cheminformatics-python-microservice (Version v0.3.0 - prerelease) [Computer software]. https://doi.org/10.5281/zenodo.7747862 +Venkata, C., Sharma, N., & Rajan, K. (2023). Cheminformatics Python Microservice (Version v0.8.0 - prerelease) [Computer software]. https://doi.org/10.5281/zenodo.7747862 + +## Version information +

+ Cheminformatics Python Microservice: V0.8.0 +

+ +

+ +| Tools | Version | +|------------|-----------| +| RDKit | 2023.03.1 | +| CDK | 2.8.0 | +| STOUT | 2.0.0 | +| DECIMER | 2.2.0 | + +

+ + ## Maintained by -Cheminformatics Micro Services and [Natural Products Online](https://naturalproducts.net) are developed and maintained by the [Steinbeck group](https://cheminf.uni-jena.de) at the [Friedrich Schiller University](https://www.uni-jena.de/en/) Jena, Germany. +Cheminformatics Python Microservice and [Natural Products Online](https://naturalproducts.net) are developed and maintained by the [Steinbeck group](https://cheminf.uni-jena.de) at the [Friedrich Schiller University](https://www.uni-jena.de/en/) Jena, Germany. The code for this web application is released under the [MIT license](https://opensource.org/licenses/MIT). Copyright © CC-BY-SA 2023

cheminf Logo

diff --git a/app/__init__.py b/app/__init__.py index e69de29..5ac3f3b 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- + +""" +Cheminformatics Micro Services Python Modules. + +This set of essential and valuable microservices is designed to be accessed via API calls to support cheminformatics. Generally, it is designed to work with SMILES-based inputs and could be used to translate between different machine-readable representations, get Natural Product (NP) likeliness scores, visualize chemical structures, and generate descriptors. In addition, the microservices also host an instance of STOUT and another instance of DECIMER (two deep learning models for IUPAC name generation and optical chemical structure recognition, respectively). + +For comments, bug reports or feature requests, +please raise a issue on our Github repository. +""" + +__version__ = "0.8.0" diff --git a/app/config.py b/app/config.py deleted file mode 100644 index 64c8c6c..0000000 --- a/app/config.py +++ /dev/null @@ -1,12 +0,0 @@ -from pydantic import BaseSettings, Field - - -class Settings(BaseSettings): - PGPASSWORD: str = Field(..., env="PGPASSWORD") - POSTGRES_DB: str = Field(..., env="POSTGRES_DB") - POSTGRES_USER: str = Field(..., env="POSTGRES_USER") - POSTGRES_PASSWORD: str = Field(..., env="POSTGRES_PASSWORD") - POSTGRES_PORT: str = Field(..., env="POSTGRES_PORT") - - -settings = Settings() diff --git a/app/main.py b/app/main.py index 4ca99d9..2fa1013 100644 --- a/app/main.py +++ b/app/main.py @@ -1,13 +1,25 @@ from fastapi import FastAPI from fastapi.responses import RedirectResponse +from fastapi_versioning import VersionedFastAPI # from .config import settings -from .routers import chem, converters, compose, decimer +from .routers import chem, converters, decimer from fastapi.middleware.cors import CORSMiddleware -from fastapi.openapi.utils import get_openapi -import os -app = FastAPI() +app = FastAPI( + title="Cheminf Micro Services", + 
description="This set of essential and valuable microservices is designed to be accessed via API calls to support cheminformatics. Generally, it is designed to work with SMILES-based inputs and could be used to translate between different machine-readable representations, get Natural Product (NP) likeliness scores, visualize chemical structures, and generate descriptors. In addition, the microservices also host an instance of STOUT and another instance of DECIMER (two deep learning models for IUPAC name generation and optical chemical structure recognition, respectively).", + terms_of_service="https://github.com/Steinbeck-Lab", + contact={ + "name": "Steinbeck Lab", + "url": "https://cheminf.uni-jena.de/", + "email": "caffeine@listserv.uni-jena.de", + }, + license_info={ + "name": "CC BY 4.0", + "url": "https://creativecommons.org/licenses/by/4.0/", + }, +) origins = ["*"] @@ -21,7 +33,6 @@ app.include_router(chem.router) app.include_router(converters.router) -app.include_router(compose.router) app.include_router(decimer.router) @@ -30,20 +41,18 @@ async def docs_redirect(): return RedirectResponse(url="/docs") -def custom_openapi(): - if app.openapi_schema: - return app.openapi_schema - openapi_schema = get_openapi( - title="Cheminf Micro Services", - version=os.getenv("RELEASE_VERSION", "latest"), - description="This set of essential and valuable microservices is designed to be accessed via API calls to support cheminformatics. Generally, it is designed to work with SMILES-based inputs and could be used to translate between different machine-readable representations, get Natural Product (NP) likeliness scores, visualize chemical structures, and generate descriptors. 
In addition, the microservices also host an instance of STOUT and another instance of DECIMER (two deep learning models for IUPAC name generation and optical chemical structure recognition, respectively).", - routes=app.routes, - ) - openapi_schema["info"]["x-logo"] = { - "url": "https://github.com/Steinbeck-Lab/cheminformatics-python-microservice/raw/main/public/img/logo.png" - } - app.openapi_schema = openapi_schema - return app.openapi_schema - - -app.openapi = custom_openapi +app = VersionedFastAPI( + app, + version_format="{major}", + prefix_format="/v{major}", + terms_of_service="https://github.com/Steinbeck-Lab", + contact={ + "name": "Steinbeck Lab", + "url": "https://cheminf.uni-jena.de/", + "email": "caffeine@listserv.uni-jena.de", + }, + license_info={ + "name": "CC BY 4.0", + "url": "https://creativecommons.org/licenses/by/4.0/", + }, +) diff --git a/app/modules/alldescriptors.py b/app/modules/alldescriptors.py index 2bb7e72..6ff6e7b 100644 --- a/app/modules/alldescriptors.py +++ b/app/modules/alldescriptors.py @@ -175,7 +175,7 @@ def getCDKRDKitcombinedDescriptors(smiles: str): "Heavy atom count", "Molecular weight", "Exact molecular weight", - "ALogP", + "Calculated LogP", "Rotatable bond count", "Topological polar surface area", "Hydrogen bond acceptors", diff --git a/app/modules/cdkmodules.py b/app/modules/cdkmodules.py index 1067198..17f444e 100644 --- a/app/modules/cdkmodules.py +++ b/app/modules/cdkmodules.py @@ -18,10 +18,14 @@ if not isJVMStarted(): cdk_path = "https://github.com/cdk/cdk/releases/download/cdk-2.8/cdk-2.8.jar" sru_path = "https://github.com/JonasSchaub/SugarRemoval/releases/download/v1.3.2/SugarRemovalUtility-jar-with-dependencies.jar" + centres_path = ( + "https://github.com/SiMolecule/centres/releases/download/1.0/centres.jar" + ) cdkjar_path = str(pystow.join("STOUT-V2")) + "/cdk-2.8.jar" srujar_path = ( str(pystow.join("STOUT-V2")) + "/SugarRemovalUtility-jar-with-dependencies.jar" ) + centresjar_path = 
str(pystow.join("STOUT-V2")) + "/centres.jar" if not os.path.exists(cdkjar_path): jar_path = pystow.ensure("STOUT-V2", url=cdk_path) @@ -29,7 +33,10 @@ if not os.path.exists(srujar_path): jar_path = pystow.ensure("STOUT-V2", url=sru_path) - startJVM("-ea", classpath=[cdkjar_path, srujar_path]) + if not os.path.exists(centresjar_path): + jar_path = pystow.ensure("STOUT-V2", url=centres_path) + + startJVM("-ea", classpath=[cdkjar_path, srujar_path, centresjar_path]) cdk_base = "org.openscience.cdk" @@ -95,14 +102,18 @@ def getMurkoFramework(smiles: str): SmilesParser = JClass(cdk_base + ".smiles.SmilesParser")(SCOB.getInstance()) molecule = SmilesParser.parseSmiles(smiles) MurkoFragmenter.generateFragments(molecule) + if len(MurkoFragmenter.getFrameworks()) == 0: + return "None" + return str(MurkoFragmenter.getFrameworks()[0]) -def getCDKSDGMol(smiles: str): +def getCDKSDGMol(smiles: str, V3000=False): """This function takes the user input SMILES and returns a mol block as a string with Structure Diagram Layout. Args: smiles (string): SMILES string given by the user. + V3000 (boolean): Gives an option to return V3000 mol. Returns: mol object (string): CDK Structure Diagram Layout mol block. """ @@ -112,6 +123,7 @@ def getCDKSDGMol(smiles: str): moleculeSDG = getCDKSDG(smiles) SDFW = JClass(cdk_base + ".io.SDFWriter")(StringW) + SDFW.setAlwaysV3000(V3000) SDFW.write(moleculeSDG) SDFW.flush() mol_str = str(StringW.toString()) @@ -271,3 +283,143 @@ def getTanimotoSimilarityCDK(smiles1: str, smiles2: str): Similarity = Tanimoto.calculate(fingerprint1, fingerprint2) return "{:.5f}".format(float(str(Similarity))) + + +def getCIPAnnotation(smiles: str): + """ + The function return the CIP annotations using the CDK + CIP toolkit. 
+ Args: mol block + Returns: CIP annotated mol block + """ + mol = getCDKSDG(smiles) + centres_base = "com.simolecule.centres" + Cycles = JClass(cdk_base + ".graph.Cycles") + IBond = JClass(cdk_base + ".interfaces.IBond") + IStereoElement = JClass(cdk_base + ".interfaces.IStereoElement") + Stereocenters = JClass(cdk_base + ".stereo.Stereocenters") + StandardGenerator = JClass( + cdk_base + ".renderer.generators.standard.StandardGenerator" + ) + + BaseMol = JClass(centres_base + ".BaseMol") + CdkLabeller = JClass(centres_base + ".CdkLabeller") + Descriptor = JClass(centres_base + ".Descriptor") + + stereocenters = Stereocenters.of(mol) + for atom in mol.atoms(): + if ( + stereocenters.isStereocenter(atom.getIndex()) + and stereocenters.elementType(atom.getIndex()) + == Stereocenters.Type.Tetracoordinate + ): + atom.setProperty(StandardGenerator.ANNOTATION_LABEL, "(?)") + + for bond in mol.bonds(): + if bond.getOrder() != IBond.Order.DOUBLE: + continue + begIdx = bond.getBegin().getIndex() + endIdx = bond.getEnd().getIndex() + if ( + stereocenters.elementType(begIdx) == Stereocenters.Type.Tricoordinate + and stereocenters.elementType(endIdx) == Stereocenters.Type.Tricoordinate + and stereocenters.isStereocenter(begIdx) + and stereocenters.isStereocenter(endIdx) + ): + # only if not in a small ring <7 + if Cycles.smallRingSize(bond, 7) == 0: + bond.setProperty(StandardGenerator.ANNOTATION_LABEL, "(?)") + # no defined stereo? 
+ if not mol.stereoElements().iterator().hasNext(): + return mol + + CdkLabeller.label(mol) + + # update to label appropriately for racemic and relative stereochemistry + for se in mol.stereoElements(): + if se.getConfigClass() == IStereoElement.TH and se.getGroupInfo() != 0: + focus = se.getFocus() + label = focus.getProperty(BaseMol.CIP_LABEL_KEY) + if ( + isinstance(label, Descriptor) + and label != Descriptor.ns + and label != Descriptor.Unknown + ): + if (se.getGroupInfo() & IStereoElement.GRP_RAC) != 0: + inv = None + if label == Descriptor.R: + inv = Descriptor.S + elif label == Descriptor.S: + inv = Descriptor.R + if inv is not None: + focus.setProperty( + BaseMol.CIP_LABEL_KEY, label.toString() + inv.name() + ) + elif (se.getGroupInfo() & IStereoElement.GRP_REL) != 0: + if label in [Descriptor.R, Descriptor.S]: + focus.setProperty(BaseMol.CIP_LABEL_KEY, label.toString() + "*") + + for atom in mol.atoms(): + if atom.getProperty(BaseMol.CONF_INDEX) is not None: + atom.setProperty( + StandardGenerator.ANNOTATION_LABEL, + StandardGenerator.ITALIC_DISPLAY_PREFIX + + str(atom.getProperty(BaseMol.CONF_INDEX)), + ) + elif atom.getProperty(BaseMol.CIP_LABEL_KEY) is not None: + atom.setProperty( + StandardGenerator.ANNOTATION_LABEL, + StandardGenerator.ITALIC_DISPLAY_PREFIX + + str(atom.getProperty(BaseMol.CIP_LABEL_KEY)), + ) + for bond in mol.bonds(): + if bond.getProperty(BaseMol.CIP_LABEL_KEY) is not None: + bond.setProperty( + StandardGenerator.ANNOTATION_LABEL, + StandardGenerator.ITALIC_DISPLAY_PREFIX + + str(bond.getProperty(BaseMol.CIP_LABEL_KEY)), + ) + + return mol + + +def getCXSMILES(smiles: str): + """This function takes the user input SMILES and creates a + CXSMILES string with 2D atom coordinates + Args: + smiles (string): SMILES string given by the user. + Returns: + smiles (string): CXSMILES string. 
+ + """ + moleculeSDG = getCDKSDG(smiles) + SmiFlavor = JClass(cdk_base + ".smiles.SmiFlavor") + SmilesGenerator = JClass(cdk_base + ".smiles.SmilesGenerator")( + SmiFlavor.Absolute | SmiFlavor.CxSmilesWithCoords + ) + CXSMILES = SmilesGenerator.create(moleculeSDG) + return str(CXSMILES) + + +def getCDKHOSECodes(smiles: str, noOfSpheres: int, ringsize: bool): + """This function takes the user input SMILES and returns the + HOSE code that CDK generates for each atom of the molecule. + Args: + smiles(str), noOfSpheres(int), ringsize(bool): SMILES string, No of Spheres and the ringsize flag given by the user. + Returns: + HOSECodes (list): CDK generated HOSE codes, one string per atom. + """ + if any(char.isspace() for char in smiles): + smiles = smiles.replace(" ", "+") + SCOB = JClass(cdk_base + ".silent.SilentChemObjectBuilder") + SmilesParser = JClass(cdk_base + ".smiles.SmilesParser")(SCOB.getInstance()) + molecule = SmilesParser.parseSmiles(smiles) + HOSECodeGenerator = JClass(cdk_base + ".tools.HOSECodeGenerator")() + HOSECodes = [] + atoms = molecule.atoms() + for atom in atoms: + moleculeHOSECode = HOSECodeGenerator.getHOSECode( + molecule, atom, noOfSpheres, ringsize + ) + HOSECodes.append(str(moleculeHOSECode)) + return HOSECodes diff --git a/app/modules/coconutpreprocess.py b/app/modules/coconutpreprocess.py new file mode 100644 index 0000000..c872b54 --- /dev/null +++ b/app/modules/coconutpreprocess.py @@ -0,0 +1,132 @@ +from rdkit import Chem +import app.modules.rdkitmodules as rdkitmodules +import app.modules.cdkmodules as cdkmodules +from app.modules.coconutdescriptors import getCOCONUTDescriptors + + +def getMolBlock(input_text: str): + """ + This function generates a molblock from the + input text. 
+ Args (str): Input text (mol / SMILES) + returns (str): molblock + """ + check = rdkitmodules.is_valid_molecule(input_text) + + if check == "smiles": + mol_block = cdkmodules.getCDKSDGMol(input_text, V3000=False).replace( + "$$$$\n", "" + ) + return mol_block + elif check == "mol": + return input_text + else: + return "Error!, Check the input text." + + +def getMolculeHash(smiles: str): + """ + This function returns a set of molecule hashes defined. + Args (str): SMILES string (strandardised is preferred). + Returns (dict): molecule_hash + """ + mol = Chem.MolFromSmiles(smiles) + if mol: + Formula = Chem.rdMolDescriptors.CalcMolFormula(mol) + Isomeric_SMILES = Chem.MolToSmiles(mol, kekuleSmiles=True) + Canonical_SMILES = Chem.MolToSmiles( + mol, kekuleSmiles=True, isomericSmiles=False + ) + return { + "Formula": Formula, + "Isomeric_SMILES": Isomeric_SMILES, + "Canonical_SMILES": Canonical_SMILES, + } + + +def getRepresentations(smiles: str): + """ + This functions returns COCONUT representations. + InChI, InChi key and Murko framework. + Args (str): SMILES string. + Returns (dict): dictionary of InChI, InChi key and Murko framework. + """ + mol = Chem.MolFromSmiles(smiles) + if mol: + InChI = Chem.inchi.MolToInchi(mol) + InChI_Key = Chem.inchi.MolToInchiKey(mol) + Murko = cdkmodules.getMurkoFramework(smiles) + return {"InChI": InChI, "InChI_Key": InChI_Key, "Murko": Murko} + + +def COCONUTpreprocessing(input_text: str): + """ + This function takes a user input text and returns a dictionary for COCONUT input. + Args (str): input_text (mol/str). + Returns (dict): COCONUT preprocessed data. 
+ """ + original_mol = getMolBlock(input_text) + standarised_mol_block = rdkitmodules.standardizer.standardize_molblock(original_mol) + standardised_SMILES = Chem.MolToSmiles( + Chem.MolFromMolBlock(standarised_mol_block), kekuleSmiles=True + ) + molecule_hash = getMolculeHash(standardised_SMILES) + parent_canonical_smiles = molecule_hash["Canonical_SMILES"] + parent_2D_molblock = cdkmodules.getCDKSDGMol( + parent_canonical_smiles, V3000=False + ).replace("$$$$\n", "") + parent_3D_molblock = rdkitmodules.get3Dconformers(parent_canonical_smiles) + parent_2D_molblock_v3 = cdkmodules.getCDKSDGMol( + parent_canonical_smiles, V3000=True + ).replace("$$$$\n", "") + parent_representations = getRepresentations(parent_canonical_smiles) + parent_descriptors = getCOCONUTDescriptors(parent_canonical_smiles, "rdkit") + + if rdkitmodules.has_stereochemistry(standardised_SMILES): + variant_isomeric_smiles = molecule_hash["Isomeric_SMILES"] + variant_2D_molblock = cdkmodules.getCDKSDGMol( + variant_isomeric_smiles, V3000=False + ).replace("$$$$\n", "") + variant_2D_molblock_v3 = cdkmodules.getCDKSDGMol( + variant_isomeric_smiles, V3000=True + ).replace("$$$$\n", "") + variant_3D_molblock = rdkitmodules.get3Dconformers(variant_isomeric_smiles) + variant_representations = getRepresentations(variant_isomeric_smiles) + variant_descriptors = getCOCONUTDescriptors(variant_isomeric_smiles, "rdkit") + + return { + "original_mol": original_mol, + "standardised_mol": standarised_mol_block, + "standardised_SMILES": standardised_SMILES, + "molecule_hash": molecule_hash, + "parent": { + "2D_mol": parent_2D_molblock, + "3D_mol": parent_3D_molblock, + "v3000": parent_2D_molblock_v3, + "representations": parent_representations, + "descriptors": parent_descriptors, + }, + "stereochemical_variants": True, + "variants": { + "2D_mol": variant_2D_molblock, + "3D_mol": variant_3D_molblock, + "v3000": variant_2D_molblock_v3, + "representations": variant_representations, + "descriptors": 
variant_descriptors, + }, + } + + return { + "original_mol": original_mol, + "standardised_mol": standarised_mol_block, + "standardised_SMILES": standardised_SMILES, + "molecule_hash": molecule_hash, + "parent": { + "2D_mol": parent_2D_molblock, + "3D_mol": parent_3D_molblock, + "v3000": parent_2D_molblock_v3, + "representations": parent_representations, + "descriptors": parent_descriptors, + }, + "stereochemical_variants": False, + } diff --git a/app/modules/depict.py b/app/modules/depict.py index 0fef2d7..aabc566 100644 --- a/app/modules/depict.py +++ b/app/modules/depict.py @@ -2,11 +2,13 @@ from rdkit import Chem from rdkit.Chem import rdDepictor from rdkit.Chem.Draw import rdMolDraw2D -from app.modules.cdkmodules import getCDKSDG +from app.modules.cdkmodules import getCDKSDG, getCIPAnnotation from jpype import JClass -def getCDKDepiction(smiles: str, molSize=(512, 512), rotate=0, unicolor=False): +def getCDKDepiction( + smiles: str, molSize=(512, 512), rotate=0, CIP=True, unicolor=False +): """This function takes the user input SMILES and Depicts it using the CDK Depiction Generator. 
Args: @@ -25,7 +27,6 @@ def getCDKDepiction(smiles: str, molSize=(512, 512), rotate=0, unicolor=False): DepictionGenerator = ( JClass(cdk_base + ".depict.DepictionGenerator")() .withSize(molSize[0], molSize[1]) - .withAtomValues() .withParam(StandardGenerator.StrokeRatio.class_, 1.0) .withAnnotationColor(Color.BLACK) .withParam(StandardGenerator.AtomColor.class_, UniColor(Color.BLACK)) @@ -37,7 +38,6 @@ def getCDKDepiction(smiles: str, molSize=(512, 512), rotate=0, unicolor=False): JClass(cdk_base + ".depict.DepictionGenerator")() .withAtomColors(CDK2DAtomColors) .withSize(molSize[0], molSize[1]) - .withAtomValues() .withParam(StandardGenerator.StrokeRatio.class_, 1.0) .withFillToFit() .withBackgroundColor(Color.WHITE) @@ -45,10 +45,12 @@ def getCDKDepiction(smiles: str, molSize=(512, 512), rotate=0, unicolor=False): if any(char.isspace() for char in smiles): smiles = smiles.replace(" ", "+") - moleculeSDG = getCDKSDG(smiles) + if CIP: + moleculeSDG = getCIPAnnotation(smiles) + else: + moleculeSDG = getCDKSDG(smiles) if moleculeSDG: - # Rotate molecule point = JClass(cdk_base + ".geometry.GeometryTools").get2DCenter(moleculeSDG) JClass(cdk_base + ".geometry.GeometryTools").rotate( diff --git a/app/modules/rdkitmodules.py b/app/modules/rdkitmodules.py index 4d33df9..9ad9dbb 100644 --- a/app/modules/rdkitmodules.py +++ b/app/modules/rdkitmodules.py @@ -2,6 +2,7 @@ from rdkit import Chem, DataStructs from rdkit.Chem import AllChem, Descriptors, QED, Lipinski, rdMolDescriptors, rdmolops from app.modules.cdkmodules import getCDKSDGMol +from hosegen import HoseGenerator def checkSMILES(smiles: str): @@ -134,3 +135,62 @@ def getTanimotoSimilarityRDKit(smiles1, smiles2): similarity = DataStructs.TanimotoSimilarity(fp1, fp2) return similarity + + +def getRDKitHOSECodes(smiles: str, noOfSpheres: int): + """ + This function takes a SMILES string as input and + returns the calculated HOSEcodes + Args (smiles: str, noOfSpheres: int): SMILES string and No of Spheres as int. 
+ Returns: hosecodes + + """ + if any(char.isspace() for char in smiles): + smiles = smiles.replace(" ", "+") + mol = Chem.MolFromSmiles(smiles) + gen = HoseGenerator() + hosecodes = [] + for i in range(len(mol.GetAtoms())): + hosecode = gen.get_Hose_codes(mol, i, noOfSpheres) + hosecodes.append(hosecode) + return hosecodes + + +def is_valid_molecule(input_text): + """ + This functions checks whether the input text + is a molblock or SMILES. + Args (str): SMILES string or molblock. + Returns (str): SMILES/Mol flag. + """ + try: + molecule = Chem.MolFromSmiles(input_text) + if molecule: + return "smiles" + else: + molecule = Chem.MolFromMolBlock(input_text) + if molecule: + return "mol" + else: + return False + except Exception: + return False + + +def has_stereochemistry(smiles: str): + """ + This function checks whether the input has stereochemistry or not. + Args (str) : SMILES string. + Returns (bool): True or false. + """ + mol = Chem.MolFromSmiles(smiles) + + if mol is None: + return False + + for atom in mol.GetAtoms(): + chiral_tag = atom.GetChiralTag() + if chiral_tag != Chem.ChiralType.CHI_UNSPECIFIED: + return True + + return False diff --git a/app/routers/chem.py b/app/routers/chem.py index 1bc0540..2ff0d92 100644 --- a/app/routers/chem.py +++ b/app/routers/chem.py @@ -6,14 +6,23 @@ EnumerateStereoisomers, ) from chembl_structure_pipeline import standardizer, checker -from fastapi.responses import Response, HTMLResponse +from fastapi.responses import Response, HTMLResponse, JSONResponse from app.modules.npscorer import getNPScore from app.modules.classyfire import classify, result -from app.modules.cdkmodules import getCDKSDGMol, getTanimotoSimilarityCDK +from app.modules.cdkmodules import ( + getCDKSDGMol, + getTanimotoSimilarityCDK, + getCDKHOSECodes, +) from app.modules.depict import getRDKitDepiction, getCDKDepiction -from app.modules.rdkitmodules import get3Dconformers, getTanimotoSimilarityRDKit +from app.modules.rdkitmodules import ( + 
get3Dconformers, + getTanimotoSimilarityRDKit, + getRDKitHOSECodes, +) from app.modules.coconutdescriptors import getCOCONUTDescriptors from app.modules.alldescriptors import getTanimotoSimilarity +from app.modules.coconutpreprocess import COCONUTpreprocessing import pandas as pd from fastapi.templating import Jinja2Templates @@ -35,7 +44,7 @@ async def chem_index(): @router.get("/stereoisomers") async def SMILES_to_Stereo_Isomers(smiles: str): """ - Enumerate all possible stereoisomers based on the chiral centers in the given SMILES: + Enumerate all possible stereoisomers based on the chiral centres in the given SMILES: - **SMILES**: required (query parameter) """ @@ -119,7 +128,7 @@ async def NPlikeliness_Score(smiles: str): @router.get("/classyfire/classify") async def ClassyFire_Classify(smiles: str): """ - Generate ClassyFire based classifications using SMILES as input. + Generate ClassyFire-based classifications using SMILES as input. - **SMILES**: required (query) """ @@ -143,14 +152,35 @@ async def ClassyFire_result(id: str): @router.get("/cdk2d") async def CDK2D_Coordinates(smiles: str): """ - Generate 2D Coordinates using CDK Strcture diagram generator and return the mol block. + Generate 2D Coordinates using the CDK Structure diagram generator and return the mol block. - **SMILES**: required (query) """ if smiles: mol = Chem.MolFromSmiles(smiles) if mol: - return getCDKSDGMol(smiles) + return Response( + content=getCDKSDGMol(smiles).replace("$$$$\n", ""), + media_type="text/plain", + ) + else: + return "Error reading SMILES string, check again." + + +@router.get("/rdkit3d") +async def RDKit3D_Mol(smiles: str): + """ + Generate 3D Coordinates using RDKit and return the mol block. + + - **SMILES**: required (query) + """ + if smiles: + mol = Chem.MolFromSmiles(smiles) + if mol: + return Response( + content=get3Dconformers(smiles).replace("$$$$\n", ""), + media_type="text/plain", + ) else: return "Error reading SMILES string, check again." 
@@ -158,7 +188,7 @@ async def CDK2D_Coordinates(smiles: str): @router.get("/tanimoto") async def Tanimoto_Similarity(smiles: str, toolkit: Optional[str] = "cdk"): """ - Generate Tanimoto similarity index for a given pair of SMILES strings. + Generate the Tanimoto similarity index for a given pair of SMILES strings. - **SMILES**: required (query) - **toolkit**: optional (defaults: cdk) @@ -172,15 +202,15 @@ async def Tanimoto_Similarity(smiles: str, toolkit: Optional[str] = "cdk"): Tanimoto = getTanimotoSimilarityCDK(smiles1, smiles2) return Tanimoto except ValueError: - return 'Please give a SMILES pair with "," seperated. (Example: api.naturalproducts.net/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C)' + return 'Please give a SMILES pair with "," separated. (Example: api.naturalproducts.net/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C)' elif len(smiles.split(",")) > 2: try: matrix = getTanimotoSimilarity(smiles, toolkit) return Response(content=matrix, media_type="text/html") except ValueError: - return 'Please give a SMILES pair with "," seperated. (Example: api.naturalproducts.net/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C)' + return 'Please give a SMILES pair with "," separated. (Example: api.naturalproducts.net/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C)' else: - return 'Please give a SMILES pair with "," seperated. (Example: api.naturalproducts.net/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C)' + return 'Please give a SMILES pair with "," separated. 
(Example: api.naturalproducts.net/chem/tanimoto?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C,CN1C=NC2=C1C(=O)NC(=O)N2C)' @router.get("/depict") @@ -190,6 +220,8 @@ async def Depict2D_molecule( width: Optional[int] = 512, height: Optional[int] = 512, rotate: Optional[int] = 0, + CIP: Optional[bool] = False, + unicolor: Optional[bool] = False, ): """ Generate 2D Depictions using CDK or RDKit using given parameters. @@ -203,7 +235,7 @@ async def Depict2D_molecule( if generator: if generator == "cdksdg": return Response( - content=getCDKDepiction(smiles, [width, height], rotate), + content=getCDKDepiction(smiles, [width, height], rotate, CIP, unicolor), media_type="image/svg+xml", ) else: @@ -216,7 +248,7 @@ async def Depict2D_molecule( @router.get("/checkerrors") async def Check_Errors(smiles: str, fix: Optional[bool] = False): """ - Check issues for a given SMILES string and standardize it using ChEMBL curation pipeline. + Check issues for a given SMILES string and standardize it using the ChEMBL curation pipeline. - **SMILES**: required (query) - **fix**: optional (defaults: False) @@ -274,6 +306,31 @@ async def Depict3D_Molecule( return templates.TemplateResponse("mol.html", content) +@router.get("/hosecode") +async def HOSE_Codes(framework: str, smiles: str, spheres: int, ringsize: bool = False): + if smiles: + if framework == "cdk": + return getCDKHOSECodes(smiles, spheres, ringsize) + elif framework == "rdkit": + return getRDKitHOSECodes(smiles, spheres) + else: + return "Error reading SMILES string, check again." + + +@router.get("/coconutpreprocessing") +async def COCONUT_Preprocessing(smiles: str): + """ + Generate Input JSON file for COCONUT. + + - **SMILES**: required (query) + """ + if smiles: + data = COCONUTpreprocessing(smiles) + return JSONResponse(content=data) + else: + return "Error reading SMILES string, check again." 
+ + # @app.get("/molecules/", response_model=List[schemas.Molecule]) # def read_molecules(skip: int = 0, limit: int = 100, db: Session = Depends(get_db)): # molecules = crud.get_molecules(db, skip=skip, limit=limit) diff --git a/app/routers/converters.py b/app/routers/converters.py index 81cb165..9e92420 100644 --- a/app/routers/converters.py +++ b/app/routers/converters.py @@ -6,7 +6,7 @@ from rdkit.Chem import AllChem from typing import Optional from STOUT import translate_forward, translate_reverse -from app.modules.cdkmodules import getCDKSDGMol +from app.modules.cdkmodules import getCDKSDGMol, getCXSMILES from app.modules.rdkitmodules import get3Dconformers router = APIRouter( @@ -33,7 +33,10 @@ async def SMILES_Mol(smiles: str, generator: Optional[str] = "cdk"): if smiles: if generator: if generator == "cdk": - return Response(content=getCDKSDGMol(smiles), media_type="text/plain") + return Response( + content=getCDKSDGMol(smiles).replace("$$$$\n", ""), + media_type="text/plain", + ) else: mol = Chem.MolFromSmiles(smiles) if mol: @@ -119,6 +122,22 @@ async def SMILES_to_InChIKey(smiles: str): return "Error reading SMILES string check again." +@router.get("/cxsmiles") +async def SMILES_to_CXSMILES(smiles: str): + """ + Convert SMILES to CXSMILES: + + - **SMILES**: required (query parameter) + """ + if any(char.isspace() for char in smiles): + smiles = smiles.replace(" ", "+") + if smiles: + cxsmiles = getCXSMILES(smiles) + return cxsmiles + else: + return "Error reading SMILES string check again." 
+ + @router.get("/formats") async def SMILES_convert_to_Formats(smiles: str): """ diff --git a/app/routers/decimer.py b/app/routers/decimer.py index 4713916..1fa9a6a 100644 --- a/app/routers/decimer.py +++ b/app/routers/decimer.py @@ -23,9 +23,9 @@ async def DECIMER_Index(): @router.post("/process") async def Extract_ChemicalInfo( - path: Annotated[str, Body(embed=True)], - reference: Annotated[str, Body(embed=True)], - img: Annotated[str, Body(embed=True)], + path: Annotated[str, Body(embed=True)] = None, + reference: Annotated[str, Body(embed=True)] = None, + img: Annotated[str, Body(embed=True)] = None, ): """ Extract chemical structure depictions and convert them into SMILES using DECIMER: diff --git a/public/img/logo.png b/public/img/logo.png index a3d458b..f3206e0 100644 Binary files a/public/img/logo.png and b/public/img/logo.png differ diff --git a/public/img/logo.svg b/public/img/logo.svg index a87f3b2..66e09b5 100644 --- a/public/img/logo.svg +++ b/public/img/logo.svg @@ -1,9369 +1,4329 @@ - + - - - - - - - - - - + viewBox="0 0 1920 1080" style="enable-background:new 0 0 1920 1080;" xml:space="preserve"> + + diff --git a/requirements.txt b/requirements.txt index 70a7589..ee6232c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ fastapi>=0.80.0 uvicorn>=0.15.0,<0.16.0 fastapi-pagination==0.10.0 +fastapi-versioning>=0.10.0 rdkit-pypi>=2022.09.4 websockets==10.4 pillow==9.4.0 @@ -18,3 +19,4 @@ pillow-heif==0.10.0 selfies>=2.1.1 jinja2 pandas +HOSE_code_generator @ git+https://github.com/Ratsemaat/HOSE_code_generator \ No newline at end of file