Skip to content

Commit

Permalink
Merge pull request #269 from Steinbeck-Lab/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
CS76 authored Jul 4, 2023
2 parents aad1118 + ba04f70 commit de2515c
Show file tree
Hide file tree
Showing 23 changed files with 535 additions and 69 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -131,5 +131,8 @@ docs/.vitepress/cache
docs/.vitepress/cache/*
node_modules

# Sphinx documentation
docs/_build/

#grafana
grafana_data/
29 changes: 29 additions & 0 deletions .readthedocs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Read the Docs configuration file for Sphinx projects
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Set the OS, Python version and other tools you might need
build:
os: ubuntu-22.04
tools:
python: "3.11"


# Build documentation in the "docs/" directory with Sphinx
sphinx:
configuration: docs/conf.py

# Optionally build your docs in additional formats such as PDF and ePub
formats:
- epub

# Optional but recommended, declare the Python requirements required
# to build your documentation
# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
python:
install:
- requirements: docs/requirements.txt
- requirements: docs/extra-requirements.txt
ignore_dependencies: true
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ authors:
given-names: "Kohulan"
orcid: "https://orcid.org/0000-0003-1066-7792"
title: "cheminformatics-python-microservice"
version: v0.3.0 - prerelease
doi: 10.5281/zenodo.7747862
version: V1.0.0
doi: 10.5281/zenodo.8112749
date-released: 2023-03-16
url: "https://github.com/Steinbeck-Lab/cheminformatics-python-microservice"
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
![Workflow](https://github.com/Steinbeck-Lab/cheminformatics-python-microservice/actions/workflows/release-please.yml/badge.svg)
[![framework](https://img.shields.io/badge/Framework-FastAPI-blue?style)](https://fastapi.tiangolo.com/)
[![FastAPI Documentation](https://img.shields.io/badge/docs-fastapi-blue)](https://api.naturalproducts.net/v1/docs#/)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7747862.svg)](https://doi.org/10.5281/zenodo.7747862)
[![Documentation Status](https://readthedocs.org/projects/cheminformatics-python-microservice/badge/?version=latest)](https://cheminformatics-python-microservice.readthedocs.io/en/latest/?badge=latest)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8112749.svg)](https://doi.org/10.5281/zenodo.8112749)
## Overview of Cheminformatics Python Microservice

This set of essential and valuable microservices is designed to be accessed via API calls to support cheminformatics. Generally, it is designed to work with SMILES-based inputs and could be used to translate between different machine-readable representations, get Natural Product (NP) likeliness scores, visualize chemical structures, and generate descriptors. In addition, the microservices also host an instance of [STOUT](https://github.com/Kohulan/Smiles-TO-iUpac-Translator) and another instance of [DECIMER](https://github.com/Kohulan/DECIMER-Image_Transformer) (two deep learning models for IUPAC name generation and optical chemical structure recognition, respectively).
Expand All @@ -33,7 +34,7 @@ This project is licensed under the MIT License - see the [LICENSE](https://githu

## Citation

Venkata, C., Sharma, N., & Rajan, K. (2023). Cheminformatics Python Microservice (Version v0.9.0 - prerelease) [Computer software]. https://doi.org/10.5281/zenodo.7747862
Venkata, C., Sharma, N., & Rajan, K. (2023). Cheminformatics Python Microservice (Version V1.0.0) [Computer software]. https://doi.org/10.5281/zenodo.8112749

## Maintained by

Expand Down
36 changes: 26 additions & 10 deletions app/modules/coconut/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@
def getMolBlock(input_text: str):
"""
This function generates a molblock from the
input text.
Args (str): Input text (mol / SMILES)
returns (str): molblock
input text using CDK.
Args (str):
Input text (mol / SMILES)
Returns (str):
molblock
"""
check = rdkitmodules.is_valid_molecule(input_text)

Expand All @@ -25,8 +29,12 @@ def getMolBlock(input_text: str):
def getMolculeHash(smiles: str):
"""
This function returns a set of molecule hashes defined.
Args (str): SMILES string (strandardised is preferred).
Returns (dict): molecule_hash
Args (str):
SMILES string (standardised is preferred).
Returns (dict):
molecule_hash
"""
mol = Chem.MolFromSmiles(smiles)
if mol:
Expand All @@ -46,8 +54,12 @@ def getRepresentations(smiles: str):
"""
This function returns COCONUT representations.
InChI, InChi key and Murko framework.
Args (str): SMILES string.
Returns (dict): dictionary of InChI, InChi key and Murko framework.
Args (str):
SMILES string.
Returns (dict):
dictionary of InChI, InChi key and Murko framework.
"""
mol = Chem.MolFromSmiles(smiles)
if mol:
Expand All @@ -57,11 +69,15 @@ def getRepresentations(smiles: str):
return {"InChI": InChI, "InChI_Key": InChI_Key, "Murko": Murko}


def COCONUTpreprocessing(input_text: str):
def getCOCONUTpreprocessing(input_text: str):
"""
This function takes a user input text and returns a dictionary for COCONUT input.
Args (str): input_text (mol/str).
Returns (dict): COCONUT preprocessed data.
Args (str):
input_text (mol/str).
Returns (dict):
COCONUT preprocessed data.
"""
original_mol = getMolBlock(input_text)
standarised_mol_block = rdkitmodules.standardizer.standardize_molblock(original_mol)
Expand Down
3 changes: 3 additions & 0 deletions app/modules/decimer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

def convert_image(path: str):
"""Takes an image filepath of GIF image and returns Hi Res PNG image.
Args:
input_path (str): path of an image.
Expand All @@ -31,6 +32,7 @@ def convert_image(path: str):

def get_segments(path: str):
"""Takes an image filepath and returns a set of paths and image name of segmented images.
Args:
input_path (str): path of an image.
Expand All @@ -50,6 +52,7 @@ def get_segments(path: str):

def getPredictedSegments(path: str):
"""Takes an image filepath and returns predicted SMILES for segmented images.
Args:
input_path (str): path of an image.
Expand Down
70 changes: 47 additions & 23 deletions app/modules/toolkits/cdk_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,11 @@
def getCDKSDG(smiles: str):
"""This function takes the user input SMILES and Creates a
Structure Diagram Layout using the CDK.
Args:
smiles (string): SMILES string given by the user.
smiles (string): SMILES string given by the user.
Returns:
mol object : mol object with CDK SDG.
mol object : mol object with CDK SDG.
"""
if any(char.isspace() for char in smiles):
smiles = smiles.replace(" ", "+")
Expand All @@ -63,10 +64,11 @@ def getCDKSDG(smiles: str):
def getMurkoFramework(smiles: str):
"""This function takes the user input SMILES and returns
the Murko framework
Args:
smiles (string): SMILES string given by the user.
smiles (string): SMILES string given by the user.
Returns:
smiles (string) : Murko Framework as SMILES.
smiles (string) : Murko Framework as SMILES.
"""
if any(char.isspace() for char in smiles):
smiles = smiles.replace(" ", "+")
Expand All @@ -84,11 +86,13 @@ def getMurkoFramework(smiles: str):
def getCDKSDGMol(smiles: str, V3000=False):
"""This function takes the user input SMILES and returns a mol
block as a string with Structure Diagram Layout.
Args:
smiles (string): SMILES string given by the user.
V3000 (boolean): Gives an option to return V3000 mol.
smiles (string): SMILES string given by the user.
V3000 (boolean): Gives an option to return V3000 mol.
Returns:
mol object (string): CDK Structure Diagram Layout mol block.
mol object (string): CDK Structure Diagram Layout mol block.
"""
if any(char.isspace() for char in smiles):
smiles = smiles.replace(" ", "+")
Expand All @@ -107,8 +111,12 @@ def getAromaticRingCount(mol):
"""This function is adapted from CDK to
calculate the number of Aromatic Rings
present in a given molecule.
Args (mol): CDK mol object as input.
Returns (int): Number if aromatic rings present
Args (mol):
CDK mol object as input.
Returns (int):
Number of aromatic rings present.
"""
Cycles = JClass(cdk_base + ".graph.Cycles")
ElectronDonation = JClass(cdk_base + ".aromaticity.ElectronDonation")
Expand All @@ -133,8 +141,12 @@ def getAromaticRingCount(mol):
def getCDKDescriptors(smiles: str):
"""Take an input SMILES and generate a selected set of molecular
descriptors generated using CDK as a list.
Args (str): SMILES string
Returns (list): a list of calculated descriptors
Args (str):
SMILES string.
Returns (list):
A list of calculated descriptors.
"""
Mol = getCDKSDG(smiles)
if Mol:
Expand Down Expand Up @@ -229,8 +241,11 @@ def getTanimotoSimilarityCDK(smiles1: str, smiles2: str):
Take two SMILES strings and calculate
Tanimoto similarity index using Pubchem
Fingerprints.
Args (str,str): SMILES strings.
Returns (float): Tanimoto similarity.
Args (str,str):
SMILES strings.
Returns (float):
Tanimoto similarity.
"""
Tanimoto = JClass(cdk_base + ".similarity.Tanimoto")
SCOB = JClass(cdk_base + ".silent.SilentChemObjectBuilder")
Expand Down Expand Up @@ -289,8 +304,12 @@ def getCIPAnnotation(smiles: str):
"""
The function returns the CIP annotations using the CDK
CIP toolkit.
Args: mol block
Returns: CIP annotated mol block
Args (str):
SMILES string as input.
Returns (str):
CIP annotated mol block.
"""
mol = getCDKSDG(smiles)
centres_base = "com.simolecule.centres"
Expand Down Expand Up @@ -392,10 +411,11 @@ def getCIPAnnotation(smiles: str):
def getCXSMILES(smiles: str):
"""This function takes the user input SMILES and creates a
CXSMILES string with 2D atom coordinates
Args:
smiles (string): SMILES string given by the user.
smiles (str): SMILES string given by the user.
Returns:
smiles (string): CXSMILES string.
smiles (str): CXSMILES string.
"""
moleculeSDG = getCDKSDG(smiles)
Expand All @@ -410,10 +430,11 @@ def getCXSMILES(smiles: str):
def getCanonSMILES(smiles: str):
"""This function takes the user input SMILES and creates a
Canonical SMILES string with 2D atom coordinates
Args:
smiles (string): SMILES string given by the user.
smiles (str): SMILES string given by the user.
Returns:
smiles (string): Canonical SMILES string.
smiles (str): Canonical SMILES string.
"""
moleculeSDG = getCDKSDG(smiles)
Expand All @@ -426,10 +447,11 @@ def getCanonSMILES(smiles: str):
def getInChI(smiles: str, InChIKey=False):
"""This function takes the user input SMILES and creates a
InChI string
Args:
smiles (string): SMILES string given by the user.
smiles (str): SMILES string given by the user.
Returns:
smiles (string): InChI/InChIKey string.
smiles (str): InChI/InChIKey string.
"""
moleculeSDG = getCDKSDG(smiles)
Expand All @@ -450,10 +472,12 @@ def getInChI(smiles: str, InChIKey=False):
async def getCDKHOSECodes(smiles: str, noOfSpheres: int, ringsize: bool):
"""This function takes the user input SMILES and returns a mol
block as a string with Structure Diagram Layout.
Args:
smiles(str), noOfSpheres(int), ringsize(bool): SMILES string, No of Spheres and the ringsize given by the user.
smiles(str), noOfSpheres(int), ringsize(bool): SMILES string, No of Spheres and the ringsize given by the user.
Returns:
HOSECodes (string): CDK generted HOSECodes.
HOSECodes (str): CDK generated HOSECodes.
"""
if any(char.isspace() for char in smiles):
smiles = smiles.replace(" ", "+")
Expand Down
Loading

0 comments on commit de2515c

Please sign in to comment.