Skip to content

Commit

Permalink
feat: include True,False options
Browse files Browse the repository at this point in the history
  • Loading branch information
Kohulan committed Feb 21, 2024
1 parent 32753f2 commit c13fba5
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 45 deletions.
77 changes: 55 additions & 22 deletions app/modules/coconut/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,46 +87,79 @@ def get_representations(molecule: any) -> dict:
return {"Error": "Check input SMILES"}


def get_COCONUT_preprocessing(input_text: str) -> dict:
def get_COCONUT_preprocessing(
input_text: str, _3d_mol: bool = False, descriptors: bool = False
) -> dict:
"""Preprocess user input text suitable for the COCONUT database submission.
Args:
input_text (str): Input text (Mol/str).
input_text (str): The input text representing a chemical compound in Mol format.
_3d_mol (bool, optional): Flag indicating whether to generate 3D coordinates for the molecule. Defaults to False.
descriptors (bool, optional): Flag indicating whether to generate COCONUT descriptors for the molecule. Defaults to False.
Returns:
dict: COCONUT preprocessed data.
dict: A dictionary containing COCONUT preprocessed data with representations, descriptors, and errors.
Raises:
InvalidInputException: If the input SMILES string is invalid.
"""
try:
# Preprocess input text
input_text = input_text.replace(" ", "+").replace("\\\\", "\\")

# Original molecule
original_mol = parse_input(input_text, "rdkit", False)
original_mol_block = get_mol_block(input_text)
original_mol_hash = get_molecule_hash(original_mol)
original_representations = get_representations(original_mol)
original_descriptors = get_COCONUT_descriptors(input_text, "rdkit")
standarised_mol_block = standardizer.standardize_molblock(original_mol_block)

# Standardized molecule
standardized_mol_block = standardizer.standardize_molblock(original_mol_block)
standardized_SMILES = Chem.MolToSmiles(
Chem.MolFromMolBlock(standarised_mol_block),
kekuleSmiles=True,
Chem.MolFromMolBlock(standardized_mol_block), kekuleSmiles=True
)

standardized_mol = parse_input(standardized_SMILES, "rdkit", False)
standardized_representations = get_representations(standardized_mol)
standardized_descriptors = get_COCONUT_descriptors(standardized_SMILES, "rdkit")

# Parent molecule
parent_canonical_smiles = original_mol_hash["Canonical_SMILES"]
parent_mol_block = get_mol_block(parent_canonical_smiles)
rdkitParentMol = parse_input(parent_canonical_smiles, "rdkit", False)
parent_3D_molblock = rdkitmodules.get_3d_conformers(rdkitParentMol)

parent_representations = get_representations(rdkitParentMol)
parent_descriptors = get_COCONUT_descriptors(parent_canonical_smiles, "rdkit")

# Compute descriptors if requested
if descriptors:
original_descriptors = get_COCONUT_descriptors(input_text, "rdkit")
standardized_descriptors = get_COCONUT_descriptors(
standardized_SMILES, "rdkit"
)
parent_descriptors = get_COCONUT_descriptors(
parent_canonical_smiles, "rdkit"
)
else:
original_descriptors = {"descriptors": "Not computed, enable for computing"}
standardized_descriptors = {
"descriptors": "Not computed, enable for computing"
}
parent_descriptors = {"descriptors": "Not computed, enable for computing"}

# Compute 3D conformers if requested
if _3d_mol:
original_3d_mol_block = rdkitmodules.get_3d_conformers(original_mol)
standardized_3d_mol_block = rdkitmodules.get_3d_conformers(standardized_mol)
parent_3D_mol_block = rdkitmodules.get_3d_conformers(rdkitParentMol)
else:
original_3d_mol_block = "Not computed, enable for computing"
standardized_3d_mol_block = "Not computed, enable for computing"
parent_3D_mol_block = "Not computed, enable for computing"

# Construct and return the COCONUT preprocessed data
return {
"original": {
"representations": {
"2D_MOL": original_mol_block,
"3D_MOL": rdkitmodules.get_3d_conformers(original_mol),
"cannonical_smiles": original_mol_hash["Isomeric_SMILES"],
"3D_MOL": original_3d_mol_block,
"canonical_smiles": original_mol_hash["Isomeric_SMILES"],
**original_representations,
},
"has_stereo": rdkitmodules.has_stereochemistry(original_mol),
Expand All @@ -136,24 +169,24 @@ def get_COCONUT_preprocessing(input_text: str) -> dict:
"standardized": {
"representations": {
"2D_MOL": original_mol_block,
"3D_MOL": rdkitmodules.get_3d_conformers(standardized_mol),
"cannonical_smiles": standardized_SMILES,
"3D_MOL": standardized_3d_mol_block,
"canonical_smiles": standardized_SMILES,
**standardized_representations,
},
"has_stereo": rdkitmodules.has_stereochemistry(standardized_mol),
"descriptors": standardized_descriptors,
"errors": checker.check_molblock(standarised_mol_block),
"errors": checker.check_molblock(standardized_mol_block),
},
"parent": {
"representations": {
"3D_MOL": parent_3D_molblock,
"cannonical_smiles": parent_canonical_smiles,
"2D_MOL": parent_mol_block,
"3D_MOL": parent_3D_mol_block,
"canonical_smiles": parent_canonical_smiles,
**parent_representations,
},
"has_stereo": rdkitmodules.has_stereochemistry(rdkitParentMol),
"descriptors": parent_descriptors,
},
}
except InvalidInputException as e:
print(e)
return {"Error": f"Invalid input SMILES: {input_text}"}
except InvalidInputException:
raise InvalidInputException(f"Invalid input SMILES: {input_text}")
21 changes: 16 additions & 5 deletions app/routers/chem.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,19 +746,30 @@ async def tanimoto_similarity(
)
async def coconut_preprocessing(
smiles: str = Query(
...,
title="SMILES",
description="SMILES string representing a chemical compound",
openapi_examples={
"example1": {
examples={
"Caffeine": {
"summary": "Example: Caffeine",
"value": "CN1C=NC2=C1C(=O)N(C(=O)N2C)C",
},
"example2": {
"Topiramate-13C6": {
"summary": "Example: Topiramate-13C6",
"value": "CC1(C)OC2COC3(COS(N)(=O)=O)OC(C)(C)OC3C2O1",
},
},
),
_3d_mol: bool = Query(
False,
title="3D_mol",
description="Flag indicating whether to generate 3D coordinates for a given molecule",
),
descriptors: bool = Query(
False,
title="descriptors",
description="Flag indicating whether to generate COCONUT descriptors for a given molecule",
),
):
"""Generates an Input JSON file with information on different molecular.
Expand All @@ -775,9 +786,9 @@ async def coconut_preprocessing(
- HTTPException: If there is an error reading the SMILES string.
"""
try:
data = get_COCONUT_preprocessing(smiles)
data = get_COCONUT_preprocessing(smiles, _3d_mol, descriptors)
if data:
return JSONResponse(content=data)
return data
else:
raise HTTPException(
status_code=422,
Expand Down
62 changes: 45 additions & 17 deletions app/schemas/coconut.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,15 @@ class Representations(BaseModel):
"""Represents different representations of a molecule.
Attributes:
field_2D_mol (str): The 2D molecular structure of the parent molecule.
field_3D_mol (str): The 3D molecular structure of the parent molecule.
InChI (str): IUPAC International Chemical Identifier (InChI) representation.
InChI_Key (str): InChI key.
Murko (str): Murcko scaffold representation.
"""

field_2D_mol: str = Field(..., alias="2D_mol")
field_3D_mol: str = Field(..., alias="3D_mol")
InChI: str
InChI_Key: str
Murko: str
Expand Down Expand Up @@ -87,39 +91,63 @@ class Descriptors(BaseModel):
nplikeness: float


class Original(BaseModel):
"""Represents the original molecule with various properties.
Attributes:
representations (Representations): Molecular representations.
has_stereo (bool): Indicates presence of stereochemical variants.
descriptors (Descriptors): Molecular descriptors.
errors (dict): Information on the errors found in the given molecule.
"""

representations: Representations
has_stereo: bool
# NOTE(review): get_COCONUT_preprocessing builds a placeholder dict
# ({"descriptors": "Not computed, enable for computing"}) when descriptors
# are not requested - confirm that payload validates against Descriptors.
descriptors: Descriptors
errors: dict


class Standardized(BaseModel):
"""Represents the standardized molecule with various properties.
Attributes:
representations (Representations): Molecular representations.
has_stereo (bool): Indicates presence of stereochemical variants.
descriptors (Descriptors): Molecular descriptors.
errors (dict): Information on the errors found in the standardized molecule.
"""

representations: Representations
has_stereo: bool
# NOTE(review): get_COCONUT_preprocessing builds a placeholder dict
# ({"descriptors": "Not computed, enable for computing"}) when descriptors
# are not requested - confirm that payload validates against Descriptors.
descriptors: Descriptors
errors: dict


class Parent(BaseModel):
"""Represents the parent molecule with various properties.
Attributes:
field_2D_mol (str): The 2D molecular structure of the parent molecule.
field_3D_mol (str): The 3D molecular structure of the parent molecule.
v3000 (str): A specific molecular structure format.
representations (Representations): Molecular representations.
has_stereo (bool): Indicates presence of stereochemical variants.
descriptors (Descriptors): Molecular descriptors.
errors (dict): Information on the errors found in the given molecule
"""

field_2D_mol: str = Field(..., alias="2D_mol")
field_3D_mol: str = Field(..., alias="3D_mol")
v3000: str
representations: Representations
has_stereo: bool
descriptors: Descriptors
errors: dict


class COCONUTPreprocessingModel(BaseModel):
"""Represents a molecule after CocoNut preprocessing.
Attributes:
original_mol (str): Original molecule information.
standardised_mol (str): Standardized molecule information.
standardised_SMILES (str): Standardized SMILES notation.
molecule_hash (MoleculeHash): Hash information of the molecule.
parent (Parent): Parent molecule details.
stereochemical_variants (bool): Indicates presence of stereochemical variants.
original_mol (Original): Original molecule information.
standardised_mol (Standardized): Standardized molecule information.
parent (Parent): Parent molecule information.
"""

original_mol: str
standardised_mol: str
standardised_SMILES: str
molecule_hash: MoleculeHash
original_mol: Original
standardised_mol: Standardized
parent: Parent
stereochemical_variants: bool
2 changes: 1 addition & 1 deletion tests/test_chem.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def test_exception_standardize_mol(invalid_molfile, exception_response_code):

@pytest.mark.parametrize(
"smiles, response_code",
[("CCO", 200), ("INVALID_INPUT", 200)],
[("CCO", 200), ("INVALID_INPUT", 422)],
)
def test_successful_coconut_preprocessing(smiles, response_code):
response = client.get(
Expand Down

0 comments on commit c13fba5

Please sign in to comment.