From 3ba35b5604a3a930f2a1570d732bfabaef975df7 Mon Sep 17 00:00:00 2001 From: Kohulan Date: Fri, 23 Feb 2024 09:32:57 +0100 Subject: [PATCH] fix: SMILES parse error handling --- app/modules/coconut/preprocess.py | 10 ++++------ app/modules/toolkits/helpers.py | 12 ++++++++++-- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/app/modules/coconut/preprocess.py b/app/modules/coconut/preprocess.py index 6308fc5..665e2cc 100644 --- a/app/modules/coconut/preprocess.py +++ b/app/modules/coconut/preprocess.py @@ -23,19 +23,16 @@ def get_mol_block(input_text: str) -> str: Raises: ValueError: If input_text is not a valid Mol or SMILES. """ - check = rdkitmodules.is_valid_molecule(input_text) - if check == "smiles": + try: molecule = parse_input(input_text, "cdk", False) mol_block = cdk.get_CDK_SDG_mol( molecule, V3000=False, ).replace("$$$$\n", "") return mol_block - elif check == "mol": - return input_text - else: - return "Error!, Check the input text." + except InvalidInputException: + raise InvalidInputException(f"Invalid input SMILES: {input_text}") def get_molecule_hash(molecule: any) -> dict: @@ -109,6 +106,7 @@ def get_COCONUT_preprocessing( # Original molecule original_mol = parse_input(input_text, "rdkit", False) + original_mol_block = get_mol_block(input_text) original_mol_hash = get_molecule_hash(original_mol) original_representations = get_representations(original_mol) diff --git a/app/modules/toolkits/helpers.py b/app/modules/toolkits/helpers.py index 57746d4..96e63df 100644 --- a/app/modules/toolkits/helpers.py +++ b/app/modules/toolkits/helpers.py @@ -35,6 +35,8 @@ def parse_SMILES(smiles: str, framework: str = "rdkit", standardize: bool = Fals Args: smiles (str): Input SMILES string. + framework (str): Framework to use for parsing. Default is "rdkit". + standardize (bool): Whether to standardize the molecule. Default is False. Returns: Chem.Mol or None: Valid molecule object or None if an error occurs. @@ -43,7 +45,7 @@ def parse_SMILES(smiles: str, framework: str = "rdkit", standardize: bool = Fals try: smiles = smiles.replace(" ", "+") if framework == "rdkit": - if smiles.__contains__("R"): + if "R" in smiles: mol = get_CDK_IAtomContainer(smiles) mol_str = get_CDK_SDG_mol(mol) mol = Chem.MolFromMolBlock(mol_str) @@ -55,9 +57,15 @@ def parse_SMILES(smiles: str, framework: str = "rdkit", standardize: bool = Fals mol = get_CDK_IAtomContainer(smiles) elif framework == "openbabel": mol = get_ob_mol(smiles) + else: + raise ValueError(f"Invalid framework specified: {framework}") + if mol: return mol else: - raise InvalidInputException(name="smiles", value=smiles) + mol = get_CDK_IAtomContainer(smiles) + mol_str = get_CDK_SDG_mol(mol) + return Chem.MolFromMolBlock(mol_str) + except Exception: raise InvalidInputException(name="smiles", value=smiles)