From 0deaff5432f55ede21749fceb29272c46d5f329b Mon Sep 17 00:00:00 2001 From: SamuelBehr Date: Thu, 10 Aug 2023 13:58:45 +0200 Subject: [PATCH 1/3] Fixed SD files not being imported in their entire length if structures fail to be read by the reader (caused the import to stop); structures imported from SDF are now assigned the file name extended with the index of the structure in the file (formerly the count of successful imports) if no name of the structure could be detected; --- .../cheminf/mortar/model/io/Importer.java | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/main/java/de/unijena/cheminf/mortar/model/io/Importer.java b/src/main/java/de/unijena/cheminf/mortar/model/io/Importer.java index d20238fe..ac9827a3 100644 --- a/src/main/java/de/unijena/cheminf/mortar/model/io/Importer.java +++ b/src/main/java/de/unijena/cheminf/mortar/model/io/Importer.java @@ -255,7 +255,8 @@ else if(tmpFormat.getFormatName().equalsIgnoreCase(MDLV3000Format.getInstance(). } // /** - * Imports an SD file. + * Imports an SD file. If no name can be detected for a structure, the file name extended with the index of the + * structure in the file is used as name of the structure. 
* * @param aFile sdf * @return the imported molecules in an IAtomContainerSet @@ -266,15 +267,32 @@ private IAtomContainerSet importSDFile(File aFile) throws FileNotFoundException IteratingSDFReader tmpSDFReader = new IteratingSDFReader(new FileInputStream(aFile), SilentChemObjectBuilder.getInstance()); int tmpCounter = 0; - while(!Thread.currentThread().isInterrupted() && tmpSDFReader.hasNext()){ + while(!Thread.currentThread().isInterrupted()){ + if (!tmpSDFReader.hasNext()) { + tmpSDFReader.setSkip(true); + if (!tmpSDFReader.hasNext()) { + // there is no next + break; + } + // molecule just could not be read and has therefore been skipped + tmpSDFReader.setSkip(false); + Importer.LOGGER.info("Import failed for structure:\t" + tmpCounter + " (index of structure in file)."); + tmpCounter++; + } IAtomContainer tmpAtomContainer = tmpSDFReader.next(); String tmpName = this.findMoleculeName(tmpAtomContainer); - if(tmpName == null || tmpName.isBlank() || tmpName.isEmpty()) + if(tmpName == null || tmpName.isBlank()) + // the counter here equals the index of the structure in the file tmpName = FileUtil.getFileNameWithoutExtension(aFile) + tmpCounter; tmpAtomContainer.setProperty(Importer.MOLECULE_NAME_PROPERTY_KEY, tmpName); tmpAtomContainerSet.addAtomContainer(tmpAtomContainer); tmpCounter++; } + int tmpFailedImportsCount = tmpCounter - tmpAtomContainerSet.getAtomContainerCount(); + if (tmpFailedImportsCount > 0) { + Importer.LOGGER.warning("The import from SD file failed for a total of " + tmpFailedImportsCount + + " structure(s)."); + } return tmpAtomContainerSet; } // From dc7a095422818184787341582719af9ce7a9845b Mon Sep 17 00:00:00 2001 From: Jonas Schaub <44881147+JonasSchaub@users.noreply.github.com> Date: Wed, 21 Feb 2024 15:11:34 +0100 Subject: [PATCH 2/3] Additional comments; --- .../cheminf/mortar/model/io/Importer.java | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git 
a/src/main/java/de/unijena/cheminf/mortar/model/io/Importer.java b/src/main/java/de/unijena/cheminf/mortar/model/io/Importer.java index ac9827a3..ebf8206c 100644 --- a/src/main/java/de/unijena/cheminf/mortar/model/io/Importer.java +++ b/src/main/java/de/unijena/cheminf/mortar/model/io/Importer.java @@ -257,6 +257,8 @@ else if(tmpFormat.getFormatName().equalsIgnoreCase(MDLV3000Format.getInstance(). /** * Imports an SD file. If no name can be detected for a structure, the file name extended with the index of the * structure in the file is used as name of the structure. + * NOTE: if multiple erroneous entries in a row are there in the input file, they are skipped together and not + * logged individually! * * @param aFile sdf * @return the imported molecules in an IAtomContainerSet @@ -264,26 +266,29 @@ else if(tmpFormat.getFormatName().equalsIgnoreCase(MDLV3000Format.getInstance(). */ private IAtomContainerSet importSDFile(File aFile) throws FileNotFoundException { IAtomContainerSet tmpAtomContainerSet = new AtomContainerSet(); - IteratingSDFReader tmpSDFReader = new IteratingSDFReader(new FileInputStream(aFile), - SilentChemObjectBuilder.getInstance()); + /*the IteratingSDFReader is not set to skip erroneous input molecules in its constructor to be able to log them*/ + IteratingSDFReader tmpSDFReader = new IteratingSDFReader(new FileInputStream(aFile), SilentChemObjectBuilder.getInstance()); int tmpCounter = 0; - while(!Thread.currentThread().isInterrupted()){ + while (!Thread.currentThread().isInterrupted()) { + //end of file or encountered erroneous entry if (!tmpSDFReader.hasNext()) { + //skip if it is an erroneous entry tmpSDFReader.setSkip(true); if (!tmpSDFReader.hasNext()) { - // there is no next + // there is no next, end of file! 
break; } - // molecule just could not be read and has therefore been skipped + // molecule just could not be read and has therefore been skipped, restore skip setting for next iteration tmpSDFReader.setSkip(false); Importer.LOGGER.info("Import failed for structure:\t" + tmpCounter + " (index of structure in file)."); tmpCounter++; } IAtomContainer tmpAtomContainer = tmpSDFReader.next(); String tmpName = this.findMoleculeName(tmpAtomContainer); - if(tmpName == null || tmpName.isBlank()) + if(tmpName == null || tmpName.isBlank()) { // the counter here equals the index of the structure in the file tmpName = FileUtil.getFileNameWithoutExtension(aFile) + tmpCounter; + } tmpAtomContainer.setProperty(Importer.MOLECULE_NAME_PROPERTY_KEY, tmpName); tmpAtomContainerSet.addAtomContainer(tmpAtomContainer); tmpCounter++; From 01841485bf94028d53908073f5ab8b957f1b1e13 Mon Sep 17 00:00:00 2001 From: Jonas Schaub <44881147+JonasSchaub@users.noreply.github.com> Date: Wed, 21 Feb 2024 15:48:28 +0100 Subject: [PATCH 3/3] More specific log messages; --- .../mortar/controller/MainViewController.java | 6 ++++-- .../unijena/cheminf/mortar/model/io/Importer.java | 13 ++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/main/java/de/unijena/cheminf/mortar/controller/MainViewController.java b/src/main/java/de/unijena/cheminf/mortar/controller/MainViewController.java index 28452120..0ef80440 100644 --- a/src/main/java/de/unijena/cheminf/mortar/controller/MainViewController.java +++ b/src/main/java/de/unijena/cheminf/mortar/controller/MainViewController.java @@ -514,8 +514,10 @@ protected IAtomContainerSet call() throws Exception { tmpMoleculeDataModel.setName(tmpAtomContainer.getProperty(Importer.MOLECULE_NAME_PROPERTY_KEY)); this.moleculeDataModelList.add(tmpMoleculeDataModel); } - MainViewController.LOGGER.log(Level.INFO, "Imported " + tmpAtomContainerSet.getAtomContainerCount() + " molecules from file: " + tmpImporter.getFileName() - + " " + tmpExceptionCount + 
" molecules could not be parsed into the internal data model."); + MainViewController.LOGGER.log(Level.INFO, "Successfully imported " + tmpAtomContainerSet.getAtomContainerCount() + + " molecules from file: " + tmpImporter.getFileName() + "; " + tmpExceptionCount + + " molecules could not be parsed into the internal data model (SMILES code generation failed). " + + "See above how many molecules could not be read from the input file at all or produced exceptions while preprocessing."); this.updateStatusBar(this.importerThread, Message.get("Status.imported")); this.isImportRunningProperty.setValue(false); this.mainView.getMainCenterPane().setStyle("-fx-background-image: none"); diff --git a/src/main/java/de/unijena/cheminf/mortar/model/io/Importer.java b/src/main/java/de/unijena/cheminf/mortar/model/io/Importer.java index ebf8206c..b985c5c3 100644 --- a/src/main/java/de/unijena/cheminf/mortar/model/io/Importer.java +++ b/src/main/java/de/unijena/cheminf/mortar/model/io/Importer.java @@ -109,8 +109,9 @@ public Importer(SettingsContainer aSettingsContainer) throws NullPointerExceptio Objects.requireNonNull(aSettingsContainer, "Given settings container is null."); this.settingsContainer = aSettingsContainer; String tmpRecentDirFromContainer = this.settingsContainer.getRecentDirectoryPathSetting(); - if(tmpRecentDirFromContainer == null || tmpRecentDirFromContainer.isEmpty()) { + if (tmpRecentDirFromContainer == null || tmpRecentDirFromContainer.isEmpty()) { this.settingsContainer.setRecentDirectoryPathSetting(SettingsContainer.RECENT_DIRECTORY_PATH_SETTING_DEFAULT); + Importer.LOGGER.log(Level.INFO, "Recent directory could not be read, resetting to default."); } this.fileName = null; } @@ -136,6 +137,7 @@ public IAtomContainerSet importMoleculeFile(File aFile) throws NullPointerExcept String tmpRecentDirFromContainer = this.settingsContainer.getRecentDirectoryPathSetting(); if(tmpRecentDirFromContainer == null || tmpRecentDirFromContainer.isEmpty()) { 
this.settingsContainer.setRecentDirectoryPathSetting(SettingsContainer.RECENT_DIRECTORY_PATH_SETTING_DEFAULT); + Importer.LOGGER.log(Level.INFO, "Recent directory could not be read, resetting to default."); } String tmpFilePath = aFile.getPath(); String tmpFileExtension = FileUtil.getFileExtension(tmpFilePath); @@ -191,6 +193,7 @@ public File openFile(Stage aParentStage) throws NullPointerException { if(!tmpRecentDirectory.isDirectory()) { tmpRecentDirectory = new File(SettingsContainer.RECENT_DIRECTORY_PATH_SETTING_DEFAULT); this.settingsContainer.setRecentDirectoryPathSetting(SettingsContainer.RECENT_DIRECTORY_PATH_SETTING_DEFAULT); + Importer.LOGGER.log(Level.INFO, "Recent directory could not be read, resetting to default."); } tmpFileChooser.setInitialDirectory(tmpRecentDirectory); File tmpFile = null; @@ -280,7 +283,7 @@ private IAtomContainerSet importSDFile(File aFile) throws FileNotFoundException } // molecule just could not be read and has therefore been skipped, restore skip setting for next iteration tmpSDFReader.setSkip(false); - Importer.LOGGER.info("Import failed for structure:\t" + tmpCounter + " (index of structure in file)."); + Importer.LOGGER.log(Level.WARNING, "Import failed for structure:\t" + tmpCounter + " (index of structure in file)."); tmpCounter++; } IAtomContainer tmpAtomContainer = tmpSDFReader.next(); @@ -295,7 +298,7 @@ private IAtomContainerSet importSDFile(File aFile) throws FileNotFoundException } int tmpFailedImportsCount = tmpCounter - tmpAtomContainerSet.getAtomContainerCount(); if (tmpFailedImportsCount > 0) { - Importer.LOGGER.warning("The import from SD file failed for a total of " + tmpFailedImportsCount + + Importer.LOGGER.log(Level.WARNING, "The import from SD file failed for a total of " + tmpFailedImportsCount + " structure(s)."); } return tmpAtomContainerSet; @@ -385,7 +388,7 @@ private void preprocessMoleculeSet(IAtomContainerSet aMoleculeSet) throws NullPo } /* note: Things like assigning bond orders and atom types 
here is redundant if the atom containers are discarded after molecule set import and molecular information only represented by SMILES codes in - the molecule data models. Nevertheless it is done here to ensure that the generated SMILES codes are correct. + the molecule data models. Nevertheless, it is done here to ensure that the generated SMILES codes are correct. */ int tmpExceptionsCounter = 0; for (IAtomContainer tmpMolecule : aMoleculeSet.atomContainers()) { @@ -407,7 +410,7 @@ private void preprocessMoleculeSet(IAtomContainerSet aMoleculeSet) throws NullPo tmpExceptionsCounter++; } } - Importer.LOGGER.log(Level.INFO, "Imported and preprocessed molecule set. " + tmpExceptionsCounter + " exceptions occurred."); + Importer.LOGGER.log(Level.WARNING, "Imported and preprocessed molecule set. " + tmpExceptionsCounter + " exceptions occurred while processing."); } // //