Skip to content

Commit

Permalink
Merge pull request #12 from FelixBaensch/SDF_Import_fix
Browse files Browse the repository at this point in the history
Fixed SD files not being imported completely
  • Loading branch information
JonasSchaub authored Feb 21, 2024
2 parents 009cd29 + 8ef93bb commit 248ee59
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -519,8 +519,10 @@ protected IAtomContainerSet call() throws Exception {
tmpMoleculeDataModel.setName(tmpAtomContainer.getProperty(Importer.MOLECULE_NAME_PROPERTY_KEY));
this.moleculeDataModelList.add(tmpMoleculeDataModel);
}
MainViewController.LOGGER.log(Level.INFO, "Imported " + tmpAtomContainerSet.getAtomContainerCount() + " molecules from file: " + tmpImporter.getFileName()
+ " " + tmpExceptionCount + " molecules could not be parsed into the internal data model.");
MainViewController.LOGGER.log(Level.INFO, "Successfully imported " + tmpAtomContainerSet.getAtomContainerCount()
+ " molecules from file: " + tmpImporter.getFileName() + "; " + tmpExceptionCount
+ " molecules could not be parsed into the internal data model (SMILES code generation failed). " +
"See above how many molecules could not be read from the input file at all or produced exceptions while preprocessing.");
this.updateStatusBar(this.importerThread, Message.get("Status.imported"));
this.isImportRunningProperty.setValue(false);
this.mainView.getMainCenterPane().setStyle("-fx-background-image: none");
Expand Down
42 changes: 34 additions & 8 deletions src/main/java/de/unijena/cheminf/mortar/model/io/Importer.java
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,9 @@ public Importer(SettingsContainer aSettingsContainer) throws NullPointerExceptio
Objects.requireNonNull(aSettingsContainer, "Given settings container is null.");
this.settingsContainer = aSettingsContainer;
String tmpRecentDirFromContainer = this.settingsContainer.getRecentDirectoryPathSetting();
if(tmpRecentDirFromContainer == null || tmpRecentDirFromContainer.isEmpty()) {
if (tmpRecentDirFromContainer == null || tmpRecentDirFromContainer.isEmpty()) {
this.settingsContainer.setRecentDirectoryPathSetting(SettingsContainer.RECENT_DIRECTORY_PATH_SETTING_DEFAULT);
Importer.LOGGER.log(Level.INFO, "Recent directory could not be read, resetting to default.");
}
this.fileName = null;
}
Expand All @@ -141,6 +142,7 @@ public IAtomContainerSet importMoleculeFile(File aFile) throws NullPointerExcept
String tmpRecentDirFromContainer = this.settingsContainer.getRecentDirectoryPathSetting();
if(tmpRecentDirFromContainer == null || tmpRecentDirFromContainer.isEmpty()) {
this.settingsContainer.setRecentDirectoryPathSetting(SettingsContainer.RECENT_DIRECTORY_PATH_SETTING_DEFAULT);
Importer.LOGGER.log(Level.INFO, "Recent directory could not be read, resetting to default.");
}
String tmpFilePath = aFile.getPath();
String tmpFileExtension = FileUtil.getFileExtension(tmpFilePath);
Expand Down Expand Up @@ -196,6 +198,7 @@ public File openFile(Stage aParentStage) throws NullPointerException {
if(!tmpRecentDirectory.isDirectory()) {
tmpRecentDirectory = new File(SettingsContainer.RECENT_DIRECTORY_PATH_SETTING_DEFAULT);
this.settingsContainer.setRecentDirectoryPathSetting(SettingsContainer.RECENT_DIRECTORY_PATH_SETTING_DEFAULT);
Importer.LOGGER.log(Level.INFO, "Recent directory could not be read, resetting to default.");
}
tmpFileChooser.setInitialDirectory(tmpRecentDirectory);
File tmpFile = null;
Expand Down Expand Up @@ -260,26 +263,49 @@ else if(tmpFormat.getFormatName().equalsIgnoreCase(MDLV3000Format.getInstance().
}
//
/**
* Imports an SD file.
* Imports an SD file. If no name can be detected for a structure, the file name extended with the index of the
* structure in the file is used as name of the structure.
* NOTE: if multiple erroneous entries in a row are there in the input file, they are skipped together and not
* logged individually!
*
* @param aFile sdf
* @return the imported molecules in an IAtomContainerSet
* @throws FileNotFoundException if a file input stream cannot be opened for the given file
*/
private IAtomContainerSet importSDFile(File aFile) throws FileNotFoundException {
IAtomContainerSet tmpAtomContainerSet = new AtomContainerSet();
IteratingSDFReader tmpSDFReader = new IteratingSDFReader(new FileInputStream(aFile),
SilentChemObjectBuilder.getInstance());
/*the IteratingSDFReader is not set to skip erroneous input molecules in its constructor to be able to log them*/
IteratingSDFReader tmpSDFReader = new IteratingSDFReader(new FileInputStream(aFile), SilentChemObjectBuilder.getInstance());
int tmpCounter = 0;
while(!Thread.currentThread().isInterrupted() && tmpSDFReader.hasNext()){
while (!Thread.currentThread().isInterrupted()) {
//end of file or encountered erroneous entry
if (!tmpSDFReader.hasNext()) {
//skip if it is an erroneous entry
tmpSDFReader.setSkip(true);
if (!tmpSDFReader.hasNext()) {
// there is no next, end of file!
break;
}
// molecule just could not be read and has therefore been skipped, restore skip setting for next iteration
tmpSDFReader.setSkip(false);
Importer.LOGGER.log(Level.WARNING, "Import failed for structure:\t" + tmpCounter + " (index of structure in file).");
tmpCounter++;
}
IAtomContainer tmpAtomContainer = tmpSDFReader.next();
String tmpName = this.findMoleculeName(tmpAtomContainer);
if(tmpName == null || tmpName.isBlank() || tmpName.isEmpty())
if(tmpName == null || tmpName.isBlank()) {
// the counter here equals the index of the structure in the file
tmpName = FileUtil.getFileNameWithoutExtension(aFile) + tmpCounter;
}
tmpAtomContainer.setProperty(Importer.MOLECULE_NAME_PROPERTY_KEY, tmpName);
tmpAtomContainerSet.addAtomContainer(tmpAtomContainer);
tmpCounter++;
}
int tmpFailedImportsCount = tmpCounter - tmpAtomContainerSet.getAtomContainerCount();
if (tmpFailedImportsCount > 0) {
Importer.LOGGER.log(Level.WARNING, "The import from SD file failed for a total of " + tmpFailedImportsCount +
" structure(s).");
}
return tmpAtomContainerSet;
}
//
Expand Down Expand Up @@ -367,7 +393,7 @@ private void preprocessMoleculeSet(IAtomContainerSet aMoleculeSet) throws NullPo
}
/* note: Things like assigning bond orders and atom types here is redundant if the atom containers
are discarded after molecule set import and molecular information only represented by SMILES codes in
the molecule data models. Nevertheless it is done here to ensure that the generated SMILES codes are correct.
the molecule data models. Nevertheless, it is done here to ensure that the generated SMILES codes are correct.
*/
int tmpExceptionsCounter = 0;
for (IAtomContainer tmpMolecule : aMoleculeSet.atomContainers()) {
Expand All @@ -389,7 +415,7 @@ private void preprocessMoleculeSet(IAtomContainerSet aMoleculeSet) throws NullPo
tmpExceptionsCounter++;
}
}
Importer.LOGGER.log(Level.INFO, "Imported and preprocessed molecule set. " + tmpExceptionsCounter + " exceptions occurred.");
Importer.LOGGER.log(Level.WARNING, "Imported and preprocessed molecule set. " + tmpExceptionsCounter + " exceptions occurred while processing.");
}
//</editor-fold>
//
Expand Down

0 comments on commit 248ee59

Please sign in to comment.