Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed SD files not being imported completely #12

Merged
merged 4 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -519,8 +519,10 @@ protected IAtomContainerSet call() throws Exception {
tmpMoleculeDataModel.setName(tmpAtomContainer.getProperty(Importer.MOLECULE_NAME_PROPERTY_KEY));
this.moleculeDataModelList.add(tmpMoleculeDataModel);
}
MainViewController.LOGGER.log(Level.INFO, "Imported " + tmpAtomContainerSet.getAtomContainerCount() + " molecules from file: " + tmpImporter.getFileName()
+ " " + tmpExceptionCount + " molecules could not be parsed into the internal data model.");
MainViewController.LOGGER.log(Level.INFO, "Successfully imported " + tmpAtomContainerSet.getAtomContainerCount()
+ " molecules from file: " + tmpImporter.getFileName() + "; " + tmpExceptionCount
+ " molecules could not be parsed into the internal data model (SMILES code generation failed). " +
"See above how many molecules could not be read from the input file at all or produced exceptions while preprocessing.");
this.updateStatusBar(this.importerThread, Message.get("Status.imported"));
this.isImportRunningProperty.setValue(false);
this.mainView.getMainCenterPane().setStyle("-fx-background-image: none");
Expand Down
42 changes: 34 additions & 8 deletions src/main/java/de/unijena/cheminf/mortar/model/io/Importer.java
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,9 @@ public Importer(SettingsContainer aSettingsContainer) throws NullPointerExceptio
Objects.requireNonNull(aSettingsContainer, "Given settings container is null.");
this.settingsContainer = aSettingsContainer;
String tmpRecentDirFromContainer = this.settingsContainer.getRecentDirectoryPathSetting();
if(tmpRecentDirFromContainer == null || tmpRecentDirFromContainer.isEmpty()) {
if (tmpRecentDirFromContainer == null || tmpRecentDirFromContainer.isEmpty()) {
this.settingsContainer.setRecentDirectoryPathSetting(SettingsContainer.RECENT_DIRECTORY_PATH_SETTING_DEFAULT);
Importer.LOGGER.log(Level.INFO, "Recent directory could not be read, resetting to default.");
}
this.fileName = null;
}
Expand All @@ -141,6 +142,7 @@ public IAtomContainerSet importMoleculeFile(File aFile) throws NullPointerExcept
String tmpRecentDirFromContainer = this.settingsContainer.getRecentDirectoryPathSetting();
if(tmpRecentDirFromContainer == null || tmpRecentDirFromContainer.isEmpty()) {
this.settingsContainer.setRecentDirectoryPathSetting(SettingsContainer.RECENT_DIRECTORY_PATH_SETTING_DEFAULT);
Importer.LOGGER.log(Level.INFO, "Recent directory could not be read, resetting to default.");
}
String tmpFilePath = aFile.getPath();
String tmpFileExtension = FileUtil.getFileExtension(tmpFilePath);
Expand Down Expand Up @@ -196,6 +198,7 @@ public File openFile(Stage aParentStage) throws NullPointerException {
if(!tmpRecentDirectory.isDirectory()) {
tmpRecentDirectory = new File(SettingsContainer.RECENT_DIRECTORY_PATH_SETTING_DEFAULT);
this.settingsContainer.setRecentDirectoryPathSetting(SettingsContainer.RECENT_DIRECTORY_PATH_SETTING_DEFAULT);
Importer.LOGGER.log(Level.INFO, "Recent directory could not be read, resetting to default.");
}
tmpFileChooser.setInitialDirectory(tmpRecentDirectory);
File tmpFile = null;
Expand Down Expand Up @@ -260,26 +263,49 @@ else if(tmpFormat.getFormatName().equalsIgnoreCase(MDLV3000Format.getInstance().
}
//
/**
* Imports an SD file.
* Imports an SD file. If no name can be detected for a structure, the file name extended with the index of the
* structure in the file is used as name of the structure.
* NOTE: if multiple erroneous entries in a row are there in the input file, they are skipped together and not
* logged individually!
*
* @param aFile sdf
* @return the imported molecules in an IAtomContainerSet
* @throws FileNotFoundException if a file input stream cannot be opened for the given file
*/
private IAtomContainerSet importSDFile(File aFile) throws FileNotFoundException {
IAtomContainerSet tmpAtomContainerSet = new AtomContainerSet();
IteratingSDFReader tmpSDFReader = new IteratingSDFReader(new FileInputStream(aFile),
SilentChemObjectBuilder.getInstance());
/*the IteratingSDFReader is not set to skip erroneous input molecules in its constructor to be able to log them*/
IteratingSDFReader tmpSDFReader = new IteratingSDFReader(new FileInputStream(aFile), SilentChemObjectBuilder.getInstance());
int tmpCounter = 0;
while(!Thread.currentThread().isInterrupted() && tmpSDFReader.hasNext()){
while (!Thread.currentThread().isInterrupted()) {
//end of file or encountered erroneous entry
if (!tmpSDFReader.hasNext()) {
//skip if it is an erroneous entry
tmpSDFReader.setSkip(true);
if (!tmpSDFReader.hasNext()) {
// there is no next, end of file!
break;
}
// molecule just could not be read and has therefore been skipped, restore skip setting for next iteration
tmpSDFReader.setSkip(false);
Importer.LOGGER.log(Level.WARNING, "Import failed for structure:\t" + tmpCounter + " (index of structure in file).");
tmpCounter++;
}
IAtomContainer tmpAtomContainer = tmpSDFReader.next();
String tmpName = this.findMoleculeName(tmpAtomContainer);
if(tmpName == null || tmpName.isBlank() || tmpName.isEmpty())
if(tmpName == null || tmpName.isBlank()) {
// the counter here equals the index of the structure in the file
tmpName = FileUtil.getFileNameWithoutExtension(aFile) + tmpCounter;
}
tmpAtomContainer.setProperty(Importer.MOLECULE_NAME_PROPERTY_KEY, tmpName);
tmpAtomContainerSet.addAtomContainer(tmpAtomContainer);
tmpCounter++;
}
int tmpFailedImportsCount = tmpCounter - tmpAtomContainerSet.getAtomContainerCount();
if (tmpFailedImportsCount > 0) {
Importer.LOGGER.log(Level.WARNING, "The import from SD file failed for a total of " + tmpFailedImportsCount +
" structure(s).");
}
return tmpAtomContainerSet;
}
//
Expand Down Expand Up @@ -367,7 +393,7 @@ private void preprocessMoleculeSet(IAtomContainerSet aMoleculeSet) throws NullPo
}
/* note: Things like assigning bond orders and atom types here is redundant if the atom containers
are discarded after molecule set import and molecular information only represented by SMILES codes in
the molecule data models. Nevertheless it is done here to ensure that the generated SMILES codes are correct.
the molecule data models. Nevertheless, it is done here to ensure that the generated SMILES codes are correct.
*/
int tmpExceptionsCounter = 0;
for (IAtomContainer tmpMolecule : aMoleculeSet.atomContainers()) {
Expand All @@ -389,7 +415,7 @@ private void preprocessMoleculeSet(IAtomContainerSet aMoleculeSet) throws NullPo
tmpExceptionsCounter++;
}
}
Importer.LOGGER.log(Level.INFO, "Imported and preprocessed molecule set. " + tmpExceptionsCounter + " exceptions occurred.");
Importer.LOGGER.log(Level.WARNING, "Imported and preprocessed molecule set. " + tmpExceptionsCounter + " exceptions occurred while processing.");
}
//</editor-fold>
//
Expand Down
Loading