From e6a23a2ebe1d9af9a63a89770a5e91489b87d447 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Thu, 21 Jul 2022 09:11:54 +0100 Subject: [PATCH 01/13] [sdk] Add IndexerInterface::unindex(Collection) - for unindexing in bulk - clarify that both unindex methods are synchronous, unlike the indexing ones --- .../pt/ua/dicoogle/sdk/IndexerInterface.java | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java b/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java index 27eb2255a..c9b557922 100755 --- a/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java +++ b/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java @@ -19,6 +19,8 @@ package pt.ua.dicoogle.sdk; import java.net.URI; +import java.util.Collection; + import pt.ua.dicoogle.sdk.datastructs.Report; import pt.ua.dicoogle.sdk.task.Task; @@ -51,7 +53,6 @@ public interface IndexerInterface extends DicooglePlugin { */ public Task index(Iterable files, Object... parameters); - /** * Checks whether the file in the given path can be indexed by this indexer. The indexer should verify if * the file holds compatible content (e.g. a DICOM file). If this method returns false, the file will not @@ -67,8 +68,34 @@ public default boolean handles(URI path) { /** * Removes the indexed file at the given path from the database. * + * This operation is synchronous. + * * @param path the URI of the document * @return whether it was successfully deleted from the database */ public boolean unindex(URI path); + + /** + * Removes indexed files from the database in bulk. + * + * The default implementation unindexes each item one by one + * in a non-specified order via {@linkplain #unindex(URI)}, + * but indexers may implement this as + * one or more individual operations in batch, + * thus becoming faster than unindexing each item individually. + * + * This operation is synchronous. 
+ * Consider running long unindexing tasks in a separate thread. + * + * @param uris the URIs of the items to unindex + * @return the number of files successfully unindexed, + * in the event that some entries were not found in the database + */ + public default int unindex(Collection uris) { + int unindexed = 0; + for (URI uri : uris) { + unindexed += unindex(uri) ? 1 : 0; + } + return unindexed; + } } From f1e1762b7b1306bf60217971640c4783f33c07c8 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Thu, 21 Jul 2022 09:27:44 +0100 Subject: [PATCH 02/13] [sdk] Revamp IndexerInterface documentation - adjust the content to align with good practices (see also https://bioinformatics-ua.github.io/dicoogle-learning-pack/docs/query_index/) --- .../pt/ua/dicoogle/sdk/IndexerInterface.java | 41 +++++++++++++------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java b/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java index c9b557922..e826f6bbd 100755 --- a/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java +++ b/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java @@ -25,41 +25,58 @@ import pt.ua.dicoogle.sdk.task.Task; /** - * Index Interface Plugin. Indexers analyze documents for performing queries. They may index - * documents by DICOM metadata for instance, but other document processing procedures may be involved. + * Indexing plugin interface. + * + * Indexers analyze and record documents for future retrieval. + * They are primarily designed to index DICOM meta-data, + * which in that case they are accompanied by a query plugin, + * and both plugins are called DIM providers. + * However, indexers are not restricted to processing DICOM files, + * or to retrieving and indexing meta-data. * - * @author Luís A. Bastião Silva + * @author Luís A. 
Bastião Silva * @author Frederico Valente */ public interface IndexerInterface extends DicooglePlugin { /** - * Indexes the file path to the database. Indexation procedures are asynchronous, and will return + * Indexes the file path to the database. Indexing procedures are asynchronous, and will return * immediately after the call. The outcome is a report that can be retrieved from the given task * as a future. * * @param file directory or file to index - * @return a representation of the asynchronous indexation task + * @return a representation of the asynchronous indexing task */ public Task index(StorageInputStream file, Object... parameters); /** - * Indexes multiple file paths to the database. Indexation procedures are asynchronous, and will return + * Indexes multiple file paths to the database. Indexing procedures are asynchronous, and will return * immediately after the call. The outcomes are aggregated into a single report and can be retrieved from * the given task as a future. * * @param files a collection of directories and/or files to index - * @return a representation of the asynchronous indexation task + * @return a representation of the asynchronous indexing task */ public Task index(Iterable files, Object... parameters); /** - * Checks whether the file in the given path can be indexed by this indexer. The indexer should verify if - * the file holds compatible content (e.g. a DICOM file). If this method returns false, the file will not - * be indexed. - * + * Checks whether the file in the given path can be indexed by this indexer. + * + * The method should return false if and only if + * it is sure that the file cannot be indexed, + * by observation of its URI. + * This method exists in order to filter out files + * that are obviously incompatible for the indexer. + * However, there are situations where this is not reliable, + * since the storage is free to establish its own file naming rules, + * and that can affect the file extension. 
+ * In case of doubt, it is recommended to leave the default implementation, + * which returns true unconditionally. + * Attempts to read invalid files can instead + * be handled gracefully by the indexer by capturing exceptions. + * * @param path a URI to the file to check - * @return whether the indexer can handle the file at the given path + * @return whether the item at the given URI path can be fed to this indexer */ public default boolean handles(URI path) { return true; From 8b7cb759cbbe69dc414b97c78d75b608e5fdbd2b Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Fri, 22 Jul 2022 12:12:20 +0100 Subject: [PATCH 03/13] [sdk] rethink bulk unindexing to be more informative - add `UnindexReport` class and nested classes - for containing errors which may occur in bulk unindexing - change `IndexerInterface#unindex(Collection)` - returns `UnindexReport` - can throw `IOException` --- .../pt/ua/dicoogle/sdk/IndexerInterface.java | 29 +++- .../sdk/datastructs/UnindexReport.java | 125 ++++++++++++++++++ 2 files changed, 148 insertions(+), 6 deletions(-) create mode 100644 sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java diff --git a/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java b/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java index e826f6bbd..e053a6bc0 100755 --- a/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java +++ b/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java @@ -18,10 +18,16 @@ */ package pt.ua.dicoogle.sdk; +import java.io.IOException; import java.net.URI; +import java.util.ArrayList; import java.util.Collection; +import java.util.List; +import java.util.Objects; import pt.ua.dicoogle.sdk.datastructs.Report; +import pt.ua.dicoogle.sdk.datastructs.UnindexReport; +import pt.ua.dicoogle.sdk.datastructs.UnindexReport.FailedUnindex; import pt.ua.dicoogle.sdk.task.Task; /** @@ -105,14 +111,25 @@ public default boolean handles(URI path) { * Consider running long unindexing tasks in a separate thread. 
* * @param uris the URIs of the items to unindex - * @return the number of files successfully unindexed, - * in the event that some entries were not found in the database + * @return a report containing which files were not unindexed, + * and whether some of them were not found in the database + * @throws IOException if an error occurred + * before the unindexing operation could start, + * such as when failing to access or open the database */ - public default int unindex(Collection uris) { - int unindexed = 0; + public default UnindexReport unindex(Collection uris) throws IOException { + Objects.requireNonNull(uris); + List failures = new ArrayList<>(); for (URI uri : uris) { - unindexed += unindex(uri) ? 1 : 0; + try { + if (!unindex(uri)) { + // failed to unindex, reason unknown + failures.add(new FailedUnindex(uri, null)); + } + } catch (Exception ex) { + failures.add(new FailedUnindex(uri, ex)); + } } - return unindexed; + return UnindexReport.withFailures(failures); } } diff --git a/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java b/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java new file mode 100644 index 000000000..c0d81074e --- /dev/null +++ b/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java @@ -0,0 +1,125 @@ +/** + * Copyright (C) 2014 Universidade de Aveiro, DETI/IEETA, Bioinformatics Group - http://bioinformatics.ua.pt/ + * + * This file is part of Dicoogle/dicoogle-sdk. + * + * Dicoogle/dicoogle-sdk is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Dicoogle/dicoogle-sdk is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Dicoogle. If not, see . + */ +package pt.ua.dicoogle.sdk.datastructs; + +import java.io.Serializable; +import java.net.URI; +import java.util.Collection; +import java.util.Collections; +import java.util.Objects; + +/** Describes a report for a bulk unindexing operation. + */ +public final class UnindexReport implements Serializable { + + /** The description of a file which + * could not be unindexed due to an error. + * + * When an error of this kind occurs, + * it is not specified whether the file remains indexed or not. + */ + public static final class FailedUnindex implements Serializable { + /** The URI to the item which failed to unindex. */ + public final URI uri; + + /** The exception describing the error which led to the failure. + * This field can be null + * when no cause is specified. + */ + public final Exception cause; + + /** Creates a failed unindex description + * due to the file not being found in the database. + * + * @param uri the URI of the file which could not be unindexed + * @param cause the underlying exception, if any + */ + public FailedUnindex(URI uri, Exception cause) { + Objects.requireNonNull(uri); + this.uri = uri; + this.cause = cause; + } + } + + /** URIs of files which were not found. */ + private final Collection notFound; + private final Collection failures; + + /** Creates a full report for a bulk unindexing operation. + * All parameters are nullable, + * in which case is equivalent to passing an empty collection. 
+ * @param notFound the URIs of files which were not found + * @param failures the error reports of files which could not be unindexed + */ + public UnindexReport(Collection notFound, Collection failures) { + if (notFound == null) { + notFound = Collections.emptyList(); + } + if (failures == null) { + failures = Collections.emptyList(); + } + this.notFound = notFound; + this.failures = failures; + } + + /** Creates a report that all files were successfully unindexed. */ + public static UnindexReport ok() { + return new UnindexReport(null, null); + } + + /** Creates a report with the files which failed to unindex + * due to some error. + */ + public static UnindexReport withFailures(Collection failures) { + return new UnindexReport(null, failures); + } + + /** Returns whether all files were successfully unindexed from the database + * as requested. + */ + public boolean isOk() { + return notFound.isEmpty() && failures.isEmpty(); + } + + /** Returns whether all files are no longer indexed, + * meaning that no errors occurred when trying to unindex an indexed file. + * + * This is different from {@link #isOk()} in that + * it does not imply that all files to unindex were found in the database. + * + * @return true if no unindex failures are reported other than files not found + */ + public boolean allUnindexed() { + return failures.isEmpty(); + } + + /** Obtains an immutable collection to + * the files which failed to unindex due to an error. + */ + public Collection getUnindexFailures() { + return Collections.unmodifiableCollection(this.failures); + } + + /** Obtains an immutable collection to the files + * which were not found in the index. 
+ */ + public Collection getNotFound() { + return Collections.unmodifiableCollection(this.notFound); + } +} From 76493aa7e48798539a1fcd16eeaadf75157e6749 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Sat, 15 Oct 2022 14:42:56 +0100 Subject: [PATCH 04/13] [sdk] reiterate on IndexerInterface batch unindex - make it asynchronous: returns a `Task` like in `index` - add second parameter for keeping track of progress --- .../pt/ua/dicoogle/sdk/IndexerInterface.java | 48 +++++++++++++------ 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java b/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java index e053a6bc0..3b50bd24a 100755 --- a/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java +++ b/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java @@ -22,8 +22,11 @@ import java.net.URI; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.Objects; +import java.util.concurrent.Callable; +import java.util.function.Consumer; import pt.ua.dicoogle.sdk.datastructs.Report; import pt.ua.dicoogle.sdk.datastructs.UnindexReport; @@ -91,7 +94,9 @@ public default boolean handles(URI path) { /** * Removes the indexed file at the given path from the database. * - * This operation is synchronous. + * Unlike the other indexing tasks, + * this operation is synchronous + * and will only return when the operation is done. * * @param path the URI of the document * @return whether it was successfully deleted from the database @@ -107,29 +112,44 @@ public default boolean handles(URI path) { * one or more individual operations in batch, * thus becoming faster than unindexing each item individually. * - * This operation is synchronous. - * Consider running long unindexing tasks in a separate thread. + * Like {@linkplain #index}, + * this operation is asynchronous. 
+ * One can keep track of the unindexing task's progress + * by passing a callback function as the second parameter. * * @param uris the URIs of the items to unindex + * @param progressCallback an optional function (can be `null`), + * called for every batch of items successfully unindexed + * to indicate early progress + * and inform consumers that + * it is safe to remove or exclude the unindexed item * @return a report containing which files were not unindexed, * and whether some of them were not found in the database * @throws IOException if an error occurred * before the unindexing operation could start, * such as when failing to access or open the database */ - public default UnindexReport unindex(Collection uris) throws IOException { + public default Task unindex(Collection uris, Consumer> progressCallback) + throws IOException { Objects.requireNonNull(uris); - List failures = new ArrayList<>(); - for (URI uri : uris) { - try { - if (!unindex(uri)) { - // failed to unindex, reason unknown - failures.add(new FailedUnindex(uri, null)); + return new Task<>(() -> { + List failures = new ArrayList<>(); + for (URI uri : uris) { + try { + if (unindex(uri)) { + // unindexed successfully + if (progressCallback != null) { + progressCallback.accept(Collections.singleton(uri)); + } + } else { + // failed to unindex, reason unknown + failures.add(new FailedUnindex(uri, null)); + } + } catch (Exception ex) { + failures.add(new FailedUnindex(uri, ex)); } - } catch (Exception ex) { - failures.add(new FailedUnindex(uri, ex)); } - } - return UnindexReport.withFailures(failures); + return UnindexReport.withFailures(failures); + }); } } From 4c9959947683765afc43867778c9138253f95e20 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Sat, 15 Oct 2022 14:43:06 +0100 Subject: [PATCH 05/13] [sdk] format UnindexReport --- .../pt/ua/dicoogle/sdk/datastructs/UnindexReport.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java b/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java index c0d81074e..ddc360d80 100644 --- a/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java +++ b/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java @@ -43,7 +43,7 @@ public static final class FailedUnindex implements Serializable { * when no cause is specified. */ public final Exception cause; - + /** Creates a failed unindex description * due to the file not being found in the database. * @@ -54,9 +54,9 @@ public FailedUnindex(URI uri, Exception cause) { Objects.requireNonNull(uri); this.uri = uri; this.cause = cause; - } + } } - + /** URIs of files which were not found. */ private final Collection notFound; private final Collection failures; @@ -66,7 +66,7 @@ public FailedUnindex(URI uri, Exception cause) { * in which case is equivalent to passing an empty collection. * @param notFound the URIs of files which were not found * @param failures the error reports of files which could not be unindexed - */ + */ public UnindexReport(Collection notFound, Collection failures) { if (notFound == null) { notFound = Collections.emptyList(); From 0c067e2679e59a54eec1ef0bb317b4b57a152273 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Fri, 21 Jul 2023 12:17:33 +0100 Subject: [PATCH 06/13] [sdk] Improve bulk IndexerInterface#unindex - clarify that it returns a task - remove unused import --- sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java b/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java index 3b50bd24a..475d33eb0 100755 --- a/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java +++ b/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java @@ -25,7 +25,6 @@ import java.util.Collections; import java.util.List; import java.util.Objects; -import 
java.util.concurrent.Callable; import java.util.function.Consumer; import pt.ua.dicoogle.sdk.datastructs.Report; @@ -123,7 +122,8 @@ public default boolean handles(URI path) { * to indicate early progress * and inform consumers that * it is safe to remove or exclude the unindexed item - * @return a report containing which files were not unindexed, + * @return an asynchronous task object returning + * a report containing which files were not unindexed, * and whether some of them were not found in the database * @throws IOException if an error occurred * before the unindexing operation could start, From 680c1544c8b6c495cc913a6263657f04d1ac426e Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Fri, 21 Jul 2023 12:41:22 +0100 Subject: [PATCH 07/13] [sdk] Add UnindexReport#errorCount --- .../java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java b/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java index ddc360d80..9777a4d21 100644 --- a/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java +++ b/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java @@ -122,4 +122,11 @@ public Collection getUnindexFailures() { public Collection getNotFound() { return Collections.unmodifiableCollection(this.notFound); } + + /** Return the total count of files which were requested to be unindexed, + * but were either not found or failed to unindex. 
+ */ + public long errorCount() { + return this.failures.size() + this.notFound.size(); + } } From 38d7640fefdb1499e913970804d9f6fee1576bca Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Fri, 21 Jul 2023 12:52:46 +0100 Subject: [PATCH 08/13] Add bulk unindexing to plugin controller - can only handle one indexer at a time, but other than that it works --- .../ua/dicoogle/plugins/PluginController.java | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java b/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java index 62b7acde7..f86199ecb 100755 --- a/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java +++ b/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java @@ -28,6 +28,7 @@ import pt.ua.dicoogle.plugins.webui.WebUIPluginManager; import pt.ua.dicoogle.sdk.*; import pt.ua.dicoogle.sdk.datastructs.Report; +import pt.ua.dicoogle.sdk.datastructs.UnindexReport; import pt.ua.dicoogle.sdk.datastructs.SearchResult; import pt.ua.dicoogle.sdk.datastructs.dim.DimLevel; import pt.ua.dicoogle.sdk.settings.ConfigurationHolder; @@ -45,6 +46,7 @@ import java.util.*; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; +import java.util.function.Consumer; import java.util.stream.Collectors; import java.util.zip.ZipFile; @@ -761,7 +763,34 @@ public void unindex(URI path, Collection indexProviders) { } } - /** Issue an unindexation procedure to the given indexers. + /** Issue the removal of indexed entries in bulk. 
+ * + * @param indexProvider the name of the indexer + * @param items a collection of item identifiers to unindex + * @param progressCallback an optional function (can be `null`), + * called for every batch of items successfully unindexed + * to indicate early progress + * and inform consumers that + * it is safe to remove or exclude the unindexed item + * @return an asynchronous task object returning + * a report containing which files were not unindexed, + * and whether some of them were not found in the database + * @throws IOException if an error occurred before the unindexing operation could start + */ + public Task unindex(String indexProvider, Collection items, Consumer> progressCallback) throws IOException { + logger.info("Starting unindexing procedure for {} items", items.size()); + + IndexerInterface indexer = null; + if (indexProvider != null) { + indexer = this.getIndexerByName(indexProvider, true); + } + if (indexer == null) { + indexer = this.getIndexingPlugins(true).iterator().next(); + } + return indexer.unindex(items, progressCallback); + } + + /** Issue an unindexing procedure to the given indexers. 
* * @param path the URI of the directory or file to unindex * @param indexers a collection of providers From 483f8149b061dda1acc6205135f1aee9408c0d6e Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Fri, 21 Jul 2023 12:54:04 +0100 Subject: [PATCH 09/13] Tweak PluginController - remove deprecated method call #handles, check scheme instead --- .../src/main/java/pt/ua/dicoogle/plugins/PluginController.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java b/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java index f86199ecb..3965ae941 100755 --- a/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java +++ b/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java @@ -811,7 +811,7 @@ public void remove(URI uri) { } public void doRemove(URI uri, StorageInterface si) { - if (si.handles(uri)) { + if (Objects.equals(uri.getScheme(), si.getScheme())) { si.remove(uri); } else { logger.warn("Storage Plugin does not handle URI: {},{}", uri, si); From 1b9980427a828b73e507416e8d3550cf3191d3d0 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Fri, 21 Jul 2023 12:54:47 +0100 Subject: [PATCH 10/13] Update unindex servlet to use bulk unindexing where appropriate --- .../servlets/management/UnindexServlet.java | 61 ++++++++++++++----- 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/dicoogle/src/main/java/pt/ua/dicoogle/server/web/servlets/management/UnindexServlet.java b/dicoogle/src/main/java/pt/ua/dicoogle/server/web/servlets/management/UnindexServlet.java index f98f4b879..55143ecfb 100644 --- a/dicoogle/src/main/java/pt/ua/dicoogle/server/web/servlets/management/UnindexServlet.java +++ b/dicoogle/src/main/java/pt/ua/dicoogle/server/web/servlets/management/UnindexServlet.java @@ -21,12 +21,13 @@ import java.io.IOException; import java.net.URI; -import java.net.URISyntaxException; +import java.util.ArrayList; import java.util.Arrays; import 
java.util.Collection; import java.util.List; import java.util.concurrent.ExecutionException; import java.util.stream.Collectors; +import java.util.stream.Stream; import java.util.stream.StreamSupport; import javax.servlet.ServletException; @@ -42,6 +43,7 @@ import pt.ua.dicoogle.plugins.PluginController; import pt.ua.dicoogle.sdk.QueryInterface; import pt.ua.dicoogle.sdk.datastructs.SearchResult; +import pt.ua.dicoogle.sdk.datastructs.UnindexReport; import pt.ua.dicoogle.sdk.task.JointQueryTask; import pt.ua.dicoogle.sdk.task.Task; @@ -81,26 +83,50 @@ protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws S "No arguments provided; must include either one of `uri`, `SOPInstanceUID`, `SeriesInstanceUID` or `StudyInstanceUID`"); return; } + + PluginController pc = PluginController.getInstance(); long indexed = 0; long failed = 0; + long notfound = 0; - Collection uris = resolveURIs(paramUri, paramSop, paramSeries, paramStudy); + Collection uris = resolveURIs(paramUri, paramSop, paramSeries, paramStudy); - // unindex - for (String strUri : uris) { - try { - URI uri = new URI(strUri); + // if only one entry, do it inline + if (uris.size() <= 1) { + for (URI uri : uris) { try { - PluginController.getInstance().unindex(uri, providers); + pc.unindex(uri, providers); indexed += 1; } catch (RuntimeException ex) { logger.error("Failed to unindex {}", uri, ex); failed += 1; } - } catch (URISyntaxException ex) { - logger.warn("Received bad URI", ex); - failed += 1; + } + + } else { + // if many, use bulk unindexing + List> tasks = new ArrayList<>(); + + if (providers == null) { + providers = pc.getIndexingPlugins(true).stream() + .map(p -> p.getName()) + .collect(Collectors.toList()); + } + for (String indexProvider: providers) { + tasks.add(pc.unindex(indexProvider, uris, null)); + } + + int i = 0; + for (Task task: tasks) { + try { + UnindexReport report = task.get(); + indexed = uris.size() - report.errorCount(); + failed = 
report.getUnindexFailures().size(); + notfound = report.getNotFound().size(); + } catch (Exception ex) { + logger.error("Task to unindex items in {} failed", providers.get(i), ex); + } } } @@ -109,15 +135,18 @@ protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws S JSONObject obj = new JSONObject(); obj.put("indexed", indexed); obj.put("failed", failed); + obj.put("notFound", notfound); resp.setStatus(200); resp.getWriter().write(obj.toString()); } /// Convert the given parameters into a list of URIs - private static Collection resolveURIs(String[] paramUri, String[] paramSop, String[] paramSeries, + private static Collection resolveURIs(String[] paramUri, String[] paramSop, String[] paramSeries, String[] paramStudy) { if (paramUri != null) { - return Arrays.asList(paramUri); + return Stream.of(paramUri) + .map(URI::create) + .collect(Collectors.toList()); } String attribute = null; if (paramSop != null) { @@ -142,11 +171,11 @@ public void onCompletion() {} }; try { return StreamSupport.stream(PluginController.getInstance() - .queryAll(holder, dcmAttribute + ":" + uid).get().spliterator(), false); + .queryAll(holder, dcmAttribute + ":\"" + uid + '"').get().spliterator(), false); } catch (InterruptedException | ExecutionException ex) { throw new RuntimeException(ex); } - }).map(r -> r.getURI().toString()).collect(Collectors.toList()); + }).map(r -> r.getURI()).collect(Collectors.toList()); } String dicomProvider = dicomProviders.iterator().next(); @@ -154,7 +183,7 @@ public void onCompletion() {} // translate to URIs QueryInterface dicom = PluginController.getInstance().getQueryProviderByName(dicomProvider, false); - return StreamSupport.stream(dicom.query(dcmAttribute + ":" + uid).spliterator(), false); - }).map(r -> r.getURI().toString()).collect(Collectors.toList()); + return StreamSupport.stream(dicom.query(dcmAttribute + ":\"" + uid + '"').spliterator(), false); + }).map(r -> r.getURI()).collect(Collectors.toList()); } } From 
70ce1b0f5d591e8bb6936caa10e3954e045a0956 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Thu, 7 Mar 2024 12:06:31 +0000 Subject: [PATCH 11/13] [core] Dispatch batch-unindex tasks --- .../java/pt/ua/dicoogle/plugins/PluginController.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java b/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java index 3965ae941..d782760a7 100755 --- a/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java +++ b/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java @@ -787,7 +787,16 @@ public Task unindex(String indexProvider, Collection items, if (indexer == null) { indexer = this.getIndexingPlugins(true).iterator().next(); } - return indexer.unindex(items, progressCallback); + Task task = indexer.unindex(items, progressCallback); + if (task != null) { + final String taskUniqueID = UUID.randomUUID().toString(); + task.setName(String.format("[%s]unindex", indexer.getName())); + task.onCompletion(() -> { + logger.info("Unindexing task [{}] complete", taskUniqueID); + }); + taskManager.dispatch(task); + } + return task; } /** Issue an unindexing procedure to the given indexers. 
From 216b11fb603680f6674e70fc633abd0ba69e6876 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Wed, 17 Apr 2024 15:52:05 +0100 Subject: [PATCH 12/13] [sdk] Reiterate on the UnindexReport API - record a collection of URIs in each unindex failure --- .../pt/ua/dicoogle/sdk/IndexerInterface.java | 4 +- .../sdk/datastructs/UnindexReport.java | 39 +++++++++++-------- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java b/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java index 475d33eb0..e7df68890 100755 --- a/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java +++ b/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java @@ -143,10 +143,10 @@ public default Task unindex(Collection uris, Consumer urisAffected; /** The exception describing the error which led to the failure. * This field can be null @@ -50,11 +50,16 @@ public static final class FailedUnindex implements Serializable { * @param uri the URI of the file which could not be unindexed * @param cause the underlying exception, if any */ - public FailedUnindex(URI uri, Exception cause) { - Objects.requireNonNull(uri); - this.uri = uri; + public FailedUnindex(Collection urisAffected, Exception cause) { + Objects.requireNonNull(urisAffected); + this.urisAffected = urisAffected; this.cause = cause; } + + @Override + public String toString() { + return "FailedUnindex{urisAffected=" + urisAffected + ", cause=" + cause + "}"; + } } /** URIs of files which were not found. */ @@ -64,6 +69,8 @@ public FailedUnindex(URI uri, Exception cause) { /** Creates a full report for a bulk unindexing operation. * All parameters are nullable, * in which case is equivalent to passing an empty collection. + * Once created, the report is final and immutable. 
+ * * @param notFound the URIs of files which were not found * @param failures the error reports of files which could not be unindexed */ @@ -78,13 +85,13 @@ public UnindexReport(Collection notFound, Collection failure this.failures = failures; } - /** Creates a report that all files were successfully unindexed. */ + /** Creates a report with no unindexing failures. + */ public static UnindexReport ok() { return new UnindexReport(null, null); } - /** Creates a report with the files which failed to unindex - * due to some error. + /** Creates a report with the given failures. */ public static UnindexReport withFailures(Collection failures) { return new UnindexReport(null, failures); @@ -110,7 +117,7 @@ public boolean allUnindexed() { } /** Obtains an immutable collection to - * the files which failed to unindex due to an error. + * the file batches which failed to unindex due to errors. */ public Collection getUnindexFailures() { return Collections.unmodifiableCollection(this.failures); @@ -123,8 +130,8 @@ public Collection getNotFound() { return Collections.unmodifiableCollection(this.notFound); } - /** Return the total count of files which were requested to be unindexed, - * but were either not found or failed to unindex. + /** Returns the total count of errors reported during unindexing + * due to either not having been found or other failures. 
*/ public long errorCount() { return this.failures.size() + this.notFound.size(); From 471566cbf94740178c426d4d3d8d4b3b6131183e Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Mon, 17 Jun 2024 15:59:00 +0100 Subject: [PATCH 13/13] [sdk] Tweak UnindexReport interface and fix error file count - provide clearer methods to collect the counts of files which were not unindexed successfully --- .../servlets/management/UnindexServlet.java | 4 +-- .../sdk/datastructs/UnindexReport.java | 28 ++++++++++++++++--- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/dicoogle/src/main/java/pt/ua/dicoogle/server/web/servlets/management/UnindexServlet.java b/dicoogle/src/main/java/pt/ua/dicoogle/server/web/servlets/management/UnindexServlet.java index 55143ecfb..02e14f824 100644 --- a/dicoogle/src/main/java/pt/ua/dicoogle/server/web/servlets/management/UnindexServlet.java +++ b/dicoogle/src/main/java/pt/ua/dicoogle/server/web/servlets/management/UnindexServlet.java @@ -121,8 +121,8 @@ protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws S for (Task task: tasks) { try { UnindexReport report = task.get(); - indexed = uris.size() - report.errorCount(); - failed = report.getUnindexFailures().size(); + indexed = uris.size() - report.notUnindexedFileCount(); + failed = report.failedFileCount(); notfound = report.getNotFound().size(); } catch (Exception ex) { logger.error("Task to unindex items in {} failed", providers.get(i), ex); diff --git a/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java b/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java index 2e9109f86..34ebcc952 100644 --- a/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java +++ b/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java @@ -130,10 +130,30 @@ public Collection getNotFound() { return Collections.unmodifiableCollection(this.notFound); } - /** Returns the total count of errors reported during unindexing - * due to 
either not having been found or other failures. + /** Returns the total count of failures reported during unindexing. + * + * Note that this does not necessarily correspond to + * the number of files affected, + * and does not include files which were not found. + */ + public long failureCount() { + return this.failures.size(); + } + + /** Returns the total count of files which were not unindexed, + * whether because they were not found + * or could not be unindexed for other reasons. + */ + public long notUnindexedFileCount() { + return this.notFound.size() + failedFileCount(); + } + + /** Returns the total count of files which failed to unindex + * for reasons other than the files not being found. */ - public long errorCount() { - return this.failures.size() + this.notFound.size(); + public long failedFileCount() { + return this.failures.stream() + .mapToLong(f -> f.urisAffected.size()) + .sum(); } }