Add reset method to ProgressReporterAbstract #4522

Merged · 6 commits · Oct 28, 2020

Changes from 2 commits
24 changes: 19 additions & 5 deletions aiida/common/progress_reporter.py
@@ -50,9 +50,13 @@ def __init__(self, *, total: int, desc: Optional[str] = None, **kwargs: Any):
         :param desc: A description of the process

         """
-        self.total = total
-        self.desc = desc
-        self.increment = 0
+        self._total = total
+        self._desc = desc
+        self._increment = 0
+
+    @property
+    def total(self) -> int:
+        return self._total

     def __enter__(self) -> 'ProgressReporterAbstract':
         """Enter the contextmanager."""
@@ -71,15 +75,25 @@ def set_description_str(self, text: Optional[str] = None, refresh: bool = True):
         :param refresh: Force refresh of the progress reporter

         """
-        self.desc = text
+        self._desc = text

     def update(self, n: int = 1):  # pylint: disable=invalid-name
         """Update the progress counter.

         :param n: Increment to add to the internal counter of iterations

         """
-        self.increment += n
+        self._increment += n
+
+    def reset(self, total: Optional[int] = None):
+        """Reset the current iteration count to zero.
+
+        :param total: If not None, update the number of expected iterations.
+
+        """
+        self._increment = 0
+        if total is not None:
+            self._total = total


 class ProgressReporterNull(ProgressReporterAbstract):
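The new `reset` method lets a single reporter be reused across several phases of a process, instead of opening a fresh context manager per phase. A minimal usage sketch (assuming the currently registered reporter; the phase names and totals are illustrative):

```python
from aiida.common.progress_reporter import get_progress_reporter

with get_progress_reporter()(total=10, desc='Phase 1') as progress:
    for _ in range(10):
        progress.update()

    # Reuse the same reporter for a second phase: zero the internal
    # counter and replace the expected total.
    progress.reset(total=5)
    progress.set_description_str('Phase 2')
    for _ in range(5):
        progress.update()
```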
124 changes: 64 additions & 60 deletions aiida/tools/importexport/archive/readers.py
@@ -13,7 +13,7 @@
 import os
 from pathlib import Path
 from types import TracebackType
-from typing import Any, cast, Dict, Iterable, Iterator, List, Optional, Set, Tuple, Type
+from typing import Any, Callable, cast, Dict, Iterable, Iterator, List, Optional, Set, Tuple, Type
 import zipfile
 import tarfile
@@ -22,7 +22,6 @@
 from aiida.common.log import AIIDA_LOGGER
 from aiida.common.exceptions import InvalidOperation
 from aiida.common.folders import Folder, SandboxFolder
-from aiida.common.progress_reporter import get_progress_reporter
 from aiida.tools.importexport.common.config import EXPORT_VERSION, ExportFileFormat, NODES_EXPORT_SUBFOLDER
 from aiida.tools.importexport.common.exceptions import (CorruptArchive, IncompatibleArchiveVersionError)
 from aiida.tools.importexport.archive.common import ArchiveMetadata
@@ -184,15 +183,21 @@ def iter_link_data(self) -> Iterator[dict]:
         """Iterate over links: {'input': <UUID>, 'output': <UUID>, 'label': <LABEL>, 'type': <TYPE>}"""

     @abstractmethod
-    def iter_node_repos(self,
-                        uuids: Iterable[str],
-                        progress: bool = True,
-                        description='Iterating node repos') -> Iterator[Folder]:
+    def iter_node_repos(
+        self,
+        uuids: Iterable[str],
+        callback: Optional[Callable[[str, Any], None]] = None,
+    ) -> Iterator[Folder]:
         """Yield temporary folders containing the contents of the repository for each node.

         :param uuids: UUIDs of the nodes over whose repository folders to iterate
-        :param progress: report progress
-        :param description: description for progress report
+        :param callback: a callback to report on the process, ``callback(action, value)``,
+            with the following callback signatures:
+
+            - ``callback('init', {'total': <int>, 'description': <str>})``,
+              to signal the start of a process, its total iterations and description
+            - ``callback('update', <int>)``,
+              to signal an update to the process and the number of iterations to progress

         :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: If the repository does not exist.
         """
@@ -204,7 +209,7 @@ def node_repository(self, uuid: str) -> Folder:

         :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: If the repository does not exist.
         """
-        return next(self.iter_node_repos([uuid], progress=False))
+        return next(self.iter_node_repos([uuid]))


 class ReaderJsonBase(ArchiveReaderAbstract):
@@ -259,11 +264,17 @@ def _get_data(self):
         """Retrieve the data JSON."""
         raise NotImplementedError()

-    def _extract(self, *, path_prefix: str, progress: bool):
+    def _extract(self, *, path_prefix: str, callback: Optional[Callable[[str, Any], None]] = None):
         """Extract repository data to a temporary folder.

         :param path_prefix: Only extract paths starting with this prefix.
-        :param progress: Whether to report progress of the extraction
+        :param callback: a callback to report on the process, ``callback(action, value)``,
+            with the following callback signatures:
+
+            - ``callback('init', {'total': <int>, 'description': <str>})``,
+              to signal the start of a process, its total iterations and description
+            - ``callback('update', <int>)``,
+              to signal an update to the process and the number of iterations to progress

         :raises TypeError: if parameter types are not respected
         """
@@ -343,10 +354,11 @@ def iter_link_data(self) -> Iterator[dict]:
         for value in self._get_data()['links_uuid']:
             yield value

-    def iter_node_repos(self,
-                        uuids: Iterable[str],
-                        progress: bool = True,
-                        description='Iterating node repositories') -> Iterator[Folder]:
+    def iter_node_repos(
+        self,
+        uuids: Iterable[str],
+        callback: Optional[Callable[[str, Any], None]] = None,
+    ) -> Iterator[Folder]:
         path_prefixes = [os.path.join(self.REPO_FOLDER, export_shard_uuid(uuid)) for uuid in uuids]

         if not path_prefixes:
@@ -357,26 +369,19 @@ def iter_node_repos(self,
         # unarchive the common folder if it does not exist
         common_prefix = os.path.commonprefix(path_prefixes)
         if not self._sandbox.get_subfolder(common_prefix).exists():
-            self._extract(path_prefix=common_prefix, progress=progress)
-
-        if progress:
-            with get_progress_reporter()(total=len(path_prefixes), desc=description) as report:
-                for uuid, path_prefix in zip(uuids, path_prefixes):
-                    report.update()
-                    subfolder = self._sandbox.get_subfolder(path_prefix)
-                    if not subfolder.exists():
-                        raise CorruptArchive(
-                            f'Unable to find the repository folder for Node with UUID={uuid} in the archive'
-                        )
-                    yield subfolder
-        else:
-            for uuid, path_prefix in zip(uuids, path_prefixes):
-                subfolder = self._sandbox.get_subfolder(path_prefix)
-                if not subfolder.exists():
-                    raise CorruptArchive(
-                        f'Unable to find the repository folder for Node with UUID={uuid} in the exported file'
-                    )
-                yield subfolder
+            self._extract(path_prefix=common_prefix, callback=callback)
+
+        if callback is not None:
+            callback('init', {'total': len(path_prefixes), 'description': 'Iterating node repositories'})
+        for uuid, path_prefix in zip(uuids, path_prefixes):
+            if callback is not None:
+                callback('update', 1)
+            subfolder = self._sandbox.get_subfolder(path_prefix)
+            if not subfolder.exists():
+                raise CorruptArchive(
+                    f'Unable to find the repository folder for Node with UUID={uuid} in the exported file'
+                )
+            yield subfolder


 class ReaderJsonZip(ReaderJsonBase):
@@ -410,17 +415,17 @@ def _get_data(self):
             raise CorruptArchive(f'required file {self.FILENAME_DATA} is not included')
         return self._data

-    def _extract(self, *, path_prefix: str, progress: bool):
+    def _extract(self, *, path_prefix: str, callback: Optional[Callable[[str, Any], None]] = None):
         self.assert_within_context()
         assert self._sandbox is not None  # required by mypy
         try:
             with zipfile.ZipFile(self.filename, 'r', allowZip64=True) as handle:
                 members = [m for m in handle.namelist() if m.startswith(path_prefix)]
-                if progress:
-                    with get_progress_reporter()(total=len(members), desc='Extracting repository files') as report:
-                        for membername in members:
-                            report.update()
-                            handle.extract(path=self._sandbox.abspath, member=membername)
+                if callback is not None:
+                    callback('init', {'total': len(members), 'description': 'Extracting repository files'})
+                    for membername in members:
+                        callback('update', 1)
+                        handle.extract(path=self._sandbox.abspath, member=membername)
                 else:
                     for membername in members:
                         handle.extract(path=self._sandbox.abspath, member=membername)
@@ -457,29 +462,28 @@ def _get_data(self):
             raise CorruptArchive(f'required file `{self.FILENAME_DATA}` is not included')
         return self._data

-    def _extract(self, *, path_prefix: str, progress: bool):
+    def _extract(self, *, path_prefix: str, callback: Optional[Callable[[str, Any], None]] = None):
         self.assert_within_context()
         assert self._sandbox is not None  # required by mypy
         try:
             with tarfile.open(self.filename, 'r:*', format=tarfile.PAX_FORMAT) as handle:
                 members = [m for m in handle.getmembers() if m.name.startswith(path_prefix)]
-                if progress:
-                    with get_progress_reporter()(total=len(members), desc='Extracting repository files') as report:
-                        for member in members:
-                            report.update()
-                            if member.isdev():
-                                # safety: skip if character device, block device or FIFO
-                                msg = f'WARNING, device found inside the import file: {member.name}'
-                                ARCHIVE_READER_LOGGER.warning(msg)
-                            if member.issym() or member.islnk():
-                                # safety: although dereference=True set in export, so this should not occur
-                                msg = f'WARNING, symlink found inside the import file: {member.name}'
-                                ARCHIVE_READER_LOGGER.warning(msg)
-                                continue
-                            handle.extract(path=self._sandbox.abspath, member=member.name)
-                else:
-                    for membername in members:
-                        handle.extract(path=self._sandbox.abspath, member=membername)
+                if callback is not None:
+                    callback('init', {'total': len(members), 'description': 'Extracting repository files'})
+                for member in members:
+                    if callback is not None:
+                        callback('update', 1)
+                    if member.isdev():
+                        # safety: skip if character device, block device or FIFO
+                        msg = f'WARNING, device found inside the import file: {member.name}'
+                        ARCHIVE_READER_LOGGER.warning(msg)
+                        continue
+                    if member.issym() or member.islnk():
+                        # safety: although dereference=True set in export, so this should not occur
+                        msg = f'WARNING, symlink found inside the import file: {member.name}'
+                        ARCHIVE_READER_LOGGER.warning(msg)
+                        continue
+                    handle.extract(path=self._sandbox.abspath, member=member.name)
         except zipfile.BadZipfile:
             raise TypeError('The input file format is not valid (not a zip file)')
@@ -507,7 +511,7 @@ def _get_data(self):
         self._data = json.loads(path.read_text(encoding='utf8'))
         return self._data

-    def _extract(self, *, path_prefix: str, progress: bool):
+    def _extract(self, *, path_prefix: str, callback: Optional[Callable[[str, Any], None]] = None):
         # pylint: disable=unused-argument
         self.assert_within_context()
         assert self._sandbox is not None  # required by mypy
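With this change the readers report progress through a plain `callback(action, value)` protocol instead of instantiating a progress reporter themselves. A minimal sketch of a conforming callback; the `get_reader` format key, archive path, and UUID below are placeholders, not taken from this diff:

```python
from aiida.tools.importexport.archive.readers import get_reader

def print_progress(action, value):
    """Log the 'init' and 'update' events emitted by the reader."""
    if action == 'init':
        # value is {'total': <int>, 'description': <str>}
        print(f"{value['description']}: 0/{value['total']}")
    elif action == 'update':
        # value is the number of iterations to advance the counter by
        print(f'advanced by {value}')

# Placeholder format key and archive path:
with get_reader('zip')('export.aiida') as reader:
    for folder in reader.iter_node_repos(['<node-uuid>'], callback=print_progress):
        ...  # use folder.abspath while the temporary folder is alive
```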
33 changes: 32 additions & 1 deletion aiida/tools/importexport/dbimport/backends/common.py
@@ -9,11 +9,14 @@
 ###########################################################################
 """Common import functions for both database backend"""
 import copy
-from typing import Dict, Optional
+from typing import Dict, List, Optional

 from aiida.common import timezone
 from aiida.common.folders import RepositoryFolder
+from aiida.common.progress_reporter import get_progress_reporter
 from aiida.orm import Group, ImportGroup, Node, QueryBuilder
+from aiida.orm.utils._repository import Repository
+from aiida.tools.importexport.archive.readers import ArchiveReaderAbstract
 from aiida.tools.importexport.common import exceptions
 from aiida.tools.importexport.common.config import NODE_ENTITY_NAME
 from aiida.tools.importexport.dbimport.utils import IMPORT_LOGGER
@@ -22,6 +25,34 @@
 MAX_GROUPS = 100


+def _copy_node_repositories(*, uuids_to_create: List[str], reader: ArchiveReaderAbstract):
+    """Copy the repositories of new nodes from the archive to the AiiDA profile.
+
+    :param uuids_to_create: the node UUIDs to copy
+    :param reader: the archive reader
+
+    """
+    if not uuids_to_create:
+        return
+    IMPORT_LOGGER.debug('CREATING NEW NODE REPOSITORIES...')
+    with get_progress_reporter()(total=1) as progress:
+
+        def _callback(action, value):
+            if action == 'init':
+                progress.reset(value['total'])
+                progress.set_description_str(value['description'])
+            elif action == 'update':
+                progress.update(value)
+
+        for import_entry_uuid, subfolder in zip(
+            uuids_to_create, reader.iter_node_repos(uuids_to_create, callback=_callback)
+        ):
+            destdir = RepositoryFolder(section=Repository._section_name, uuid=import_entry_uuid)  # pylint: disable=protected-access
+            # Replace the folder, possibly destroying existing previous folders, and move the files
+            # (faster if we are on the same filesystem, and in any case the source is a SandboxFolder)
+            destdir.replace_with_folder(subfolder.abspath, move=True, overwrite=True)
+
+
 def _make_import_group(
     *, group: Optional[ImportGroup], existing_entries: Dict[str, Dict[str, dict]],
     new_entries: Dict[str, Dict[str, dict]], foreign_ids_reverse_mappings: Dict[str, Dict[str, int]]
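`_copy_node_repositories` is where the two new APIs meet: the `_callback` closure maps the reader's `'init'` event onto `reset`/`set_description_str` and its `'update'` events onto `update`, so the reader never needs to know which reporter is installed. The same adapter could be factored out for reuse; a sketch, where the helper name is hypothetical and simply mirrors the closure above:

```python
from aiida.common.progress_reporter import get_progress_reporter

def reporter_callback(progress):
    """Adapt a progress reporter to the ``callback(action, value)`` protocol."""

    def _callback(action, value):
        if action == 'init':
            progress.reset(value['total'])
            progress.set_description_str(value['description'])
        elif action == 'update':
            progress.update(value)

    return _callback

# Start with a dummy total of 1; the first 'init' event resets it.
with get_progress_reporter()(total=1) as progress:
    callback = reporter_callback(progress)
    # pass ``callback`` to e.g. reader.iter_node_repos(..., callback=callback)
```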
15 changes: 2 additions & 13 deletions aiida/tools/importexport/dbimport/backends/django.py
@@ -13,13 +13,11 @@
 from typing import Any, Dict, Iterable, Optional, Set, Tuple
 import warnings

-from aiida.common.folders import RepositoryFolder
 from aiida.common.links import LinkType, validate_link_label
 from aiida.common.progress_reporter import get_progress_reporter
 from aiida.common.utils import get_object_from_string, validate_uuid
 from aiida.common.warnings import AiidaDeprecationWarning
 from aiida.manage.configuration import get_config_option
-from aiida.orm.utils._repository import Repository
 from aiida.orm import Group

 from aiida.tools.importexport.common import exceptions
@@ -36,7 +34,7 @@
 from aiida.tools.importexport.archive.readers import ArchiveReaderAbstract, get_reader

 from aiida.tools.importexport.dbimport.backends.common import (
-    _make_import_group, _sanitize_extras, MAX_COMPUTERS, MAX_GROUPS
+    _copy_node_repositories, _make_import_group, _sanitize_extras, MAX_COMPUTERS, MAX_GROUPS
 )


@@ -447,17 +445,8 @@ def _store_entity_data(

         # Before storing entries in the DB, I store the files (if these are nodes).
         # Note: only for new entries!
-        if objects_to_create:
-            IMPORT_LOGGER.debug('CREATING NEW NODE REPOSITORIES...')
-            uuids_to_create = [obj.uuid for obj in objects_to_create]
-            for import_entry_uuid, subfolder in zip(
-                uuids_to_create,
-                reader.iter_node_repos(uuids_to_create, progress=True, description='Copying Repository Folders')
-            ):
-                destdir = RepositoryFolder(section=Repository._section_name, uuid=import_entry_uuid)
-                # Replace the folder, possibly destroying existing previous folders, and move the files
-                # (faster if we are on the same filesystem, and in any case the source is a SandboxFolder)
-                destdir.replace_with_folder(subfolder.abspath, move=True, overwrite=True)
+        uuids_to_create = [obj.uuid for obj in objects_to_create]
+        _copy_node_repositories(uuids_to_create=uuids_to_create, reader=reader)

         # For the existing nodes that are also in the imported list we also update their extras if necessary
         if existing_entries[entity_name]:
17 changes: 3 additions & 14 deletions aiida/tools/importexport/dbimport/backends/sqla.py
@@ -7,7 +7,7 @@
 # For further information on the license, see the LICENSE.txt file #
 # For further information please visit http://www.aiida.net #
 ###########################################################################
-# pylint: disable=too-many-nested-blocks,protected-access,fixme,too-many-arguments,too-many-locals,too-many-branches,too-many-statements
+# pylint: disable=too-many-nested-blocks,fixme,too-many-arguments,too-many-locals,too-many-branches,too-many-statements
 """ SQLAlchemy-specific import of AiiDA entities """
 from contextlib import contextmanager
 from itertools import chain
@@ -17,14 +17,12 @@
 from sqlalchemy.orm import Session

 from aiida.common import json
-from aiida.common.folders import RepositoryFolder
 from aiida.common.links import LinkType
 from aiida.common.progress_reporter import get_progress_reporter
 from aiida.common.utils import get_object_from_string, validate_uuid
 from aiida.common.warnings import AiidaDeprecationWarning
 from aiida.orm import QueryBuilder, Node, Group
 from aiida.orm.utils.links import link_triple_exists, validate_link
-from aiida.orm.utils._repository import Repository

 from aiida.tools.importexport.common import exceptions
 from aiida.tools.importexport.common.config import DUPL_SUFFIX
@@ -42,7 +40,7 @@
 from aiida.tools.importexport.archive.readers import ArchiveReaderAbstract, get_reader

 from aiida.tools.importexport.dbimport.backends.common import (
-    _make_import_group, _sanitize_extras, MAX_COMPUTERS, MAX_GROUPS
+    _copy_node_repositories, _make_import_group, _sanitize_extras, MAX_COMPUTERS, MAX_GROUPS
 )


@@ -514,17 +512,8 @@ def _store_entity_data(

         # Before storing entries in the DB, I store the files (if these are nodes).
         # Note: only for new entries!
-        if objects_to_create:
-            IMPORT_LOGGER.debug('CREATING NEW NODE REPOSITORIES...')
-            uuids_to_create = [obj.uuid for obj in objects_to_create]
-            for import_entry_uuid, subfolder in zip(
-                uuids_to_create,
-                reader.iter_node_repos(uuids_to_create, progress=True, description='Copying Repository Folders')
-            ):
-                destdir = RepositoryFolder(section=Repository._section_name, uuid=import_entry_uuid)
-                # Replace the folder, possibly destroying existing previous folders, and move the files
-                # (faster if we are on the same filesystem, and in any case the source is a SandboxFolder)
-                destdir.replace_with_folder(subfolder.abspath, move=True, overwrite=True)
+        uuids_to_create = [obj.uuid for obj in objects_to_create]
+        _copy_node_repositories(uuids_to_create=uuids_to_create, reader=reader)

         # For the existing nodes that are also in the imported list we also update their extras if necessary
         if existing_entries[entity_name]: