diff --git a/datalad_next/commands/tree.py b/datalad_next/commands/tree.py index c6662587..41f3553c 100644 --- a/datalad_next/commands/tree.py +++ b/datalad_next/commands/tree.py @@ -34,7 +34,7 @@ NoDatasetFound ) -from datalad.local.subdatasets import Subdatasets +from datalad_next.iter_collections import iter_submodules from datalad_next.constraints import ( EnsureBool, EnsureDataset, @@ -608,22 +608,10 @@ def get_subds_paths(ds_path: Path): # submodules. Since we need to run it to (A) calculate dataset depth and # (B) detect non-installed datasets, we cache results, so that the list of # subdatasets is computed only once for each parent dataset. - - def res_filter(res): - return res.get('status') == 'ok' and res.get('type') == 'dataset' - - # call subdatasets command instead of dataset method `ds.subdatasets()` - # to avoid potentially expensive import of full datalad API - return Subdatasets.__call__( - dataset=ds_path, - recursive=False, - state='any', # include not-installed subdatasets - result_filter=res_filter, - on_failure='ignore', - result_xfm='paths', - result_renderer='disabled', - return_type='list' - ) + return [ + str(ds_path / sm.path) + for sm in iter_submodules(ds_path) + ] @lru_cache() diff --git a/datalad_next/iter_collections/__init__.py b/datalad_next/iter_collections/__init__.py index 72843ab1..26ee9fb0 100644 --- a/datalad_next/iter_collections/__init__.py +++ b/datalad_next/iter_collections/__init__.py @@ -23,6 +23,7 @@ iter_gitstatus iter_gittree iter_gitworktree + iter_submodules iter_tar iter_zip TarfileItem @@ -67,6 +68,7 @@ # TODO move to datalad_next.types? 
GitWorktreeFileSystemItem, iter_gitworktree, + iter_submodules, ) from .annexworktree import ( iter_annexworktree, diff --git a/datalad_next/iter_collections/gitstatus.py b/datalad_next/iter_collections/gitstatus.py index aa37e907..f5c31f62 100644 --- a/datalad_next/iter_collections/gitstatus.py +++ b/datalad_next/iter_collections/gitstatus.py @@ -22,7 +22,6 @@ ) from datalad_next.repo_utils import ( get_worktree_head, - iter_submodules, ) from .gitdiff import ( @@ -35,6 +34,7 @@ GitTreeItem, GitTreeItemType, iter_gitworktree, + iter_submodules, lsfiles_untracked_args, _git_ls_files, ) diff --git a/datalad_next/iter_collections/gitworktree.py b/datalad_next/iter_collections/gitworktree.py index c7f6a18b..3ba3e925 100644 --- a/datalad_next/iter_collections/gitworktree.py +++ b/datalad_next/iter_collections/gitworktree.py @@ -235,6 +235,28 @@ def iter_gitworktree( # report in the next loop iteration +def iter_submodules( + path: Path, +) -> Generator[GitTreeItem, None, None]: + """Given a path, report all submodules of a repository worktree underneath + + This is a thin convenience wrapper around ``iter_gitworktree()``. 
+ """ + for item in iter_gitworktree( + path, + untracked=None, + link_target=False, + fp=False, + recursive='repository', + ): + # exclude non-submodules, or a submodule that was found at + # the root path -- which would indicate that the submodule + # itself is not around, only its record in the parent + if item.gittype == GitTreeItemType.submodule \ + and item.name != PurePath('.'): + yield item + + def _get_item( basepath: Path, link_target: bool, diff --git a/datalad_next/repo_utils/__init__.py b/datalad_next/repo_utils/__init__.py index d1160e89..612e5ef4 100644 --- a/datalad_next/repo_utils/__init__.py +++ b/datalad_next/repo_utils/__init__.py @@ -1,74 +1,12 @@ """Common repository operations -""" -from __future__ import annotations - -from pathlib import ( - Path, - PurePath, -) -from typing import Generator - -from datalad_next.exceptions import CapturedException -from datalad_next.iter_collections.gitworktree import ( - GitTreeItem, - GitTreeItemType, - iter_gitworktree, -) -from datalad_next.runners import ( - CommandError, - call_git_lines, -) - - -def iter_submodules( - path: Path, -) -> Generator[GitTreeItem, None, None]: - """Given a path, report all submodules of a repository underneath it""" - for item in iter_gitworktree( - path, - untracked=None, - link_target=False, - fp=False, - recursive='repository', - ): - # exclude non-submodules, or a submodule that was found at - # the root path -- which would indicate that the submodule - # itself it not around, only its record in the parent - if item.gittype == GitTreeItemType.submodule \ - and item.name != PurePath('.'): - yield item +.. currentmodule:: datalad_next.repo_utils +.. 
autosummary:: + :toctree: generated -def get_worktree_head( - path: Path, -) -> tuple[str | None, str | None]: - try: - HEAD = call_git_lines( - # we add the pathspec disambiguator to get cleaner error messages - # (and we only report the first item below, to take it off again) - ['rev-parse', '-q', '--symbolic-full-name', 'HEAD', '--'], - cwd=path, - )[0] - except (NotADirectoryError, FileNotFoundError) as e: - raise ValueError('path not found') from e - except CommandError as e: - CapturedException(e) - if 'fatal: not a git repository' in e.stderr: - raise ValueError(f'no Git repository at {path!r}') from e - elif 'fatal: bad revision' in e.stderr: - return (None, None) - else: - # no idea reraise - raise + get_worktree_head +""" - if HEAD.startswith('refs/heads/adjusted/'): - # this is a git-annex adjusted branch. do the comparison against - # its basis. it is not meaningful to track the managed branch in - # a superdataset - return ( - HEAD, - # replace 'refs/heads' with 'refs/basis' - f'refs/basis/{HEAD[11:]}', - ) - else: - return (HEAD, None) +from .worktree import ( + get_worktree_head, +) diff --git a/datalad_next/repo_utils/worktree.py b/datalad_next/repo_utils/worktree.py new file mode 100644 index 00000000..938841af --- /dev/null +++ b/datalad_next/repo_utils/worktree.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from pathlib import Path + +from datalad_next.exceptions import CapturedException +from datalad_next.runners import ( + CommandError, + call_git_lines, +) + + +def get_worktree_head( + path: Path, +) -> tuple[str | None, str | None]: + try: + HEAD = call_git_lines( + # we add the pathspec disambiguator to get cleaner error messages + # (and we only report the first item below, to take it off again) + ['rev-parse', '-q', '--symbolic-full-name', 'HEAD', '--'], + cwd=path, + )[0] + except (NotADirectoryError, FileNotFoundError) as e: + raise ValueError('path not found') from e + except CommandError as e: + CapturedException(e) + if 
'fatal: not a git repository' in e.stderr: + raise ValueError(f'no Git repository at {path!r}') from e + elif 'fatal: bad revision' in e.stderr: + return (None, None) + else: + # no idea reraise + raise + + if HEAD.startswith('refs/heads/adjusted/'): + # this is a git-annex adjusted branch. do the comparison against + # its basis. it is not meaningful to track the managed branch in + # a superdataset + return ( + HEAD, + # replace 'refs/heads' with 'refs/basis' + f'refs/basis/{HEAD[11:]}', + ) + else: + return (HEAD, None) + diff --git a/docs/source/pyutils.rst b/docs/source/pyutils.rst index 48453603..78adfbb7 100644 --- a/docs/source/pyutils.rst +++ b/docs/source/pyutils.rst @@ -26,6 +26,7 @@ packages. iterable_subprocess itertools iter_collections + repo_utils runners tests tests.fixtures