Skip to content

Commit

Permalink
Merge pull request #628 from mih/rftree
Browse files Browse the repository at this point in the history
Free `tree` command from `subdatasets` dependency
  • Loading branch information
mih authored Feb 6, 2024
2 parents 8c921e2 + 9d24b49 commit 7a08a58
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 88 deletions.
22 changes: 5 additions & 17 deletions datalad_next/commands/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
NoDatasetFound
)

from datalad.local.subdatasets import Subdatasets
from datalad_next.iter_collections import iter_submodules
from datalad_next.constraints import (
EnsureBool,
EnsureDataset,
Expand Down Expand Up @@ -608,22 +608,10 @@ def get_subds_paths(ds_path: Path):
# submodules. Since we need to run it to (A) calculate dataset depth and
# (B) detect non-installed datasets, we cache results, so that the list of
# subdatasets is computed only once for each parent dataset.

def res_filter(res):
return res.get('status') == 'ok' and res.get('type') == 'dataset'

# call subdatasets command instead of dataset method `ds.subdatasets()`
# to avoid potentially expensive import of full datalad API
return Subdatasets.__call__(
dataset=ds_path,
recursive=False,
state='any', # include not-installed subdatasets
result_filter=res_filter,
on_failure='ignore',
result_xfm='paths',
result_renderer='disabled',
return_type='list'
)
return [
str(ds_path / sm.path)
for sm in iter_submodules(ds_path)
]


@lru_cache()
Expand Down
2 changes: 2 additions & 0 deletions datalad_next/iter_collections/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
iter_gitstatus
iter_gittree
iter_gitworktree
iter_submodules
iter_tar
iter_zip
TarfileItem
Expand Down Expand Up @@ -67,6 +68,7 @@
# TODO move to datalad_next.types?
GitWorktreeFileSystemItem,
iter_gitworktree,
iter_submodules,
)
from .annexworktree import (
iter_annexworktree,
Expand Down
2 changes: 1 addition & 1 deletion datalad_next/iter_collections/gitstatus.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
)
from datalad_next.repo_utils import (
get_worktree_head,
iter_submodules,
)

from .gitdiff import (
Expand All @@ -35,6 +34,7 @@
GitTreeItem,
GitTreeItemType,
iter_gitworktree,
iter_submodules,
lsfiles_untracked_args,
_git_ls_files,
)
Expand Down
22 changes: 22 additions & 0 deletions datalad_next/iter_collections/gitworktree.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,28 @@ def iter_gitworktree(
# report in the next loop iteration


def iter_submodules(
    path: Path,
) -> Generator[GitTreeItem, None, None]:
    """Yield the submodule items of the repository worktree at ``path``

    This is a thin convenience wrapper around ``iter_gitworktree()``. It is
    called with ``untracked=None``, ``link_target=False``, ``fp=False`` and
    ``recursive='repository'``; from its output only items of type
    ``GitTreeItemType.submodule`` are passed on, except for an item that is
    named ``.`` (see the comment in the loop below).
    """
    # name carried by an item that was found at the root path itself
    root_name = PurePath('.')
    worktree_items = iter_gitworktree(
        path,
        untracked=None,
        link_target=False,
        fp=False,
        recursive='repository',
    )
    for entry in worktree_items:
        if entry.gittype != GitTreeItemType.submodule:
            # not a submodule, nothing to report
            continue
        if entry.name == root_name:
            # a submodule that was found at the root path indicates that
            # the submodule itself is not around, only its record in the
            # parent
            continue
        yield entry


def _get_item(
basepath: Path,
link_target: bool,
Expand Down
78 changes: 8 additions & 70 deletions datalad_next/repo_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,74 +1,12 @@
"""Common repository operations
"""
from __future__ import annotations

from pathlib import (
Path,
PurePath,
)
from typing import Generator

from datalad_next.exceptions import CapturedException
from datalad_next.iter_collections.gitworktree import (
GitTreeItem,
GitTreeItemType,
iter_gitworktree,
)
from datalad_next.runners import (
CommandError,
call_git_lines,
)


def iter_submodules(
path: Path,
) -> Generator[GitTreeItem, None, None]:
"""Given a path, report all submodules of a repository underneath it"""
for item in iter_gitworktree(
path,
untracked=None,
link_target=False,
fp=False,
recursive='repository',
):
# exclude non-submodules, or a submodule that was found at
# the root path -- which would indicate that the submodule
# itself it not around, only its record in the parent
if item.gittype == GitTreeItemType.submodule \
and item.name != PurePath('.'):
yield item
.. currentmodule:: datalad_next.repo_utils
.. autosummary::
:toctree: generated
def get_worktree_head(
path: Path,
) -> tuple[str | None, str | None]:
try:
HEAD = call_git_lines(
# we add the pathspec disambiguator to get cleaner error messages
# (and we only report the first item below, to take it off again)
['rev-parse', '-q', '--symbolic-full-name', 'HEAD', '--'],
cwd=path,
)[0]
except (NotADirectoryError, FileNotFoundError) as e:
raise ValueError('path not found') from e
except CommandError as e:
CapturedException(e)
if 'fatal: not a git repository' in e.stderr:
raise ValueError(f'no Git repository at {path!r}') from e
elif 'fatal: bad revision' in e.stderr:
return (None, None)
else:
# no idea reraise
raise
get_worktree_head
"""

if HEAD.startswith('refs/heads/adjusted/'):
# this is a git-annex adjusted branch. do the comparison against
# its basis. it is not meaningful to track the managed branch in
# a superdataset
return (
HEAD,
# replace 'refs/heads' with 'refs/basis'
f'refs/basis/{HEAD[11:]}',
)
else:
return (HEAD, None)
from .worktree import (
get_worktree_head,
)
45 changes: 45 additions & 0 deletions datalad_next/repo_utils/worktree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from __future__ import annotations

from pathlib import Path

from datalad_next.exceptions import CapturedException
from datalad_next.runners import (
CommandError,
call_git_lines,
)


def get_worktree_head(
    path: Path,
) -> tuple[str | None, str | None]:
    """Report the symbolic full name of ``HEAD`` for the repository at ``path``

    Parameters
    ----------
    path: Path
      Directory that is used as working directory for the
      ``git rev-parse`` call.

    Returns
    -------
    tuple
      ``(HEAD, basis)``. ``basis`` is ``None``, unless ``HEAD`` is a
      git-annex adjusted branch (``refs/heads/adjusted/...``), in which
      case it is the corresponding ``refs/basis/adjusted/...`` reference.
      ``(None, None)`` is returned when Git reports a "bad revision".

    Raises
    ------
    ValueError
      When running Git fails with ``NotADirectoryError`` or
      ``FileNotFoundError``, or when Git reports that ``path`` is not
      a Git repository.
    CommandError
      Re-raised as-is for any other failure of the Git call.
    """
    try:
        head_ref = call_git_lines(
            # the pathspec disambiguator ('--') is added to get cleaner
            # error messages; only the first reported item is used, which
            # takes the disambiguator off again
            ['rev-parse', '-q', '--symbolic-full-name', 'HEAD', '--'],
            cwd=path,
        )[0]
    except (NotADirectoryError, FileNotFoundError) as e:
        raise ValueError('path not found') from e
    except CommandError as e:
        CapturedException(e)
        git_stderr = e.stderr
        if 'fatal: not a git repository' in git_stderr:
            raise ValueError(f'no Git repository at {path!r}') from e
        if 'fatal: bad revision' in git_stderr:
            return (None, None)
        # no idea what went wrong, reraise
        raise

    if not head_ref.startswith('refs/heads/adjusted/'):
        return (head_ref, None)

    # this is a git-annex adjusted branch. do the comparison against
    # its basis. it is not meaningful to track the managed branch in
    # a superdataset
    heads_prefix_len = len('refs/heads/')
    # replace 'refs/heads' with 'refs/basis'
    basis_ref = f'refs/basis/{head_ref[heads_prefix_len:]}'
    return (head_ref, basis_ref)

1 change: 1 addition & 0 deletions docs/source/pyutils.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ packages.
iterable_subprocess
itertools
iter_collections
repo_utils
runners
tests
tests.fixtures
Expand Down

0 comments on commit 7a08a58

Please sign in to comment.