From a9b32a1f61819ee9927d0640b0a97a75005a94cf Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Sun, 13 Oct 2024 22:28:54 -0400 Subject: [PATCH 1/6] fix for cloud paths The _check_path_matches_patterns function was failing for cloud (e.g. s3, gc3) URIs. 1) the path.relative_to() function doesn't work for cloud URIs (it just returns the original). 2) combining Path('/') with the result of relative_to now also throws an error when using cloud (or non-posix) paths. This fix drops the uri prefix by using .path to get the posix-like part of the path, so that the regex check works as expected. --- bids/layout/index.py | 7 ++++++- setup.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/bids/layout/index.py b/bids/layout/index.py index 8f163228d..43bece8ee 100644 --- a/bids/layout/index.py +++ b/bids/layout/index.py @@ -41,12 +41,17 @@ def _extract_entities(bidsfile, entities): def _check_path_matches_patterns(path, patterns, root=None): """Check if the path matches at least one of the provided patterns. """ + if not patterns: return False path = path.absolute() if root is not None: - path = Path("/") / path.relative_to(root) + + if isinstance(path,Path): + path = Path("/") / Path(path.path).relative_to(Path(root.path)) + else: + path = Path("/") / path.relative_to(root) # Path now can be downcast to str path = str(path) diff --git a/setup.py b/setup.py index f01ff7f5f..e39e9d68e 100755 --- a/setup.py +++ b/setup.py @@ -3,6 +3,6 @@ import versioneer setup( - version=versioneer.get_version(), + version="0.17.2-dev", cmdclass=versioneer.get_cmdclass(), ) From 76b057ae81d5b6588bf74a5aafb9e2f7527e5785 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Thu, 17 Oct 2024 07:13:43 -0700 Subject: [PATCH 2/6] revert version change was just for local use --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e39e9d68e..f01ff7f5f 100755 --- a/setup.py +++ b/setup.py @@ -3,6 +3,6 @@ import versioneer setup( - version="0.17.2-dev", + version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), ) From 07bdfcd28552ae7d84ef2de8be3b6a8e5d999799 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Fri, 25 Oct 2024 21:03:31 -0400 Subject: [PATCH 3/6] fix for remote uris The check if in a derivatives folder was not working when the root had a uri prefix. This fixes it so the uri prefix is stripped off for the check. --- bids/layout/index.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bids/layout/index.py b/bids/layout/index.py index 43bece8ee..048c11d23 100644 --- a/bids/layout/index.py +++ b/bids/layout/index.py @@ -187,11 +187,11 @@ def _validate_file(self, f): return self.validator.is_bids(to_check) def _index_dir(self, path, config, force=None): - - abs_path = self._layout._root / path + root_path = Path(self._layout._root.path) # drops the uri prefix if it is there + abs_path = root_path / Path(path.path).relative_to(root_path) # Derivative directories must always be added separately - if self._layout._root.joinpath('derivatives') in abs_path.parents: + if root_path.joinpath('derivatives') in abs_path.parents: return [], [] config = list(config) # Shallow copy @@ -212,6 +212,7 @@ def _index_dir(self, path, config, force=None): # Get lists of 1st-level subdirectories and files in the path directory _, dirnames, filenames = next(path.fs.walk(path.path)) + # Symbolic links are returned as filenames even if they point to directories # Temporary list to store symbolic links that need to be removed from filenames links_to_dirs = [] From c9f7c1fbf3debee8d9660659822f04718beda764 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Fri, 25 Oct 2024 21:05:11 -0400 Subject: [PATCH 4/6] adds a simple test for remote s3 datasets Checks the number of files in an example openneuro s3 dataset. Also adds s3fs as a test dependency. --- bids/layout/tests/test_remote_bids.py | 26 ++++++++++++++++++++++++++ pyproject.toml | 1 + 2 files changed, 27 insertions(+) create mode 100644 bids/layout/tests/test_remote_bids.py diff --git a/bids/layout/tests/test_remote_bids.py b/bids/layout/tests/test_remote_bids.py new file mode 100644 index 000000000..26b26431e --- /dev/null +++ b/bids/layout/tests/test_remote_bids.py @@ -0,0 +1,26 @@ +""" Tests runs layout on bids examples and make sure all files are caught""" + +""" TODO +- add more 'vanilla' datasets +- missing files in micr? +""" + +import pytest + +from bids.layout import BIDSLayout + +# Values for the number of files by downloading dataset first + +@pytest.mark.parametrize( + "dataset, nb_files", + [ + ("s3://openneuro.org/ds000102", 136), + ], +) +def test_layout_on_s3_datasets_no_derivatives(dataset, nb_files): + layout = BIDSLayout(dataset) + files = layout.get() + assert len(files) == nb_files + + + diff --git a/pyproject.toml b/pyproject.toml index 5602b721e..d8711ff65 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,7 @@ test = [ "coverage[toml]", "altair", "pytest-xdist", + "s3fs" #for testing remote uri ] model_reports = [ "jinja2", From d96b7b4090e7a8d6cb38f33ad30e258ebf497e8d Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Mon, 28 Oct 2024 11:22:47 -0400 Subject: [PATCH 5/6] TEST: Use anonymous s3 connnection for test --- bids/layout/tests/test_remote_bids.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/bids/layout/tests/test_remote_bids.py b/bids/layout/tests/test_remote_bids.py index 26b26431e..33e6a3923 100644 --- a/bids/layout/tests/test_remote_bids.py +++ b/bids/layout/tests/test_remote_bids.py @@ -1,26 +1,24 @@ -""" Tests runs layout on bids examples and make sure all files are caught""" +"""Tests runs layout on bids examples and make sure all files are caught""" -""" TODO -- add more 'vanilla' datasets -- missing files in micr? -""" +# TODO +# - add more 'vanilla' datasets +# - missing files in micr? import pytest +from upath import UPath from bids.layout import BIDSLayout # Values for the number of files by downloading dataset first + @pytest.mark.parametrize( "dataset, nb_files", [ - ("s3://openneuro.org/ds000102", 136), + (UPath("s3://openneuro.org/ds000102", anon=True), 136), ], ) def test_layout_on_s3_datasets_no_derivatives(dataset, nb_files): layout = BIDSLayout(dataset) files = layout.get() assert len(files) == nb_files - - - From 962cfe7e678c067fa30215956380ba20a5f79b1c Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Mon, 4 Nov 2024 11:07:40 -0500 Subject: [PATCH 6/6] Update bids/layout/tests/test_remote_bids.py --- bids/layout/tests/test_remote_bids.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bids/layout/tests/test_remote_bids.py b/bids/layout/tests/test_remote_bids.py index 33e6a3923..cff216729 100644 --- a/bids/layout/tests/test_remote_bids.py +++ b/bids/layout/tests/test_remote_bids.py @@ -5,6 +5,7 @@ # - missing files in micr? import pytest +from botocore.exceptions import NoCredentialsError from upath import UPath from bids.layout import BIDSLayout @@ -18,6 +19,7 @@ (UPath("s3://openneuro.org/ds000102", anon=True), 136), ], ) +@pytest.mark.xfail(raises=NoCredentialsError) def test_layout_on_s3_datasets_no_derivatives(dataset, nb_files): layout = BIDSLayout(dataset) files = layout.get()