From bf0b9f6fda843322c7cc42fdc8202f0c04129ff3 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 12 Apr 2024 13:53:13 -0400 Subject: [PATCH 01/13] Establishing tools/bids-2.0 to be used in this branch for migrations --- tools/bids-2.0/README.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 tools/bids-2.0/README.md diff --git a/tools/bids-2.0/README.md b/tools/bids-2.0/README.md new file mode 100644 index 0000000000..860fcb875a --- /dev/null +++ b/tools/bids-2.0/README.md @@ -0,0 +1,4 @@ +This directory to contain various little helpers which would script desired +automated changes (where possible) for migrating specification (not datasets) +to BIDS 2.0. Ideally scripts should have some header pointing to the +underlying issue they are addressing. From a4aa50955a4468bfbdf5305a808b77965caffc8e Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 12 Apr 2024 13:56:33 -0400 Subject: [PATCH 02/13] Script to rename "participant"s to "subject"s --- .../bids-2.0/rename_participants_to_subjects | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100755 tools/bids-2.0/rename_participants_to_subjects diff --git a/tools/bids-2.0/rename_participants_to_subjects b/tools/bids-2.0/rename_participants_to_subjects new file mode 100755 index 0000000000..03cd2e53f4 --- /dev/null +++ b/tools/bids-2.0/rename_participants_to_subjects @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# +# Description: rename all mentionings of participants to subjects +# +# Reference issues: +# - https://github.com/bids-standard/bids-2-devel/issues/14 + +set -eu -o pipefail + +pathspec=( ':(exclude)src/CHANGES.md' ':(exclude)src/appendices/licenses.md' ':(exclude)CODE_OF_CONDUCT.md' ':(exclude)tools/bids-2.0/*' ) + +# Plural -- no need really for a separate invocation since plural is just singular + s +# git grep -l participants -- "${pathspec[@]}" | xargs sed -i -e 's,participants,subjects,g' + +# Singular -- tricky since we want to exclude some. According to chatgpt no built in negative back lookup +# so suggested to replace with PLACEHOLDER +if git grep -q -l PLACEHOLDER -- "${pathspec[@]}"; then + echo "Assertion error -- PLACEHOLDER already used somehow" >&2 + exit 1 +fi + +git grep -l '[Pp]articipant' -- "${pathspec[@]}" | \ + xargs sed -i -e 's/\(Experiment-\|as reported by the \)participant/PLACEHOLDER#\1#/g' \ + -e 's/participant/subject/g' \ + -e 's/Participant/Subject/g' \ + -e 's/PLACEHOLDER#\([^#]*\)#/\1participant/g' From a05a8b454947d37ec1e603bbf265f75c0459504f Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 12 Apr 2024 15:16:38 -0400 Subject: [PATCH 03/13] Initiate "bst migrate-dataset" command with handling of participants renaming --- tools/schemacode/bidsschematools/__main__.py | 9 +++++ tools/schemacode/bidsschematools/conftest.py | 20 +++++++++- tools/schemacode/bidsschematools/dataset.py | 41 ++++++++++++++++++++ 3 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 tools/schemacode/bidsschematools/dataset.py diff --git a/tools/schemacode/bidsschematools/__main__.py b/tools/schemacode/bidsschematools/__main__.py index c8d554cf0a..a896bc5147 100644 --- a/tools/schemacode/bidsschematools/__main__.py +++ b/tools/schemacode/bidsschematools/__main__.py @@ -3,6 +3,7 @@ import click +from .dataset import migrate_dataset as migrate_dataset_ from .schema import export_schema, load_schema @@ -32,5 +33,13 @@ def export(ctx, schema, output): fobj.write(text) +@cli.command() +@click.argument("dataset_path", type=click.Path(dir_okay=True, file_okay=False)) +@click.pass_context +def migrate_dataset(ctx, dataset_path): + """Migrate BIDS 1.x dataset to BIDS 2.0""" + migrate_dataset_(dataset_path) + + if __name__ == "__main__": cli() diff --git a/tools/schemacode/bidsschematools/conftest.py b/tools/schemacode/bidsschematools/conftest.py index 3154e3e5cf..e3c9fd740a 100644 --- a/tools/schemacode/bidsschematools/conftest.py +++ b/tools/schemacode/bidsschematools/conftest.py @@ -1,5 +1,6 @@ import logging import tempfile +from pathlib import Path from subprocess import run try: @@ -95,10 +96,27 @@ def schema_obj(): return schema.load_schema() -bids_examples = get_gitrepo_fixture( +bids_examples_pristine = get_gitrepo_fixture( "https://github.com/bids-standard/bids-examples", whitelist=BIDS_SELECTION, ) + + +@pytest.fixture(scope="session") +def bids_examples(bids_examples_pristine): + """Migrates examples to BIDS 2.0 before giving it back to tests""" + + from bidsschematools.dataset import migrate_dataset + + # TODO: potentially make it recursive in finding derivatives, + # as e.g. we have in ./ieeg_epilepsy_ecog/derivatives/freesurfer/participants.tsv + for item in Path(bids_examples_pristine).iterdir(): + if item.is_dir() and (item / "dataset_description.json").exists(): + migrate_dataset(item) + + return bids_examples_pristine + + bids_error_examples = get_gitrepo_fixture( "https://github.com/bids-standard/bids-error-examples", whitelist=BIDS_ERROR_SELECTION, diff --git a/tools/schemacode/bidsschematools/dataset.py b/tools/schemacode/bidsschematools/dataset.py new file mode 100644 index 0000000000..fba1bcdb70 --- /dev/null +++ b/tools/schemacode/bidsschematools/dataset.py @@ -0,0 +1,41 @@ +import os +from pathlib import Path + +import bidsschematools as bst +import bidsschematools.utils + +lgr = bst.utils.get_logger() + + +def migrate_version(dataset_path: Path): + """TODO: modify BIDSVersion in dataset_description.json + + Should do as a string manipulation not json to minimize + the diff""" + pass + + +def migrate_participants(dataset_path: Path): + extensions = [".tsv", ".json"] + + for ext in extensions: + old_file = Path(dataset_path) / f"participants{ext}" + new_file = Path(dataset_path) / f"subjects{ext}" + if old_file.exists(): + os.rename(old_file, new_file) + lgr.info(f" - renamed {old_file} to {new_file}") + if ext == ".tsv": + migrated = new_file.read_text().replace("participant_id", "subject_id", 1) + new_file.write_text(migrated) + lgr.info(f" - migrated content in {new_file}") + + +def migrate_dataset(dataset_path): + lgr.info(f"Migrating dataset at {dataset_path}") + + # TODO: possibly add a check for BIDS version in dataset_description.json + # and skip if already 2.0, although ideally transformations + # should also be indepotent + for migration in [migrate_participants, migrate_version]: + lgr.info(f" - applying migration {migration.__name__}") + migration(dataset_path) From 46b79d4aa54e40b6b24a1db4cef4fa1cd5e8f607 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 12 Apr 2024 17:19:40 -0400 Subject: [PATCH 04/13] Make migrate-dataset into migrate-datasets + add migration of BIDSVersion --- tools/schemacode/bidsschematools/__main__.py | 17 +++++++--- tools/schemacode/bidsschematools/dataset.py | 33 +++++++++++++++++--- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/tools/schemacode/bidsschematools/__main__.py b/tools/schemacode/bidsschematools/__main__.py index a896bc5147..5774374423 100644 --- a/tools/schemacode/bidsschematools/__main__.py +++ b/tools/schemacode/bidsschematools/__main__.py @@ -34,11 +34,20 @@ def export(ctx, schema, output): @cli.command() -@click.argument("dataset_path", type=click.Path(dir_okay=True, file_okay=False)) +@click.argument("dataset_paths", type=click.Path(dir_okay=True, file_okay=False), nargs=-1) @click.pass_context -def migrate_dataset(ctx, dataset_path): - """Migrate BIDS 1.x dataset to BIDS 2.0""" - migrate_dataset_(dataset_path) +def migrate_datasets(ctx, dataset_paths): + """Migrate BIDS 1.x dataset to BIDS 2.0 + + Example of running in bids-examples to apply to all and review the diff + + \b + git clean -dfx && git reset --hard \\ + && { /bin/ls */dataset_description.json | sed -e 's,/.*,,g' | xargs bst migrate-datasets } \\ + && git add . && git diff --cached + """ + for dataset_path in dataset_paths: + migrate_dataset_(dataset_path) if __name__ == "__main__": diff --git a/tools/schemacode/bidsschematools/dataset.py b/tools/schemacode/bidsschematools/dataset.py index fba1bcdb70..09848acbb9 100644 --- a/tools/schemacode/bidsschematools/dataset.py +++ b/tools/schemacode/bidsschematools/dataset.py @@ -1,4 +1,6 @@ +import json import os +import re from pathlib import Path import bidsschematools as bst @@ -6,21 +8,36 @@ lgr = bst.utils.get_logger() +TARGET_VERSION = "2.0.0" + + +def get_bids_version(dataset_path: Path) -> str: + dataset_description = dataset_path / "dataset_description.json" + if not dataset_description.exists(): + raise ValueError(f"dataset_description.json not found in {dataset_path}") + return json.loads(dataset_description.read_text())["BIDSVersion"] + def migrate_version(dataset_path: Path): """TODO: modify BIDSVersion in dataset_description.json Should do as a string manipulation not json to minimize the diff""" - pass + dataset_description = dataset_path / "dataset_description.json" + # Read/write as bytes so we do not change Windows line endings + content = dataset_description.read_bytes().decode() + old_version = json.loads(content)["BIDSVersion"] + migrated = re.sub(rf'("BIDSVersion":\s*)"{old_version}', r'\1"' + TARGET_VERSION, content) + assert json.loads(migrated)["BIDSVersion"] == TARGET_VERSION + dataset_description.write_bytes(migrated.encode()) def migrate_participants(dataset_path: Path): extensions = [".tsv", ".json"] for ext in extensions: - old_file = Path(dataset_path) / f"participants{ext}" - new_file = Path(dataset_path) / f"subjects{ext}" + old_file = dataset_path / f"participants{ext}" + new_file = dataset_path / f"subjects{ext}" if old_file.exists(): os.rename(old_file, new_file) lgr.info(f" - renamed {old_file} to {new_file}") @@ -32,10 +49,16 @@ def migrate_participants(dataset_path: Path): def migrate_dataset(dataset_path): lgr.info(f"Migrating dataset at {dataset_path}") - + dataset_path = Path(dataset_path) + if get_bids_version(dataset_path) == TARGET_VERSION: + lgr.info(f"Dataset already at version {TARGET_VERSION}") + return # TODO: possibly add a check for BIDS version in dataset_description.json # and skip if already 2.0, although ideally transformations # should also be indepotent - for migration in [migrate_participants, migrate_version]: + for migration in [ + migrate_participants, + migrate_version, + ]: lgr.info(f" - applying migration {migration.__name__}") migration(dataset_path) From 991db747140a1da1049b59ede4eb9b88a9c53a36 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 12 Apr 2024 18:11:07 -0400 Subject: [PATCH 05/13] Add apply_all script to apply all changes --- tools/bids-2.0/apply_all | 5 +++++ 1 file changed, 5 insertions(+) create mode 100755 tools/bids-2.0/apply_all diff --git a/tools/bids-2.0/apply_all b/tools/bids-2.0/apply_all new file mode 100755 index 0000000000..a605b222f6 --- /dev/null +++ b/tools/bids-2.0/apply_all @@ -0,0 +1,5 @@ +#!/bin/bash + +path="$(dirname "$0")" + +"$path/rename_participants_to_subjects" From 911629a81fa69ad80f9f3414164d66c98566c440 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 12 Apr 2024 19:05:56 -0400 Subject: [PATCH 06/13] Rename dataset.py to migrations.py and exclude it in git grepping --- tools/bids-2.0/rename_participants_to_subjects | 2 +- tools/schemacode/bidsschematools/__main__.py | 2 +- tools/schemacode/bidsschematools/conftest.py | 2 +- tools/schemacode/bidsschematools/{dataset.py => migrations.py} | 0 4 files changed, 3 insertions(+), 3 deletions(-) rename tools/schemacode/bidsschematools/{dataset.py => migrations.py} (100%) diff --git a/tools/bids-2.0/rename_participants_to_subjects b/tools/bids-2.0/rename_participants_to_subjects index 03cd2e53f4..7a868fad22 100755 --- a/tools/bids-2.0/rename_participants_to_subjects +++ b/tools/bids-2.0/rename_participants_to_subjects @@ -7,7 +7,7 @@ set -eu -o pipefail -pathspec=( ':(exclude)src/CHANGES.md' ':(exclude)src/appendices/licenses.md' ':(exclude)CODE_OF_CONDUCT.md' ':(exclude)tools/bids-2.0/*' ) +pathspec=( ':(exclude)src/CHANGES.md' ':(exclude)src/appendices/licenses.md' ':(exclude)CODE_OF_CONDUCT.md' ':(exclude)tools/bids-2.0/*' ':(exclude)tools/schemacode/bidsschematools/migrations.py' ) # Plural -- no need really for a separate invocation since plural is just singular + s # git grep -l participants -- "${pathspec[@]}" | xargs sed -i -e 's,participants,subjects,g' diff --git a/tools/schemacode/bidsschematools/__main__.py b/tools/schemacode/bidsschematools/__main__.py index 5774374423..87cd4387ab 100644 --- a/tools/schemacode/bidsschematools/__main__.py +++ b/tools/schemacode/bidsschematools/__main__.py @@ -3,7 +3,7 @@ import click -from .dataset import migrate_dataset as migrate_dataset_ +from .migrations import migrate_dataset as migrate_dataset_ from .schema import export_schema, load_schema diff --git a/tools/schemacode/bidsschematools/conftest.py b/tools/schemacode/bidsschematools/conftest.py index e3c9fd740a..9ed4ec7a11 100644 --- a/tools/schemacode/bidsschematools/conftest.py +++ b/tools/schemacode/bidsschematools/conftest.py @@ -106,7 +106,7 @@ def schema_obj(): def bids_examples(bids_examples_pristine): """Migrates examples to BIDS 2.0 before giving it back to tests""" - from bidsschematools.dataset import migrate_dataset + from bidsschematools.migrations import migrate_dataset # TODO: potentially make it recursive in finding derivatives, # as e.g. we have in ./ieeg_epilepsy_ecog/derivatives/freesurfer/participants.tsv diff --git a/tools/schemacode/bidsschematools/dataset.py b/tools/schemacode/bidsschematools/migrations.py similarity index 100% rename from tools/schemacode/bidsschematools/dataset.py rename to tools/schemacode/bidsschematools/migrations.py From 546ee68ffb9d232580ead94fb3a7aca43d1cd4a3 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 19 Apr 2024 22:02:32 -0400 Subject: [PATCH 07/13] Preserve line ending while replacing in participants.tsv --- tools/schemacode/bidsschematools/migrations.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/schemacode/bidsschematools/migrations.py b/tools/schemacode/bidsschematools/migrations.py index 09848acbb9..684c5d2aa6 100644 --- a/tools/schemacode/bidsschematools/migrations.py +++ b/tools/schemacode/bidsschematools/migrations.py @@ -42,8 +42,11 @@ def migrate_participants(dataset_path: Path): os.rename(old_file, new_file) lgr.info(f" - renamed {old_file} to {new_file}") if ext == ".tsv": - migrated = new_file.read_text().replace("participant_id", "subject_id", 1) - new_file.write_text(migrated) + # Do manual .decode() and .encode() to avoid changing line endings + migrated = ( + new_file.read_bytes().decode().replace("participant_id", "subject_id", 1) + ) + new_file.write_bytes(migrated.encode()) lgr.info(f" - migrated content in {new_file}") From 6ba17ce9008a15aab0d33a15ca8f0dfd3dcffbb3 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 12 Apr 2024 19:26:56 -0400 Subject: [PATCH 08/13] Original copy of validate_datasets.yml from bids-examples --- .github/workflows/validate_bids-examples.yml | 90 ++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 .github/workflows/validate_bids-examples.yml diff --git a/.github/workflows/validate_bids-examples.yml b/.github/workflows/validate_bids-examples.yml new file mode 100644 index 0000000000..0ed566aa65 --- /dev/null +++ b/.github/workflows/validate_bids-examples.yml @@ -0,0 +1,90 @@ +name: validate_datasets + +on: + push: + branches: ['**'] + pull_request: + branches: ['**'] + create: + branches: [master] + tags: ['**'] + schedule: + - cron: "0 4 * * 1" + +concurrency: + group: ${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + strategy: + fail-fast: false + matrix: + platform: [ubuntu-latest, macos-latest, windows-latest] + bids-validator: [master, stable] + + runs-on: ${{ matrix.platform }} + + env: + TZ: Europe/Berlin + FORCE_COLOR: 1 + + steps: + - uses: actions/checkout@v4 + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: 18 + + - name: Install BIDS validator (stable) + if: "matrix.bids-validator == 'stable'" + run: | + npm install -g bids-validator + + - name: Install BIDS validator (master) + if: "matrix.bids-validator == 'master'" + run: | + pushd .. + # Get npm 7+ + npm install -g npm + git clone --depth 1 https://github.com/bids-standard/bids-validator + cd bids-validator + # Generate the full development node_modules + npm clean-install + # Build & bundle the bids-validator CLI package + npm -w bids-validator run build + # Generate a package to install globally + npm -w bids-validator pack + # Install the package globally + bash -c "npm install -g bids-validator-*.tgz" + popd + + - name: Display versions and environment information + run: | + echo $TZ + date + echo "npm"; npm --version + echo "node"; node --version + echo "bids-validator"; bids-validator --version + + - name: Check that no large files are present + if: "matrix.bids-validator == 'stable'" + run: | + echo "Checking for big files ..." + found=`find . -not -path "./.git*" -type f -size +500k` + if [ "$found" == "" ] + then + echo "No big files present, great!" + else + echo "Found big files:" + echo "$found" + exit 1; + fi + shell: bash + + - name: Validate all BIDS datasets using bids-validator + run: | + cat ./run_tests.sh + bash ./run_tests.sh + shell: bash From 6e0dc2092bac40f1f36ad80edb86abf896880bcb Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 12 Apr 2024 19:49:39 -0400 Subject: [PATCH 09/13] Add installation of bids-examples, migration of them, and testing --- .github/workflows/validate_bids-examples.yml | 115 +++++++++++-------- 1 file changed, 68 insertions(+), 47 deletions(-) diff --git a/.github/workflows/validate_bids-examples.yml b/.github/workflows/validate_bids-examples.yml index 0ed566aa65..3d685fbe76 100644 --- a/.github/workflows/validate_bids-examples.yml +++ b/.github/workflows/validate_bids-examples.yml @@ -2,14 +2,14 @@ name: validate_datasets on: push: - branches: ['**'] + branches: ['master'] pull_request: branches: ['**'] - create: - branches: [master] - tags: ['**'] - schedule: - - cron: "0 4 * * 1" +# create: +# branches: [master] +# tags: ['**'] +# schedule: +# - cron: "0 4 * * 1" concurrency: group: ${{ github.ref }} @@ -20,8 +20,9 @@ jobs: strategy: fail-fast: false matrix: - platform: [ubuntu-latest, macos-latest, windows-latest] - bids-validator: [master, stable] + platform: [ubuntu-latest] # , macos-latest, windows-latest] + bids-validator: [master-deno] + python-version: ["3.11"] runs-on: ${{ matrix.platform }} @@ -32,59 +33,79 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Node.js - uses: actions/setup-node@v4 + # Setup Python with bst + - uses: actions/setup-python@v5 with: - node-version: 18 + python-version: ${{ matrix.python-version }} + - name: "Install build dependencies" + run: pip install --upgrade build twine + - name: "Build source distribution and wheel" + run: python -m build tools/schemacode + - name: "Check distribution metadata" + run: twine check tools/schemacode/dist/* + - name: "Install bst tools from the build" + run: pip install $( ls tools/schemacode/dist/*.whl )[all] + - name: "Produce dump of the schema as schema.json" + run: bst -v export --output src/schema.json - - name: Install BIDS validator (stable) - if: "matrix.bids-validator == 'stable'" - run: | - npm install -g bids-validator + - uses: denoland/setup-deno@v1.1.2 + if: "matrix.bids-validator == 'master-deno'" + with: + deno-version: v1.x - - name: Install BIDS validator (master) - if: "matrix.bids-validator == 'master'" + - name: Install BIDS validator (master deno build) + if: "matrix.bids-validator == 'master-deno'" run: | pushd .. - # Get npm 7+ - npm install -g npm + # Let's use specific commit for now + # TODO: progress it once in a while + commit=a7b291b882a8c6184219ccb84faae255ba96203a git clone --depth 1 https://github.com/bids-standard/bids-validator cd bids-validator - # Generate the full development node_modules - npm clean-install - # Build & bundle the bids-validator CLI package - npm -w bids-validator run build - # Generate a package to install globally - npm -w bids-validator pack - # Install the package globally - bash -c "npm install -g bids-validator-*.tgz" + git fetch --depth 1 origin $commit; + echo -e '#!/bin/sh\n'"$PWD/bids-validator/bids-validator-deno \"\$@\"" >| /usr/local/bin/bids-validator + chmod a+x /usr/local/bin/bids-validator + which -a bids-validator + bids-validator --help popd - name: Display versions and environment information run: | echo $TZ date - echo "npm"; npm --version - echo "node"; node --version - echo "bids-validator"; bids-validator --version + echo -n "npm: "; npm --version + echo -n "node: "; node --version + echo -n "bids-validator: "; bids-validator --version + echo -n "python: "; python --version - - name: Check that no large files are present - if: "matrix.bids-validator == 'stable'" - run: | - echo "Checking for big files ..." - found=`find . -not -path "./.git*" -type f -size +500k` - if [ "$found" == "" ] - then - echo "No big files present, great!" - else - echo "Found big files:" - echo "$found" - exit 1; - fi + # Checkout bids-examples + - uses: actions/checkout@v4 + with: + # repository: bids-standard/bids-examples + # For now use the forked repository with support for deno validator + # from https://github.com/bids-standard/bids-examples/pull/435 + repository: yarikoptic/bids-examples + ref: deno-validator + path: bids-examples + + - name: Mark known not yet to be deno-legit BIDS datasets + run: touch {ds000117,ds000246,ds000247,ds000248,eeg_ds003645s_hed_demo,ieeg_motorMiller2007,ieeg_visual}/.SKIP_VALIDATION shell: bash + working-directory: bids-examples - - name: Validate all BIDS datasets using bids-validator - run: | - cat ./run_tests.sh - bash ./run_tests.sh + - name: Validate using bids-validator without migration + run: ./run_tests.sh + working-directory: bids-examples + + - name: Migrate all BIDS datasets + run: /bin/ls */dataset_description.json | sed -e 's,/.*,,g' | xargs bst migrate-datasets shell: bash + working-directory: bids-examples + + - name: Show migrated datasets diff + run: git diff + working-directory: bids-examples + + - name: Validate all BIDS datasets using bids-validator after migration + run: VALIDATOR_ARGS="--schema file://$PWD/../src/schema.json" bash ./run_tests.sh + working-directory: bids-examples From e5494b2f997da170f8bcc3c239cf91f6344940d8 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 19 Apr 2024 21:56:28 -0400 Subject: [PATCH 10/13] make migrate continue on non-bids, use git mv under git --- .github/workflows/validate_bids-examples.yml | 6 ++- .../schemacode/bidsschematools/migrations.py | 49 +++++++++++++++++-- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/.github/workflows/validate_bids-examples.yml b/.github/workflows/validate_bids-examples.yml index 3d685fbe76..7911ad5c82 100644 --- a/.github/workflows/validate_bids-examples.yml +++ b/.github/workflows/validate_bids-examples.yml @@ -103,9 +103,13 @@ jobs: working-directory: bids-examples - name: Show migrated datasets diff - run: git diff + run: git diff HEAD working-directory: bids-examples + # TODO: commit as a merge from current state of bids-examples + # and prior bids-2.0 branch there, but overloading with new updated + # state and recording commit hash of bids-specification used. + - name: Validate all BIDS datasets using bids-validator after migration run: VALIDATOR_ARGS="--schema file://$PWD/../src/schema.json" bash ./run_tests.sh working-directory: bids-examples diff --git a/tools/schemacode/bidsschematools/migrations.py b/tools/schemacode/bidsschematools/migrations.py index 684c5d2aa6..1cf0441cf1 100644 --- a/tools/schemacode/bidsschematools/migrations.py +++ b/tools/schemacode/bidsschematools/migrations.py @@ -1,7 +1,11 @@ import json import os import re +import subprocess +from functools import lru_cache +from itertools import chain from pathlib import Path +from typing import Optional import bidsschematools as bst import bidsschematools.utils @@ -11,10 +15,14 @@ TARGET_VERSION = "2.0.0" +class NotBIDSDatasetError(Exception): + pass + + def get_bids_version(dataset_path: Path) -> str: dataset_description = dataset_path / "dataset_description.json" if not dataset_description.exists(): - raise ValueError(f"dataset_description.json not found in {dataset_path}") + raise NotBIDSDatasetError(f"dataset_description.json not found in {dataset_path}") return json.loads(dataset_description.read_text())["BIDSVersion"] @@ -39,7 +47,7 @@ def migrate_participants(dataset_path: Path): old_file = dataset_path / f"participants{ext}" new_file = dataset_path / f"subjects{ext}" if old_file.exists(): - os.rename(old_file, new_file) + rename_path(old_file, new_file) lgr.info(f" - renamed {old_file} to {new_file}") if ext == ".tsv": # Do manual .decode() and .encode() to avoid changing line endings @@ -53,8 +61,12 @@ def migrate_participants(dataset_path: Path): def migrate_dataset(dataset_path): lgr.info(f"Migrating dataset at {dataset_path}") dataset_path = Path(dataset_path) - if get_bids_version(dataset_path) == TARGET_VERSION: - lgr.info(f"Dataset already at version {TARGET_VERSION}") + try: + if get_bids_version(dataset_path) == TARGET_VERSION: + lgr.info(f"Dataset already at version {TARGET_VERSION}") + return + except NotBIDSDatasetError: + lgr.warning("%s not a BIDS dataset, skipping", dataset_path) return # TODO: possibly add a check for BIDS version in dataset_description.json # and skip if already 2.0, although ideally transformations @@ -65,3 +77,32 @@ def migrate_dataset(dataset_path): ]: lgr.info(f" - applying migration {migration.__name__}") migration(dataset_path) + + +@lru_cache +def path_has_git(path: Path) -> bool: + return (path / ".git").exists() + + +def git_topdir(path: Path) -> Optional[Path]: + """Return top-level directory of a git repository containing path, + or None if not under git.""" + path = path.absolute() + for p in chain([path] if path.is_dir() else [], path.parents): + if path_has_git(p): + return p + return None + + +def rename_path(old_path: Path, new_path: Path): + """git aware rename. If under git, use git mv, otherwise just os.rename.""" + # if under git, use git mv but ensure that on border + # crossing (should just use DataLad and `mv` and it would do the right thing!) + if (old_git_top := git_topdir(old_path)) != (new_git_top := git_topdir(new_path)): + raise NotImplementedError( + f"Did not implement moving across git repo boundaries {old_git_top} -> {new_git_top}" + ) + if old_git_top: + subprocess.run(["git", "mv", str(old_path), str(new_path)], check=True, cwd=old_git_top) + else: + os.rename(old_path, new_path) From d1341a778e62bcf9853480facc72f03d74263cc6 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 26 Apr 2024 13:57:41 -0400 Subject: [PATCH 11/13] RF patching so we can run 'script patches' and also apply direct patches --- .codespellrc | 2 + tools/bids-2.0/README.md | 16 ++++++ tools/bids-2.0/apply_all | 26 ++++++++- .../01-01-rename_participants_to_subjects} | 0 ...ename_participants_to_subjects-fixup.patch | 53 +++++++++++++++++++ 5 files changed, 96 insertions(+), 1 deletion(-) rename tools/bids-2.0/{rename_participants_to_subjects => patches/01-01-rename_participants_to_subjects} (100%) create mode 100644 tools/bids-2.0/patches/01-02-rename_participants_to_subjects-fixup.patch diff --git a/.codespellrc b/.codespellrc index 22cd48ca96..8eae2b3993 100644 --- a/.codespellrc +++ b/.codespellrc @@ -1,5 +1,7 @@ [codespell] skip = *.js,*.svg,*.eps,.git,node_modules,env,venv,.mypy_cache,package-lock.json,CITATION.cff,tools/new_contributors.tsv,./tools/schemacode/docs/build +# Ignore diff/git format-patch headings which could include truncated lines +ignore-regex = ^@@ -[0-9].*@@.* ignore-words-list = fo,te,als,Acknowledgements,acknowledgements,weill,bu,winn,manuel builtin = clear,rare,en-GB_to_en-US # this overloads default dictionaries and I have not yet figured out diff --git a/tools/bids-2.0/README.md b/tools/bids-2.0/README.md index 860fcb875a..c787bb7c7d 100644 --- a/tools/bids-2.0/README.md +++ b/tools/bids-2.0/README.md @@ -2,3 +2,19 @@ This directory to contain various little helpers which would script desired automated changes (where possible) for migrating specification (not datasets) to BIDS 2.0. Ideally scripts should have some header pointing to the underlying issue they are addressing. + +## `apply_all` + +`apply_all` script goes through `patches/` in a (numeric) sorted order +and applies those "patches". + +"patches" could be of two types: + +- an executable -- a script to be executed which introduces the changes. +- `.patch` - a regular patch which needs to be applied using `patch -p1` + +Typically for the same leading index (e.g. `01`) there would be a script and +then a patch to possibly manually tune up remaining changes. + +`apply_all` could also take an index -- then it would stop applying patches +having applied patches up to that index. diff --git a/tools/bids-2.0/apply_all b/tools/bids-2.0/apply_all index a605b222f6..b486317cab 100755 --- a/tools/bids-2.0/apply_all +++ b/tools/bids-2.0/apply_all @@ -1,5 +1,29 @@ #!/bin/bash +set -eu + +# hardcoding for now - relative path from the top +rpath=tools/bids-2.0 + path="$(dirname "$0")" +cd "$path/../.." # go to the top of bids-specification + +apply_until=${1:-} + +# harmonize appearance +if [ -n "$apply_until" ] ; then + apply_until=$(printf "%02d" "$apply_until") +fi -"$path/rename_participants_to_subjects" +/bin/ls "$rpath"/patches/[0-9]* | sort -n | while read p; do + if [ "${p##*.}" == "patch" ]; then + echo "I: apply $p" + patch -p1 < $p + elif [ -x "$p" ] ; then + echo "I: run $p" + $p + else + echo "E: Do not know how to handle patch $p" >&2 + exit 1 + fi +done diff --git a/tools/bids-2.0/rename_participants_to_subjects b/tools/bids-2.0/patches/01-01-rename_participants_to_subjects similarity index 100% rename from tools/bids-2.0/rename_participants_to_subjects rename to tools/bids-2.0/patches/01-01-rename_participants_to_subjects diff --git a/tools/bids-2.0/patches/01-02-rename_participants_to_subjects-fixup.patch b/tools/bids-2.0/patches/01-02-rename_participants_to_subjects-fixup.patch new file mode 100644 index 0000000000..3c90141eef --- /dev/null +++ b/tools/bids-2.0/patches/01-02-rename_participants_to_subjects-fixup.patch @@ -0,0 +1,53 @@ +From 087982854fd000f6cb9d321faf25c68e4a2f1b1c Mon Sep 17 00:00:00 2001 +From: Yaroslav Halchenko +Date: Fri, 12 Apr 2024 15:54:31 -0400 +Subject: [PATCH] Post fixes to apply_all to fix some indentations due to + participant -> subject + +--- + src/appendices/coordinate-systems.md | 6 +++--- + src/schema/README.md | 4 ++-- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/src/appendices/coordinate-systems.md b/src/appendices/coordinate-systems.md +index 9eb57359..9cb031b0 100644 +--- a/src/appendices/coordinate-systems.md ++++ b/src/appendices/coordinate-systems.md +@@ -233,10 +233,10 @@ described in [Common file level metadata fields][common file level metadata fiel + + In the case of multiple study templates, additional names may need to be defined. + +-| **Coordinate System** | **Description** | +-| --------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ++| **Coordinate System** | **Description** | ++| --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | + | individual | Subject specific anatomical space (for example derived from T1w and/or T2w images). This coordinate system requires specifying an additional, subject-specific file to be fully defined. In context of surfaces this space has been referred to as `fsnative`. | +-| study | Custom space defined using a group/study-specific template. This coordinate system requires specifying an additional file to be fully defined. | ++| study | Custom space defined using a group/study-specific template. This coordinate system requires specifying an additional file to be fully defined. | + + ### Non-template coordinate system identifiers + +diff --git a/src/schema/README.md b/src/schema/README.md +index 0a6d7a3c..7587f01d 100644 +--- a/src/schema/README.md ++++ b/src/schema/README.md +@@ -243,7 +243,7 @@ and provide the *namespace* in which expressions are evaluated. + The following operators should be defined by an interpreter: + + | Operator | Definition | Example | +-| ----------- | ------------------------------------------------------------- | --------------------------------------------- | ++| ----------- | ------------------------------------------------------------- |-----------------------------------------------| + | `==` | equality | `suffix == "T1w"` | + | `!=` | inequality | `entities.task != "rest"` | + | `<`/`>` | less-than / greater-than | `sidecar.EchoTime < 0.5` | +@@ -253,7 +253,7 @@ The following operators should be defined by an interpreter: + | `&&` | conjunction, true if both RHS and LHS are true | `"Units" in sidecar && sidecar.Units == "mm"` | + | `\|\|` | disjunction, true if either RHS or LHS is true | `a < mn \|\| a > mx` | + | `.` | object query, returns value of subfield | `sidecar.Units` | +-| `[]` | array/string index, returns value of Nth element (0-indexed) | `columns.subject_label[0]` | ++| `[]` | array/string index, returns value of Nth element (0-indexed) | `columns.subject_label[0]` | + | `+` | numeric addition / string concatenation | `x + 1`, `stem + "suffix"` | + | `-`/`*`/`/` | numeric operators (division coerces to float) | `length(array) - 2`, `x * 2`, `1 / 2 == 0.5` | + +-- +2.43.0 From e8e5f23e5e6176cd5b43bc3343640d1a4bb79818 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 26 Apr 2024 15:19:24 -0400 Subject: [PATCH 12/13] Adjust workflows to apply patches and push patched version as well for introspection --- .github/workflows/schemacode_ci.yml | 9 ++++++ .github/workflows/validate_bids-examples.yml | 29 ++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/.github/workflows/schemacode_ci.yml b/.github/workflows/schemacode_ci.yml index 6cf4dbcd4c..cf4640578a 100644 --- a/.github/workflows/schemacode_ci.yml +++ b/.github/workflows/schemacode_ci.yml @@ -27,6 +27,15 @@ jobs: python-version: ["3.11"] steps: - uses: actions/checkout@v4 + + - name: "Apply patches for BIDS-2.0" + run: tools/bids-2.0/apply_all + + - name: "Show differences after patching" + run: | + git add . + git diff --cached + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/validate_bids-examples.yml b/.github/workflows/validate_bids-examples.yml index 7911ad5c82..d7f3fbe476 100644 --- a/.github/workflows/validate_bids-examples.yml +++ b/.github/workflows/validate_bids-examples.yml @@ -33,6 +33,35 @@ jobs: steps: - uses: actions/checkout@v4 + - name: "Apply patches for BIDS-2.0" + run: | + set -o pipefail + tools/bids-2.0/apply_all 2>&1 | tee /tmp/patch.log + + - name: "Show differences after patching" + run: | + git add . + git diff --cached + + - name: "Commit and push patched version online for possible introspection" + # Run only on a non-merge commit for the PR + if: > + github.repository == 'bids-standard/bids-specification' && + github.event.pull_request.head.ref == 'bids-2.0' && + github.event.pull_request.merge_commit_sha != github.sha + run: | + set -x + commit=$(git rev-parse HEAD) + branch=${GITHUB_HEAD_REF}-patched + git config --global user.email "github-ci@example.com" + git config --global user.name "BIDS 2.0 GitHub CI" + git checkout -b "$branch" + { + echo -e "Applied patches for ${GITHUB_HEAD_REF} to ${commit}\n"; + cat /tmp/patch.log; + } | git commit -F - + git push -f origin "$branch" + # Setup Python with bst - uses: actions/setup-python@v5 with: From 59fa77e13811d2f189f1b60b3c5bfc5b79ad6da2 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Sat, 27 Apr 2024 11:46:24 -0400 Subject: [PATCH 13/13] Absorb instructions into CONTRIBUTING-BIDS2.0.md to ease tune up etc --- CONTRIBUTING-BIDS2.0.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 CONTRIBUTING-BIDS2.0.md diff --git a/CONTRIBUTING-BIDS2.0.md b/CONTRIBUTING-BIDS2.0.md new file mode 100644 index 0000000000..5e8eae9681 --- /dev/null +++ b/CONTRIBUTING-BIDS2.0.md @@ -0,0 +1,33 @@ +# HOWTO for BIDS 2.0 development branch + +## References + +- Current PR: https://github.com/bids-standard/bids-specification/pull/1775 +- BIDS 2.0 Project: https://github.com/orgs/bids-standard/projects/10 +- Full list of issues to consider: https://github.com/bids-standard/bids-2-devel/issues +- https://github.com/bids-standard/bids-2-devel/issues/57 + +## Instructions + +- **Minimization of "persistent" comments: Do not comment in the main thread of this PR to avoid flooding it** + - Ideally: initiate PR from a feature branch with desired changes so we could concentrate discussion on the topic there + - If needed: start discussion as a comment attached to pertinent location in [diff view](https://github.com/bids-standard/bids-specification/pull/1775/files) so we could eventually resolve it to collapse +- **To contribute to this PR -- submit a PR against the `bids-2.0` branch** + - If you host that feature branch in this repository, it is RECOMMENDED to name it with `bids-2.0-` prefix. + - **WARNING**: this PR can rebase or otherwise modify its set of commits (squash etc), so it is recommended to keep your feature branch also succinct to just few commits so it would be obvious what to rebase on top of rebased [bids-2.0]. + - When changes accepted they would be incorporated without Merge commit and might undergo squashing into just few commits to reflect that change. + - Where relevant do not forget to reference or close an issue from [bids-2-devel/issues](https://github.com/bids-standard/bids-2-devel/issues) +- **Minimization of commits/diff**. To make it manageable to review, diff in this PR should avoid mass changes which could be scripted. + - For scripted changes there would be scripts and "fix up patches" collected (in this PR) under `tools/bids-2.0/` directory. See [tools/bids-2.0/README.md](https://github.com/bids-standard/bids-specification/pull/1775/files) and files under the [tools/bids-2.0/patches/] for more details and example(s). + - Those scripts will be applied on CI and changes pushed to [bids-2.0-patched](https://github.com/bids-standard/bids-specification/tree/bids-2.0) branch. + - you can review [diff bids-2.0..bids-2.0-patched](https://github.com/bids-standard/bids-specification/compare/bids-2.0..bids-2.0-patched) to see if scripts or fixup patches need to be adjusted + - that [bids-2.0-patched] is pushed by `validate-datasets` workflow + - rendered: https://bids-specification.readthedocs.io/en/bids-2.0-patched/ + - if fixes spotted needed on top of [bids-2.0-patched] - they should be committed on top and `git format-patch` and added to `tools/bids-2.0/patches/` or absorbed into already existing patches there in. + +[tools/bids-2.0/README.md]: https://github.com/bids-standard/bids-specification/pull/1775/files +[bids-2.0]: https://github.com/bids-standard/bids-specification/tree/bids-2.0 +[bids-2.0-patched]: https://github.com/bids-standard/bids-specification/tree/bids-2.0-patched +[tools/bids-2.0/]: https://github.com/bids-standard/bids-specification/tree/bids-2.0/tools/bids-2.0/ +[tools/bids-2.0/README.md]: https://github.com/bids-standard/bids-specification/tree/bids-2.0/tools/bids-2.0/README.md +[tools/bids-2.0/patches/]: https://github.com/bids-standard/bids-specification/tree/bids-2.0/tools/bids-2.0/patches