From 41e15fa4762a4a9df352280ae6e7bb2869577932 Mon Sep 17 00:00:00 2001
From: DBIC BIDS Team
Date: Fri, 17 Mar 2023 13:33:08 -0400
Subject: [PATCH 1/7] Adding TODO for a useful command to have to remove subject to be redone

---
 bin/reproin | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/bin/reproin b/bin/reproin
index eedc9aa..c934d9b 100755
--- a/bin/reproin
+++ b/bin/reproin
@@ -173,6 +173,19 @@ study-create)
     # after creating a dataset tag it with 0.0.0
     # This would allow for a sensible git describe output
     ;;
+study-remove-subject)
+    # TODO: provision for possibly having a specific session not entire subject
+    echo "not implemented"
+    exit 1
+    git rm -r sub-$sid sourcedata/sub-$sid .heudiconv/$sid  # draft only: unreachable until implemented
+    ;;
+study-remove-subject2redo)
+    echo "not implemented"
+    exit 1
+    # figure out where came from
+    $0 study-remove-subject
+    # add original location to skip file
+    ;;
 study-accession-skip)
     if [ -L "$skipfile" ]; then
         (

From adfb704ee03051d856085792ef4eb14492d111d1 Mon Sep 17 00:00:00 2001
From: DBIC BIDS Team
Date: Tue, 17 Oct 2023 11:35:17 -0400
Subject: [PATCH 2/7] Prototype of lists-check command found uncommitted - to find issues problems etc

---
 bin/reproin | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)

diff --git a/bin/reproin b/bin/reproin
index bb01855..0a5f0ff 100755
--- a/bin/reproin
+++ b/bin/reproin
@@ -21,6 +21,11 @@ function error() {
     echo "# ERROR: $@"
 }
 
+function debug() {
+    # echo "# DEBUG: $@"
+    :
+}
+
 
 #
 # A Master run script for a study
@@ -67,6 +72,90 @@ lists-update)
     eval "$heudiconvcmd -f $heuristic --command ls --files $dicomdir/$Y/$M/$D/*/00*cout*" >| "$listfile"
     exit 0
     ;;
+lists-check)
+    # TODO: add an option to just update when discrepancy found
+    doit=1
+    Y=${2:-20??};
+    M=${3:-??};
+    declare -A todo
+    declare -A groups
+    for d in $dicomdir/$Y/$M; do
+        #D=$(basename $d)
+        M=$(basename $d)
+        Y=$(dirname $d | xargs basename)
+        if [ $Y = 2016 ]; then
+            # skip that early one
+            continue
+        fi
+        listfile="$listdir/$Y${M}xx.txt"
+        if [ ! -e $listfile ]; then
+            echo "I: no $listfile"
+            if /bin/ls -d $dicomdir/$Y/$M/*/*/00*cout*; then
+                echo "E: there were legitimatish accession folders with scouts!"
+                todo["${M}_$Y"]="scouts"
+            fi
+            continue
+        fi
+        missing=
+        for a in $dicomdir/$Y/$M/*/*; do
+            # echo "$a"
+            case "$a" in
+                *_backup)
+                    if [ -e "${a%_backup}" ] ; then
+                        echo "skip odd backup $a for which original also exists";
+                        continue
+                    fi
+                    ;;
+            esac
+            # ATM we react only to ones having /00*cout*
+            scouts=$(/bin/ls -1 $a/00*cout* 2>&1 | head -n 1)
+            if echo "$scouts" | grep -q "No such file or directory"; then
+                debug "no scouts under $a"
+                # ls -l $a
+                continue
+            fi
+            if ! grep -q "^$a" "$listfile"; then
+                debug "scouts: >$scouts<"
+                if echo "$scouts" | grep -qi 'permission'; then
+                    # no need to try to update list if we have permissions issue
+                    # ATM
+                    reason="permissions? $(ls -dl $a)"
+                    groups[$(stat -c '%G' "$a")]+="$a "
+                else
+                    reason="unknown"
+                    missing+="$a"
+                fi
+                echo "$a is missing: $reason"
+            fi
+        done
+        if [ -n "$missing" ]; then
+            todo["${M}_$Y"]="missing"
+        fi
+    done
+    exit_code=0
+    if [[ -n "${todo[@]}" ]]; then
+        echo "List of TODOs:"
+        for MY in "${!todo[@]}"; do
+            Y=${MY#*_}
+            M=${MY%_*}
+            cmd=("$0" lists-update "$Y" "$M")  # array: quotes embedded in a string are not re-parsed on expansion
+            if [ -n "$doit" ]; then
+                "${cmd[@]}"
+            else
+                echo "${cmd[@]}"
+            fi
+        done
+        ((exit_code += 1))
+    fi
+    if [[ -n "${groups[@]}" ]]; then
+        echo "List of groups for which permissions fail: ${!groups[@]}"
+        for g in "${!groups[@]}"; do
+            echo " $g: ${groups[$g]}"
+        done
+        ((exit_code += 2))
+    fi
+    exit $exit_code
+    ;;
 lists-update-summary)
     # to be used on the stderr output from heudiconv --command ls or lists-update ran by cron job
     sed -ne "/StudySes/s,.*locator='\([^']*\)'.*,\1,gp" | sort | uniq -c | sort -nr

From a34f82ea9940052763b0cdbb64cc7c2d809a183e Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko
Date: Thu, 7 Dec 2023 22:09:04 -0500
Subject: [PATCH 3/7] embed code from cfg_metadatatypes which is now in deprecated

---
resources/cfg_reproin_bids.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/resources/cfg_reproin_bids.py b/resources/cfg_reproin_bids.py
index 5925ec2..7ba84c4 100755
--- a/resources/cfg_reproin_bids.py
+++ b/resources/cfg_reproin_bids.py
@@ -9,8 +9,10 @@ import os
 import sys
 from pathlib import Path  # for compatibility with older DataLad lacking .pathobj
 
+from datalad.consts import DATASET_CONFIG_FILE
 from datalad.distribution.dataset import require_dataset
 from datalad.support import path as op
+from datalad.utils import ensure_tuple_or_list
 #from datalad.support.external_versions import external_versions as ev
 
 ds = require_dataset(
@@ -97,7 +99,20 @@ def add_line_to_file(subpath, line):
     message="Apply default ReproIn BIDS dataset setup",
 )
 
-# run metadata type config last, will do another another commit
-ds.run_procedure(
-    spec=['cfg_metadatatypes', 'bids', 'nifti1'],
+existing_types = ensure_tuple_or_list(
+    ds.config.get('datalad.metadata.nativetype', [], get_all=True))
+for nt in 'bids', 'nifti1':
+    if nt in existing_types:
+        # this native type is already configured -- do not add a duplicate entry
+        continue
+    ds.config.add(
+        'datalad.metadata.nativetype',
+        nt,
+        scope='branch',
+        reload=False)
+
+ds.save(
+    path=op.join(ds.path, DATASET_CONFIG_FILE),
+    message="Configure metadata type(s)",
+    result_renderer='disabled'
 )

From 171e34f6accb6161053ff7c4af73d0e00e089127 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko
Date: Thu, 7 Dec 2023 22:18:36 -0500
Subject: [PATCH 4/7] Use bundled copy of the cfg procedure -- point to its location

---
 bin/reproin | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/bin/reproin b/bin/reproin
index 0a5f0ff..2d85e4a 100755
--- a/bin/reproin
+++ b/bin/reproin
@@ -52,6 +52,8 @@ valconfig=".bids-validator-config.json"
 heudiconvcmd="heudiconv -c dcm2niix --bids -o $bidsdir -g accession_number"
 
 self=$(realpath "$0")
+selftop=$(dirname "$self" | xargs dirname)  # grandparent directory of this script
+
 action="$1"
 
 # early
@@ -315,7 +317,7 @@ study-create)
         cd "$d"
     done
     cd "$studydir"
-    datalad run-procedure cfg_reproin_bids
+    datalad -c datalad.locations.user-procedures="$selftop/resources/" run-procedure cfg_reproin_bids
     git tag -m "The beginning" 0.0.0
     # after creating a dataset tag it with 0.0.0
     # This would allow for a sensible git describe output

From 57b9030e65419b3742cc2cc835b79a1fe80e2ca9 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko
Date: Thu, 7 Dec 2023 22:19:05 -0500
Subject: [PATCH 5/7] Robustify (and make possible to specify from outside) creation of initial hierarchy

---
 bin/reproin | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/bin/reproin b/bin/reproin
index 2d85e4a..1d62e56 100755
--- a/bin/reproin
+++ b/bin/reproin
@@ -39,7 +39,7 @@ if [ "$#" = 0 ]; then
 fi
 
 dicomdir="/inbox/DICOM"
-bidsdir="/inbox/BIDS"
+bidsdir="${BIDS_DIR:-/inbox/BIDS}"
 listdir="$bidsdir/reproin/lists"
 
 heuristic="reproin"
@@ -300,6 +300,9 @@ study-create)
         echo "$study already exists, nothing todo"
         exit 1;
     fi
+    if [ ! -e "$bidsdir" ] ; then
+        datalad create -c text2git "$bidsdir"
+    fi
     cd "$bidsdir"
     echo "$study" | tr '/' '\n' \
     | while read d; do
@@ -307,9 +310,9 @@ study-create)
         if [ "$PWD/$d" == "$studydir" ]; then
             datalad create --fake-dates -d . "$d"
         else
-            datalad create -d . "$d"
+            datalad create -c text2git -d . "$d"
         fi
-        if ! grep -q "\.nfs" "$d/.gitignore"; then
+        if ! grep -q "\.nfs" "$d/.gitignore" 2>/dev/null; then
             echo ".nfs*" >> "$d/.gitignore"
             datalad save -d "$d" -m "ignore .nfs* files" "$d/.gitignore"
         fi

From fc40ece6ac6896b6281c5034ed4796648f49ed3f Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko
Date: Thu, 7 Dec 2023 23:20:10 -0500
Subject: [PATCH 6/7] Add initial version of setup_containers

---
 bin/reproin | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/bin/reproin b/bin/reproin
index 1d62e56..eed0596 100755
--- a/bin/reproin
+++ b/bin/reproin
@@ -27,6 +27,29 @@ function debug() {
 }
 
 
+function setup_containers() {
+    if [ -e code/containers ]; then
+        error "There is already code/containers"
+        exit 1
+    fi
+    mkdir -p code
+    datalad clone -d . --reckless=ephemeral "$local_containers" code/containers
+    # but make stored url to point to public resource
+    # TODO -- take it from the ephemeral default location
+    git config --file ./.gitmodules submodule."code/containers".url https://datasets.datalad.org/repronim/containers/.git
+    git config --file ./.gitmodules submodule."code/containers".datalad-url https://datasets.datalad.org/repronim/containers/.git
+    (
+        cd code/containers/
+        scripts/freeze_versions --save-dataset=../../ repronim-reproin
+    )
+    # add bind mounts since we managed to contain it all nicely due to use
+    # of datalad to manage/create hierarchy within heudiconv
+    # TODO: make it so inside would not rely/really need hierarchy but also gets input data
+    cfg=datalad."containers.repronim-reproin".cmdexec ; git config -f .datalad/config "${cfg}" | sed -e "s,{img},-B '$dicomdir' -B '$bidsdir' {img},g" | | tr -d '\n' | xargs -0 git config -f .datalad/config ${cfg}
+
+    datalad save -m "Saving tune ups to enable using the embedded container with reproin" .gitmodules .datalad/config
+}
+
 #
 # A Master run script for a study
 #
@@ -41,6 +64,8 @@ fi
 dicomdir="/inbox/DICOM"
 bidsdir="${BIDS_DIR:-/inbox/BIDS}"
 listdir="$bidsdir/reproin/lists"
+# TODO: make it possible to not rely on local clone or prepopulate if not present?!
+local_containers="$HOME/repronim-containers"  # \$HOME, not ~: tilde is not expanded inside double quotes
 
 heuristic="reproin"
 
@@ -267,6 +292,11 @@ study-show-summary)
     echo " $studyshow.sh $date_modified"
     exit 0
     ;;
+setup-containers)
+    # operates on the current working directory
+    setup_containers
+    exit 0
+    ;;
 esac
 
 # The rest of the commands operate on a given study
@@ -324,6 +354,9 @@ study-create)
     git tag -m "The beginning" 0.0.0
     # after creating a dataset tag it with 0.0.0
     # This would allow for a sensible git describe output
+
+    # Embrace containerization setup
+    setup_containers
     ;;
 study-remove-subject)
     # TODO: provision for possibly having a specific session not entire subject

From 2e9c481842c64d8ff92266d925bb98e8e8c0c5b1 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko
Date: Thu, 7 Dec 2023 23:31:02 -0500
Subject: [PATCH 7/7] Helper to set it up to use development version of reproin script

---
 bin/reproin | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/bin/reproin b/bin/reproin
index eed0596..b727f33 100755
--- a/bin/reproin
+++ b/bin/reproin
@@ -45,11 +45,21 @@ function setup_containers() {
     # add bind mounts since we managed to contain it all nicely due to use
     # of datalad to manage/create hierarchy within heudiconv
     # TODO: make it so inside would not rely/really need hierarchy but also gets input data
-    cfg=datalad."containers.repronim-reproin".cmdexec ; git config -f .datalad/config "${cfg}" | sed -e "s,{img},-B '$dicomdir' -B '$bidsdir' {img},g" | | tr -d '\n' | xargs -0 git config -f .datalad/config ${cfg}
-
+    cfg=datalad."containers.repronim-reproin".cmdexec ; git config -f .datalad/config "${cfg}" | sed -e "s,{img},-B '$dicomdir' -B '$bidsdir' {img},g" | tr -d '\n' | xargs -0 git config -f .datalad/config ${cfg}
     datalad save -m "Saving tune ups to enable using the embedded container with reproin" .gitmodules .datalad/config
 }
 
+function setup_devel_reproin() {
+    # overload reproin in container to use the one from github since we have not released/placed into container yet
+    if [ ! -e code/containers ]; then
+        error "Must have setup_containers already"
+        exit 1
+    fi
+    datalad clone -d . https://github.com/ReproNim/reproin code/reproin
+    cfg=datalad."containers.repronim-reproin".cmdexec ; git config -f .datalad/config "${cfg}" | sed -e 's,{img} {cmd}.*,-B {img_dspath}/code/reproin/bin/reproin:/usr/local/bin/reproin {img} /usr/local/bin/reproin {cmd},g' | tr -d '\n' | xargs -0 git config -f .datalad/config ${cfg}
+    datalad save -m "Bundle/use development version of reproin script for now inside the container" .gitmodules .datalad/config
+}
+
 #
 # A Master run script for a study
 #
@@ -297,6 +307,11 @@ setup-containers)
     setup_containers
     exit 0
     ;;
+setup-devel-reproin)
+    # operates on the current working directory
+    setup_devel_reproin
+    exit 0
+    ;;
 esac
 
 # The rest of the commands operate on a given study
@@ -357,6 +372,7 @@ study-create)
 
     # Embrace containerization setup
     setup_containers
+    setup_devel_reproin
     ;;
 study-remove-subject)
     # TODO: provision for possibly having a specific session not entire subject