From b142b968f4080d5d3833fcdf9a415d78bfa6e96d Mon Sep 17 00:00:00 2001 From: AliceJoubert <158147135+AliceJoubert@users.noreply.github.com> Date: Mon, 3 Jun 2024 11:58:40 +0200 Subject: [PATCH] [ENH] ADNI-to-BIDS converts only subjects in the provided ADNI directory if no subjects list is given (#1196) * Basic solution provided * Regex to find subjects * Create new method to return a subject list * Small fix * Change logic * Handle clinical data * Add unit tests * Changes upon suggestions * change API * Small Fix --- .../adni_modalities/adni_av45_fbb_pet.py | 8 +-- .../adni_to_bids/adni_modalities/adni_dwi.py | 6 +- .../adni_modalities/adni_fdg_pet.py | 5 +- .../adni_modalities/adni_flair.py | 6 +- .../adni_to_bids/adni_modalities/adni_fmap.py | 6 +- .../adni_to_bids/adni_modalities/adni_fmri.py | 6 +- .../adni_modalities/adni_pib_pet.py | 6 +- .../adni_to_bids/adni_modalities/adni_t1.py | 6 +- .../adni_modalities/adni_tau_pet.py | 6 +- .../converters/adni_to_bids/adni_to_bids.py | 35 +++-------- .../converters/adni_to_bids/adni_utils.py | 62 ++++++++++++++++++- .../adni_to_bids/test_adni_utils.py | 56 +++++++++++++++++ 12 files changed, 134 insertions(+), 74 deletions(-) diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_av45_fbb_pet.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_av45_fbb_pet.py index 842d885f9..855e15d5d 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_av45_fbb_pet.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_av45_fbb_pet.py @@ -1,6 +1,6 @@ """Module for converting AV45 and Florbetaben PET of ADNI.""" from os import PathLike -from typing import List, Optional +from typing import List def convert_adni_av45_fbb_pet( @@ -8,7 +8,7 @@ def convert_adni_av45_fbb_pet( csv_dir: PathLike, destination_dir: PathLike, conversion_dir: PathLike, - subjects: Optional[List[str]] = None, + subjects: List[str], mod_to_update: bool = False, n_procs: int = 1, ): @@ -50,10 +50,6 @@ def convert_adni_av45_fbb_pet( ) from clinica.utils.stream import cprint - if not subjects: - adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") - subjects = list(adni_merge.PTID.unique()) - cprint( f"Calculating paths of AV45 and Florbetaben PET images. Output will be stored in {conversion_dir}." ) diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_dwi.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_dwi.py index d7f1513fc..d64a455d6 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_dwi.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_dwi.py @@ -8,7 +8,7 @@ def convert_adni_dwi( csv_dir: PathLike, destination_dir: PathLike, conversion_dir: PathLike, - subjects: Optional[List[str]] = None, + subjects: List[str], mod_to_update: bool = False, n_procs: Optional[int] = 1, ): @@ -50,10 +50,6 @@ def convert_adni_dwi( ) from clinica.utils.stream import cprint - if not subjects: - adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") - subjects = list(adni_merge.PTID.unique()) - cprint( f"Calculating paths of DWI images. Output will be stored in {conversion_dir}." ) diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fdg_pet.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fdg_pet.py index eb0ecc977..fb583e699 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fdg_pet.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fdg_pet.py @@ -47,7 +47,7 @@ def _convert_adni_fdg_pet( destination_dir: PathLike, conversion_dir: PathLike, preprocessing_step: ADNIPreprocessingStep, - subjects: Optional[List[str]] = None, + subjects: List[str], mod_to_update: bool = False, n_procs: Optional[int] = 1, ): @@ -92,9 +92,6 @@ def _convert_adni_fdg_pet( ) from clinica.utils.stream import cprint - if subjects is None: - adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") - subjects = list(adni_merge.PTID.unique()) cprint( "Calculating paths of FDG PET images. " f"Output will be stored in {conversion_dir}." diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_flair.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_flair.py index b112dad84..7fc81e0e7 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_flair.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_flair.py @@ -8,7 +8,7 @@ def convert_adni_flair( csv_dir: PathLike, destination_dir: PathLike, conversion_dir: PathLike, - subjects: Optional[List[str]] = None, + subjects: List[str], mod_to_update: bool = False, n_procs: Optional[int] = 1, ): @@ -46,10 +46,6 @@ def convert_adni_flair( ) from clinica.utils.stream import cprint - if not subjects: - adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") - subjects = list(adni_merge.PTID.unique()) - cprint( f"Calculating paths of FLAIR images. Output will be stored in {conversion_dir}.", lvl="info", diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fmap.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fmap.py index b51e2d2b8..a2bb1ce9b 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fmap.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fmap.py @@ -19,7 +19,7 @@ def convert_adni_fmap( csv_dir: PathLike, destination_dir: PathLike, conversion_dir: PathLike, - subjects: Optional[List[str]] = None, + subjects: List[str], mod_to_update: bool = False, n_procs: Optional[int] = 1, ): @@ -58,10 +58,6 @@ def convert_adni_fmap( source_dir = Path(source_dir) conversion_dir = Path(conversion_dir) - if not subjects: - adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") - subjects = list(adni_merge.PTID.unique()) - cprint( f"Calculating paths of fMRI field maps (FMAPs). Output will be stored in {conversion_dir}.", lvl="debug", diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fmri.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fmri.py index 1e3581583..a74c9c407 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fmri.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fmri.py @@ -10,7 +10,7 @@ def convert_adni_fmri( csv_dir: PathLike, destination_dir: PathLike, conversion_dir: PathLike, - subjects: Optional[List[str]] = None, + subjects: List[str], mod_to_update: bool = False, n_procs: Optional[int] = 1, convert_multiband: bool = True, @@ -54,10 +54,6 @@ def convert_adni_fmri( ) from clinica.utils.stream import cprint - if not subjects: - adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") - subjects = list(adni_merge.PTID.unique()) - cprint( f"Calculating paths of fMRI images. Output will be stored in {conversion_dir}." ) diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_pib_pet.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_pib_pet.py index 8a39840c9..1856a33b2 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_pib_pet.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_pib_pet.py @@ -8,7 +8,7 @@ def convert_adni_pib_pet( csv_dir: PathLike, destination_dir: PathLike, conversion_dir: PathLike, - subjects: Optional[List[str]] = None, + subjects: List[str], mod_to_update: bool = False, n_procs: Optional[int] = 1, ): @@ -50,10 +50,6 @@ def convert_adni_pib_pet( ) from clinica.utils.stream import cprint - if not subjects: - adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") - subjects = list(adni_merge.PTID.unique()) - cprint( f"Calculating paths of PIB PET images. Output will be stored in {conversion_dir}." ) diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_t1.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_t1.py index 67a1967ac..3e0c99f6d 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_t1.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_t1.py @@ -8,7 +8,7 @@ def convert_adni_t1( csv_dir: PathLike, destination_dir: PathLike, conversion_dir: PathLike, - subjects: Optional[List[str]] = None, + subjects: List[str], mod_to_update: bool = False, n_procs: Optional[int] = 1, ): @@ -51,10 +51,6 @@ def convert_adni_t1( ) from clinica.utils.stream import cprint - if not subjects: - adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") - subjects = list(adni_merge.PTID.unique()) - cprint( f"Calculating paths of T1 images. Output will be stored in {conversion_dir}." ) diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_tau_pet.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_tau_pet.py index 2fd77cdb3..fea7bfa8a 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_tau_pet.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_tau_pet.py @@ -8,7 +8,7 @@ def convert_adni_tau_pet( csv_dir: PathLike, destination_dir: PathLike, conversion_dir: PathLike, - subjects: Optional[List[str]] = None, + subjects: List[str], mod_to_update: bool = False, n_procs: Optional[int] = 1, ): @@ -50,10 +50,6 @@ def convert_adni_tau_pet( ) from clinica.utils.stream import cprint - if not subjects: - adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") - subjects = list(adni_merge.PTID.unique()) - cprint( f"Calculating paths of TAU PET images. Output will be stored in {conversion_dir}." ) diff --git a/clinica/iotools/converters/adni_to_bids/adni_to_bids.py b/clinica/iotools/converters/adni_to_bids/adni_to_bids.py index b950403e4..7092a2229 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_to_bids.py +++ b/clinica/iotools/converters/adni_to_bids/adni_to_bids.py @@ -1,3 +1,4 @@ +from pathlib import Path from typing import List, Optional from clinica.iotools.abstract_converter import Converter @@ -191,12 +192,11 @@ def convert_images( source_dir: path to the ADNI directory clinical_dir: path to the clinical data directory dest_dir: path to the BIDS directory - subjs_list_path: list of subjects to process + subjs_list_path: Path to list of subjects to process modalities: modalities to convert (T1, PET_FDG, PET_AMYLOID, PET_TAU, DWI, FLAIR, fMRI) force_new_extraction: if given pre-existing images in the BIDS directory will be erased and extracted again. """ import os - from copy import copy from os import path import clinica.iotools.converters.adni_to_bids.adni_modalities.adni_av45_fbb_pet as adni_av45_fbb @@ -208,36 +208,17 @@ def convert_images( import clinica.iotools.converters.adni_to_bids.adni_modalities.adni_pib_pet as adni_pib import clinica.iotools.converters.adni_to_bids.adni_modalities.adni_t1 as adni_t1 import clinica.iotools.converters.adni_to_bids.adni_modalities.adni_tau_pet as adni_tau - from clinica.iotools.converters.adni_to_bids.adni_utils import ( - load_clinical_csv, - ) + from clinica.iotools.converters.adni_to_bids.adni_utils import get_subjects_list from clinica.utils.stream import cprint modalities = modalities or self.get_modalities_supported() - adni_merge = load_clinical_csv(clinical_dir, "ADNIMERGE") - - # Load a file with subjects list or compute all the subjects - if subjs_list_path is not None: - cprint("Loading a subjects lists provided by the user...") - subjs_list = [line.rstrip("\n") for line in open(subjs_list_path)] - subjs_list_copy = copy(subjs_list) - - # Check that there are no errors in subjs_list given by the user - for subj in subjs_list_copy: - adnimerge_subj = adni_merge[adni_merge.PTID == subj] + if subjs_list_path: + subjs_list_path = Path(subjs_list_path) - if len(adnimerge_subj) == 0: - cprint( - msg=f"Subject with PTID {subj} does not exist. Please check your subjects list.", - lvl="warning", - ) - subjs_list.remove(subj) - del subjs_list_copy - - else: - cprint("Using all the subjects contained into the ADNIMERGE.csv file...") - subjs_list = list(adni_merge["PTID"].unique()) + subjs_list = get_subjects_list( + Path(source_dir), Path(clinical_dir), subjs_list_path + ) # Create the output folder if is not already existing os.makedirs(dest_dir, exist_ok=True) diff --git a/clinica/iotools/converters/adni_to_bids/adni_utils.py b/clinica/iotools/converters/adni_to_bids/adni_utils.py index 48f0867d2..b6e9a1008 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_utils.py +++ b/clinica/iotools/converters/adni_to_bids/adni_utils.py @@ -29,6 +29,62 @@ def from_string(cls, study_name: str): ) +def _define_subjects_list( + source_dir: Path, + subjs_list_path: Optional[Path] = None, +) -> List[str]: + # todo : here or in utils for all converters ? + import re + + from clinica.utils.stream import cprint + + if subjs_list_path: + cprint("Loading a subjects lists provided by the user...") + return subjs_list_path.read_text().splitlines() + + cprint(f"Using the subjects contained in the ADNI dataset at {source_dir}") + rgx = re.compile(r"\d{3}_S_\d{4}") + return list(filter(rgx.fullmatch, [folder.name for folder in source_dir.iterdir()])) + + +def _check_subjects_list( + subjs_list: List[str], + clinical_dir: Path, +) -> List[str]: + from copy import copy + + from clinica.utils.stream import cprint + + subjs_list_copy = copy(subjs_list) + adni_merge = load_clinical_csv(str(clinical_dir), "ADNIMERGE") + # Check that there are no errors in subjs_list given by the user + for subj in subjs_list_copy: + adnimerge_subj = adni_merge[adni_merge.PTID == subj] + if len(adnimerge_subj) == 0: + cprint( + msg=f"Subject with PTID {subj} does not have corresponding clinical data." + f"Please check your subjects list or directory.", + lvl="warning", + ) + subjs_list.remove(subj) + del subjs_list_copy + + if not subjs_list: + cprint(f"Processing an empty list of subjects.", lvl="warning") + + return subjs_list + + +def get_subjects_list( + source_dir: Path, + clinical_dir: Path, + subjs_list_path: Optional[Path] = None, +) -> List[str]: + return _check_subjects_list( + _define_subjects_list(source_dir, subjs_list_path), clinical_dir + ) + + def visits_to_timepoints( subject, mri_list_subj, @@ -981,8 +1037,10 @@ def create_adni_sessions_dict( ] df_subj_session = pd.concat([df_subj_session, df_filtered], axis=1) if df_subj_session.empty: - raise ValueError("Empty dataset detected. Clinical data cannot be extracted.") - + cprint( + "Empty dataset detected. Clinical data cannot be extracted.", lvl="warning" + ) + return # Nv/None refer to sessions whose session is undefined. "sc" is the screening session with unreliable (incomplete) # data. df_subj_session = df_subj_session[ diff --git a/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py b/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py index a529f1341..41907acef 100644 --- a/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py +++ b/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py @@ -5,6 +5,62 @@ from pandas.testing import assert_frame_equal, assert_series_equal +@pytest.mark.parametrize( + "input, expected", + [ + ( + {"001_S_0001", "001_S_0002", "001_S_0003"}, + {"001_S_0001", "001_S_0002", "001_S_0003"}, + ), + ({"001_S_0001", "001_S_00014", ".001_S_0001", "001S0001"}, {"001_S_0001"}), + ], +) +def test_define_subjects_list_directory(tmp_path, input, expected): + from clinica.iotools.converters.adni_to_bids.adni_utils import _define_subjects_list + + source_dir = tmp_path / "source_dir" + source_dir.mkdir() + + for subject in input: + (source_dir / subject).touch() + + assert set(_define_subjects_list(source_dir)) == expected + + +def test_define_subjects_list_txt(tmp_path): + from clinica.iotools.converters.adni_to_bids.adni_utils import _define_subjects_list + + source_dir = tmp_path / "source_dir" + subjs_list_path = tmp_path / "subjects_list.txt" + input = {"001_S_0001", "001_S_00022", "001S0003"} + with open(subjs_list_path, "w") as f: + f.write("\n".join(input)) + + assert set(_define_subjects_list(source_dir, subjs_list_path)) == input + + +@pytest.mark.parametrize( + "write_all, input, expected", + [ + (True, ["001_S_0001", "001_S_0002"], {"001_S_0001", "001_S_0002"}), + (False, ["001_S_0001", "001_S_0002"], {"001_S_0001"}), + ], +) +def test_check_subjects_list(tmp_path, write_all, input, expected): + from clinica.iotools.converters.adni_to_bids.adni_utils import _check_subjects_list + + clinical_dir = tmp_path / "clinical_dir" + clinical_dir.mkdir() + + if not write_all: + input.pop() + + adni_df = pd.DataFrame(columns=["PTID"], data=input) + adni_df.to_csv(clinical_dir / "ADNIMERGE.csv") + + assert set(_check_subjects_list(input, clinical_dir)) == expected + + @pytest.mark.parametrize( "input_value,expected", [