Skip to content

Commit

Permalink
[ENH] ADNI-to-BIDS converts only subjects in the provided ADNI directory if no subjects list is given (#1196)
Browse files Browse the repository at this point in the history

* Basic solution provided

* Regex to find subjects

* Create new method to return a subject list

* Small fix

* Change logic

* Handle clinical data

* Add unit tests

* Changes upon suggestions

* change API

* Small Fix
  • Loading branch information
AliceJoubert authored Jun 3, 2024
1 parent 1f554c6 commit b142b96
Show file tree
Hide file tree
Showing 12 changed files with 134 additions and 74 deletions.
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
"""Module for converting AV45 and Florbetaben PET of ADNI."""
from os import PathLike
from typing import List, Optional
from typing import List


def convert_adni_av45_fbb_pet(
source_dir: PathLike,
csv_dir: PathLike,
destination_dir: PathLike,
conversion_dir: PathLike,
subjects: Optional[List[str]] = None,
subjects: List[str],
mod_to_update: bool = False,
n_procs: int = 1,
):
Expand Down Expand Up @@ -50,10 +50,6 @@ def convert_adni_av45_fbb_pet(
)
from clinica.utils.stream import cprint

if not subjects:
adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE")
subjects = list(adni_merge.PTID.unique())

cprint(
f"Calculating paths of AV45 and Florbetaben PET images. Output will be stored in {conversion_dir}."
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def convert_adni_dwi(
csv_dir: PathLike,
destination_dir: PathLike,
conversion_dir: PathLike,
subjects: Optional[List[str]] = None,
subjects: List[str],
mod_to_update: bool = False,
n_procs: Optional[int] = 1,
):
Expand Down Expand Up @@ -50,10 +50,6 @@ def convert_adni_dwi(
)
from clinica.utils.stream import cprint

if not subjects:
adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE")
subjects = list(adni_merge.PTID.unique())

cprint(
f"Calculating paths of DWI images. Output will be stored in {conversion_dir}."
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def _convert_adni_fdg_pet(
destination_dir: PathLike,
conversion_dir: PathLike,
preprocessing_step: ADNIPreprocessingStep,
subjects: Optional[List[str]] = None,
subjects: List[str],
mod_to_update: bool = False,
n_procs: Optional[int] = 1,
):
Expand Down Expand Up @@ -92,9 +92,6 @@ def _convert_adni_fdg_pet(
)
from clinica.utils.stream import cprint

if subjects is None:
adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE")
subjects = list(adni_merge.PTID.unique())
cprint(
"Calculating paths of FDG PET images. "
f"Output will be stored in {conversion_dir}."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def convert_adni_flair(
csv_dir: PathLike,
destination_dir: PathLike,
conversion_dir: PathLike,
subjects: Optional[List[str]] = None,
subjects: List[str],
mod_to_update: bool = False,
n_procs: Optional[int] = 1,
):
Expand Down Expand Up @@ -46,10 +46,6 @@ def convert_adni_flair(
)
from clinica.utils.stream import cprint

if not subjects:
adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE")
subjects = list(adni_merge.PTID.unique())

cprint(
f"Calculating paths of FLAIR images. Output will be stored in {conversion_dir}.",
lvl="info",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def convert_adni_fmap(
csv_dir: PathLike,
destination_dir: PathLike,
conversion_dir: PathLike,
subjects: Optional[List[str]] = None,
subjects: List[str],
mod_to_update: bool = False,
n_procs: Optional[int] = 1,
):
Expand Down Expand Up @@ -58,10 +58,6 @@ def convert_adni_fmap(
source_dir = Path(source_dir)
conversion_dir = Path(conversion_dir)

if not subjects:
adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE")
subjects = list(adni_merge.PTID.unique())

cprint(
f"Calculating paths of fMRI field maps (FMAPs). Output will be stored in {conversion_dir}.",
lvl="debug",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def convert_adni_fmri(
csv_dir: PathLike,
destination_dir: PathLike,
conversion_dir: PathLike,
subjects: Optional[List[str]] = None,
subjects: List[str],
mod_to_update: bool = False,
n_procs: Optional[int] = 1,
convert_multiband: bool = True,
Expand Down Expand Up @@ -54,10 +54,6 @@ def convert_adni_fmri(
)
from clinica.utils.stream import cprint

if not subjects:
adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE")
subjects = list(adni_merge.PTID.unique())

cprint(
f"Calculating paths of fMRI images. Output will be stored in {conversion_dir}."
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def convert_adni_pib_pet(
csv_dir: PathLike,
destination_dir: PathLike,
conversion_dir: PathLike,
subjects: Optional[List[str]] = None,
subjects: List[str],
mod_to_update: bool = False,
n_procs: Optional[int] = 1,
):
Expand Down Expand Up @@ -50,10 +50,6 @@ def convert_adni_pib_pet(
)
from clinica.utils.stream import cprint

if not subjects:
adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE")
subjects = list(adni_merge.PTID.unique())

cprint(
f"Calculating paths of PIB PET images. Output will be stored in {conversion_dir}."
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def convert_adni_t1(
csv_dir: PathLike,
destination_dir: PathLike,
conversion_dir: PathLike,
subjects: Optional[List[str]] = None,
subjects: List[str],
mod_to_update: bool = False,
n_procs: Optional[int] = 1,
):
Expand Down Expand Up @@ -51,10 +51,6 @@ def convert_adni_t1(
)
from clinica.utils.stream import cprint

if not subjects:
adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE")
subjects = list(adni_merge.PTID.unique())

cprint(
f"Calculating paths of T1 images. Output will be stored in {conversion_dir}."
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def convert_adni_tau_pet(
csv_dir: PathLike,
destination_dir: PathLike,
conversion_dir: PathLike,
subjects: Optional[List[str]] = None,
subjects: List[str],
mod_to_update: bool = False,
n_procs: Optional[int] = 1,
):
Expand Down Expand Up @@ -50,10 +50,6 @@ def convert_adni_tau_pet(
)
from clinica.utils.stream import cprint

if not subjects:
adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE")
subjects = list(adni_merge.PTID.unique())

cprint(
f"Calculating paths of TAU PET images. Output will be stored in {conversion_dir}."
)
Expand Down
35 changes: 8 additions & 27 deletions clinica/iotools/converters/adni_to_bids/adni_to_bids.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from pathlib import Path
from typing import List, Optional

from clinica.iotools.abstract_converter import Converter
Expand Down Expand Up @@ -191,12 +192,11 @@ def convert_images(
source_dir: path to the ADNI directory
clinical_dir: path to the clinical data directory
dest_dir: path to the BIDS directory
subjs_list_path: list of subjects to process
subjs_list_path: Path to list of subjects to process
modalities: modalities to convert (T1, PET_FDG, PET_AMYLOID, PET_TAU, DWI, FLAIR, fMRI)
force_new_extraction: if given pre-existing images in the BIDS directory will be erased and extracted again.
"""
import os
from copy import copy
from os import path

import clinica.iotools.converters.adni_to_bids.adni_modalities.adni_av45_fbb_pet as adni_av45_fbb
Expand All @@ -208,36 +208,17 @@ def convert_images(
import clinica.iotools.converters.adni_to_bids.adni_modalities.adni_pib_pet as adni_pib
import clinica.iotools.converters.adni_to_bids.adni_modalities.adni_t1 as adni_t1
import clinica.iotools.converters.adni_to_bids.adni_modalities.adni_tau_pet as adni_tau
from clinica.iotools.converters.adni_to_bids.adni_utils import (
load_clinical_csv,
)
from clinica.iotools.converters.adni_to_bids.adni_utils import get_subjects_list
from clinica.utils.stream import cprint

modalities = modalities or self.get_modalities_supported()

adni_merge = load_clinical_csv(clinical_dir, "ADNIMERGE")

# Load a file with subjects list or compute all the subjects
if subjs_list_path is not None:
cprint("Loading a subjects lists provided by the user...")
subjs_list = [line.rstrip("\n") for line in open(subjs_list_path)]
subjs_list_copy = copy(subjs_list)

# Check that there are no errors in subjs_list given by the user
for subj in subjs_list_copy:
adnimerge_subj = adni_merge[adni_merge.PTID == subj]
if subjs_list_path:
subjs_list_path = Path(subjs_list_path)

if len(adnimerge_subj) == 0:
cprint(
msg=f"Subject with PTID {subj} does not exist. Please check your subjects list.",
lvl="warning",
)
subjs_list.remove(subj)
del subjs_list_copy

else:
cprint("Using all the subjects contained into the ADNIMERGE.csv file...")
subjs_list = list(adni_merge["PTID"].unique())
subjs_list = get_subjects_list(
Path(source_dir), Path(clinical_dir), subjs_list_path
)

# Create the output folder if is not already existing
os.makedirs(dest_dir, exist_ok=True)
Expand Down
62 changes: 60 additions & 2 deletions clinica/iotools/converters/adni_to_bids/adni_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,62 @@ def from_string(cls, study_name: str):
)


def _define_subjects_list(
    source_dir: Path,
    subjs_list_path: Optional[Path] = None,
) -> List[str]:
    """Build the initial list of subject IDs to convert.

    Parameters
    ----------
    source_dir : Path
        The ADNI dataset directory. It is only scanned when no subjects
        file is provided.

    subjs_list_path : Path, optional
        Text file holding one subject ID per line. When given, the IDs it
        contains are returned without being matched against the ADNI
        naming pattern.

    Returns
    -------
    List[str]
        The subject IDs read from the file, or the names of the entries of
        ``source_dir`` that fully match ``\\d{3}_S_\\d{4}``, sorted
        alphabetically.
    """
    # todo : here or in utils for all converters ?
    import re

    from clinica.utils.stream import cprint

    if subjs_list_path:
        cprint("Loading a subjects lists provided by the user...")
        # Strip surrounding whitespace and drop blank lines so that a stray
        # empty line or trailing space in the file is not treated as a
        # (non-existent) subject ID further down the pipeline.
        stripped_lines = (
            line.strip() for line in subjs_list_path.read_text().splitlines()
        )
        return [subject for subject in stripped_lines if subject]

    cprint(f"Using the subjects contained in the ADNI dataset at {source_dir}")
    rgx = re.compile(r"\d{3}_S_\d{4}")
    # iterdir() yields entries in arbitrary order; sort to get a reproducible list.
    return sorted(
        entry.name for entry in source_dir.iterdir() if rgx.fullmatch(entry.name)
    )


def _check_subjects_list(
    subjs_list: List[str],
    clinical_dir: Path,
) -> List[str]:
    """Keep only the subjects that have an entry in the ADNIMERGE clinical data.

    A warning is emitted for every subject dropped, and another one if no
    subject is left at all.

    Parameters
    ----------
    subjs_list : List[str]
        Candidate subject IDs (PTIDs). This list is left untouched.

    clinical_dir : Path
        Directory from which the ADNIMERGE clinical file is loaded.

    Returns
    -------
    List[str]
        A new list with the subjects of ``subjs_list`` whose PTID appears in
        ADNIMERGE, in their original order.
    """
    from clinica.utils.stream import cprint

    adni_merge = load_clinical_csv(str(clinical_dir), "ADNIMERGE")
    # Build the set of known PTIDs once instead of filtering the whole
    # dataframe for every subject.
    known_ptids = set(adni_merge.PTID.unique())

    checked_subjects = []
    for subj in subjs_list:
        if subj in known_ptids:
            checked_subjects.append(subj)
            continue
        cprint(
            msg=(
                f"Subject with PTID {subj} does not have corresponding clinical data. "
                "Please check your subjects list or directory."
            ),
            lvl="warning",
        )

    if not checked_subjects:
        cprint("Processing an empty list of subjects.", lvl="warning")

    return checked_subjects


def get_subjects_list(
    source_dir: Path,
    clinical_dir: Path,
    subjs_list_path: Optional[Path] = None,
) -> List[str]:
    """Return the subjects to convert, restricted to those with clinical data.

    Parameters
    ----------
    source_dir : Path
        The ADNI dataset directory, forwarded to ``_define_subjects_list``.

    clinical_dir : Path
        The clinical data directory, forwarded to ``_check_subjects_list``.

    subjs_list_path : Path, optional
        Path to a text file listing the subjects, forwarded to
        ``_define_subjects_list``.

    Returns
    -------
    List[str]
        The list produced by ``_define_subjects_list`` after it has been
        passed through ``_check_subjects_list``.
    """
    candidate_subjects = _define_subjects_list(source_dir, subjs_list_path)
    return _check_subjects_list(candidate_subjects, clinical_dir)


def visits_to_timepoints(
subject,
mri_list_subj,
Expand Down Expand Up @@ -981,8 +1037,10 @@ def create_adni_sessions_dict(
]
df_subj_session = pd.concat([df_subj_session, df_filtered], axis=1)
if df_subj_session.empty:
raise ValueError("Empty dataset detected. Clinical data cannot be extracted.")

cprint(
"Empty dataset detected. Clinical data cannot be extracted.", lvl="warning"
)
return
# Nv/None refer to sessions whose session is undefined. "sc" is the screening session with unreliable (incomplete)
# data.
df_subj_session = df_subj_session[
Expand Down
56 changes: 56 additions & 0 deletions test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,62 @@
from pandas.testing import assert_frame_equal, assert_series_equal


@pytest.mark.parametrize(
    "input, expected",
    [
        (
            {"001_S_0001", "001_S_0002", "001_S_0003"},
            {"001_S_0001", "001_S_0002", "001_S_0003"},
        ),
        ({"001_S_0001", "001_S_00014", ".001_S_0001", "001S0001"}, {"001_S_0001"}),
    ],
)
def test_define_subjects_list_directory(tmp_path, input, expected):
    """Without a subjects file, only well-formed subject names found in the source directory are returned."""
    from clinica.iotools.converters.adni_to_bids.adni_utils import _define_subjects_list

    adni_root = tmp_path / "source_dir"
    adni_root.mkdir()
    for entry_name in input:
        # Empty files are created here; the test only relies on entry names.
        adni_root.joinpath(entry_name).touch()

    detected = set(_define_subjects_list(adni_root))

    assert detected == expected


def test_define_subjects_list_txt(tmp_path):
    """With a subjects file, its lines are returned even if they are not well-formed subject names."""
    from clinica.iotools.converters.adni_to_bids.adni_utils import _define_subjects_list

    listed_subjects = {"001_S_0001", "001_S_00022", "001S0003"}
    subjs_list_path = tmp_path / "subjects_list.txt"
    subjs_list_path.write_text("\n".join(listed_subjects))
    # The source directory is deliberately not created: it must not be read
    # when a subjects file is provided.
    source_dir = tmp_path / "source_dir"

    loaded_subjects = _define_subjects_list(source_dir, subjs_list_path)

    assert set(loaded_subjects) == listed_subjects


@pytest.mark.parametrize(
    "write_all, subjects, expected",
    [
        (True, ["001_S_0001", "001_S_0002"], {"001_S_0001", "001_S_0002"}),
        (False, ["001_S_0001", "001_S_0002"], {"001_S_0001"}),
    ],
)
def test_check_subjects_list(tmp_path, write_all, subjects, expected):
    """Subjects missing from ADNIMERGE must be filtered out of the list.

    When ``write_all`` is False, the last subject is left out of the clinical
    file while still being part of the list under test, so the filtering
    branch is actually exercised.
    """
    from clinica.iotools.converters.adni_to_bids.adni_utils import _check_subjects_list

    clinical_dir = tmp_path / "clinical_dir"
    clinical_dir.mkdir()

    # Slice instead of pop(): the parametrized list must stay intact so that
    # the full list of subjects is what gets checked below.
    subjects_with_clinical_data = subjects if write_all else subjects[:-1]

    adni_df = pd.DataFrame(columns=["PTID"], data=subjects_with_clinical_data)
    adni_df.to_csv(clinical_dir / "ADNIMERGE.csv")

    # Pass a copy so the shared parametrized list is never modified.
    assert set(_check_subjects_list(list(subjects), clinical_dir)) == expected


@pytest.mark.parametrize(
"input_value,expected",
[
Expand Down

0 comments on commit b142b96

Please sign in to comment.