Skip to content

Commit 8613505

Browse files
authored
Merge pull request #17 from handwerkerd/gen-req-metrics-dh
Getting generate metrics fully running
2 parents 4b751eb + 3fe2443 commit 8613505

9 files changed

+83
-51
lines changed

docs/building_decision_trees.rst

+4-6
Original file line numberDiff line numberDiff line change
@@ -210,11 +210,9 @@ that is used to check whether results are plausible & can help avoid mistakes.
210210

211211
- necessary_metrics
212212
A list of the necessary metrics in the component table that will be used
213-
by the tree. If a metric doesn't exist then this will raise an error instead
214-
of executing a tree. (Depending on future code development, this could
215-
potentially be used to run ``tedana`` by specifying a decision tree and
216-
metrics are calculated based on the contents of this field.) If a necessary
217-
metric isn't used, there will be a warning.
213+
by the tree. This field defines what metrics will be calculated on each ICA
214+
component. If a metric doesn't exist then this will raise an error instead
215+
of executing a tree. If a necessary metric isn't used, there will be a warning.
218216

219217
- generated_metrics
220218
An optional initial field. It lists metrics that are to be calculated as
@@ -378,7 +376,7 @@ dataframe column that is True or False for the components in ``decide_comps`` ba
378376
the function's criteria.
379377
That column is an input to :func:`~tedana.selection.selection_utils.change_comptable_classifications`,
380378
which will update the component_table classifications, update the classification history
381-
in component_status_table, and update the component classification_tags. Components not
379+
in ``selector.component_status_table_``, and update the component classification_tags. Components not
382380
in ``decide_comps`` retain their existing classifications and tags.
383381
:func:`~tedana.selection.selection_utils.change_comptable_classifications`
384382
also returns and should assign values to

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ dependencies = [
2727
"nibabel>=2.5.1,<=5.2.0",
2828
"nilearn>=0.7,<=0.10.3",
2929
"numpy>=1.16,<=1.26.4",
30-
"pandas>=2.0,<=2.2.0",
30+
"pandas>=2.0,<=2.2.1",
3131
"pybtex",
3232
"pybtex-apa-style",
3333
"scikit-learn>=0.21, <=1.4.1.post1",

tedana/io.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -643,7 +643,7 @@ def writeresults(ts, mask, comptable, mmix, io_generator):
643643
========================================= ===========================================
644644
Filename Content
645645
========================================= ===========================================
646-
desc-denoised_bold.nii.gz Denoised time series.
646+
desc-denoised_bold.nii.gz Denoised time series.
647647
648648
desc-optcomAccepted_bold.nii.gz High-Kappa time series. (only with verbose)
649649
desc-optcomRejected_bold.nii.gz Low-Kappa time series. (only with verbose)

tedana/selection/component_selector.py

+14-10
Original file line numberDiff line numberDiff line change
@@ -272,8 +272,8 @@ def select(self, component_table, cross_component_metrics={}, status_table=None)
272272
-----
273273
Adds to the ``ComponentSelector``:
274274
275-
- component_status_table_: empty dataframe or contents of inputted status_table
276-
- cross_component_metrics_: empty dict or contents of inputed values
275+
- ``component_status_table_``: empty dataframe or contents of inputted status_table
276+
- ``cross_component_metrics_``: empty dict or contents of inputted values
277277
- used_metrics: empty set
278278
279279
Any parameter that is used by a decision tree node function can be passed
@@ -307,19 +307,18 @@ def select(self, component_table, cross_component_metrics={}, status_table=None)
307307
308308
When this is run, multiple elements in `ComponentSelector` will change including:
309309
310-
- component_table_: ``classification`` column with ``accepted`` or ``rejected`` labels
310+
- ``component_table_``: ``classification`` column with ``accepted`` or ``rejected`` labels
311311
and ``classification_tags`` column which can hold multiple comma-separated labels
312312
explaining why a classification happened
313-
- cross_component_metrics_: Any values that were calculated based on the metric
313+
- ``cross_component_metrics_``: Any values that were calculated based on the metric
314314
values across components or by direct user input
315-
- component_status_table: Contains the classification statuses at each node in
315+
- ``component_status_table_``: Contains the classification statuses at each node in
316316
the decision tree
317317
- used_metrics: A list of metrics used in the selection process
318318
- nodes: The original tree definition with an added ``outputs`` key listing
319319
everything that changed in each node
320-
- current_node_idx_: The total number of nodes run in ``ComponentSelector``
320+
- ``current_node_idx_``: The total number of nodes run in ``ComponentSelector``
321321
"""
322-
self.__dict__.update(cross_component_metrics)
323322
self.cross_component_metrics_ = cross_component_metrics
324323

325324
# Construct an un-executed selector
@@ -349,8 +348,13 @@ def select(self, component_table, cross_component_metrics={}, status_table=None)
349348
self.start_idx_ = 0
350349
else:
351350
# Since a status table exists, we need to skip nodes up to the
352-
# point where the last tree finished
353-
self.start_idx_ = len(self.tree["nodes"])
351+
# point where the last tree finished. Nodes that were executed
352+
# have an output field. Identify the last node with an output field
353+
tmp_idx = len(self.tree["nodes"]) - 1
354+
while ("outputs" not in self.tree["nodes"][tmp_idx]) and (tmp_idx > 0):
355+
tmp_idx -= 1
356+
# start at the first node that does not have an output field
357+
self.start_idx_ = tmp_idx + 1
354358
LGR.info(f"Start is {self.start_idx_}")
355359
self.component_status_table_ = status_table
356360

@@ -440,7 +444,7 @@ def check_null(self, params, fcn):
440444
for key, val in params.items():
441445
if val is None:
442446
try:
443-
params[key] = getattr(self, key)
447+
params[key] = self.cross_component_metrics_[key]
444448
except AttributeError:
445449
raise ValueError(
446450
f"Parameter {key} is required in node {fcn}, but not defined. "

tedana/selection/selection_nodes.py

+9-5
Original file line numberDiff line numberDiff line change
@@ -717,7 +717,7 @@ def calc_kappa_elbow(
717717
outputs = {
718718
"decision_node_idx": selector.current_node_idx_,
719719
"node_label": None,
720-
"n_echos": selector.n_echos,
720+
"n_echos": selector.cross_component_metrics_["n_echos"],
721721
"used_metrics": {"kappa"},
722722
"calc_cross_comp_metrics": [
723723
"kappa_elbow_kundu",
@@ -775,7 +775,11 @@ def calc_kappa_elbow(
775775
outputs["kappa_allcomps_elbow"],
776776
outputs["kappa_nonsig_elbow"],
777777
outputs["varex_upper_p"],
778-
) = kappa_elbow_kundu(selector.component_table_, selector.n_echos, comps2use=comps2use)
778+
) = kappa_elbow_kundu(
779+
selector.component_table_,
780+
selector.cross_component_metrics_["n_echos"],
781+
comps2use=comps2use,
782+
)
779783
selector.cross_component_metrics_["kappa_elbow_kundu"] = outputs["kappa_elbow_kundu"]
780784
selector.cross_component_metrics_["kappa_allcomps_elbow"] = outputs["kappa_allcomps_elbow"]
781785
selector.cross_component_metrics_["kappa_nonsig_elbow"] = outputs["kappa_nonsig_elbow"]
@@ -845,7 +849,7 @@ def calc_rho_elbow(
845849
outputs = {
846850
"decision_node_idx": selector.current_node_idx_,
847851
"node_label": None,
848-
"n_echos": selector.n_echos,
852+
"n_echos": selector.cross_component_metrics_["n_echos"],
849853
"calc_cross_comp_metrics": [
850854
elbow_name,
851855
"rho_allcomps_elbow",
@@ -900,7 +904,7 @@ def calc_rho_elbow(
900904
outputs["elbow_f05"],
901905
) = rho_elbow_kundu_liberal(
902906
selector.component_table_,
903-
selector.n_echos,
907+
selector.cross_component_metrics_["n_echos"],
904908
rho_elbow_type=rho_elbow_type,
905909
comps2use=comps2use,
906910
subset_comps2use=subset_comps2use,
@@ -1214,7 +1218,7 @@ def calc_varex_thresh(
12141218
num_highest_var_comps : :obj:`str` :obj:`int`
12151219
percentile can be calculated on the num_highest_var_comps components with the
12161220
lowest variance. Either input an integer directly or input a string that is
1217-
a parameter stored in selector.cross_component_metrics_ ("num_acc_guess" in
1221+
a parameter stored in ``selector.cross_component_metrics_`` ("num_acc_guess" in
12181222
original decision tree). Default=None
12191223
%(log_extra_info)s
12201224
%(custom_node_label)s

tedana/selection/selection_utils.py

+23-23
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@
1414
##############################################################
1515

1616

17-
def selectcomps2use(comptable, decide_comps):
17+
def selectcomps2use(component_table, decide_comps):
1818
"""Get a list of component numbers that fit the classification types in ``decide_comps``.
1919
2020
Parameters
2121
----------
22-
comptable : :obj:`~pandas.DataFrame`
23-
Only uses the component_table in this object
22+
component_table : :obj:`~pandas.DataFrame`
23+
The component_table with metrics and labels for each ICA component
2424
decide_comps : :obj:`str` or :obj:`list[str]` or :obj:`list[int]`
2525
This is a string or a list of strings describing what classifications
2626
of components to operate on, using default or intermediate_classification
@@ -34,31 +34,31 @@ def selectcomps2use(comptable, decide_comps):
3434
comps2use : :obj:`list[int]`
3535
A list of component indices with classifications included in decide_comps
3636
"""
37-
if "classification" not in comptable:
38-
raise ValueError("comptable needs a 'classification' column to run selectcomps2use")
37+
if "classification" not in component_table:
38+
raise ValueError("component_table needs a 'classification' column to run selectcomps2use")
3939

4040
if isinstance(decide_comps, (str, int)):
4141
decide_comps = [decide_comps]
4242

4343
if isinstance(decide_comps, list) and (decide_comps[0] == "all"):
4444
# All components with any string in the classification field are set to True
45-
comps2use = list(range(comptable.shape[0]))
45+
comps2use = list(range(component_table.shape[0]))
4646

4747
elif isinstance(decide_comps, list) and all(isinstance(elem, str) for elem in decide_comps):
4848
comps2use = []
4949
for didx in range(len(decide_comps)):
50-
newcomps2use = comptable.index[
51-
comptable["classification"] == decide_comps[didx]
50+
newcomps2use = component_table.index[
51+
component_table["classification"] == decide_comps[didx]
5252
].tolist()
5353
comps2use = list(set(comps2use + newcomps2use))
5454

5555
elif isinstance(decide_comps, list) and all(isinstance(elem, int) for elem in decide_comps):
5656
# decide_comps is already a list of indices
57-
if len(comptable) <= max(decide_comps):
57+
if len(component_table) <= max(decide_comps):
5858
raise ValueError(
5959
"decide_comps for selectcomps2use is selecting for a component with index"
6060
f"{max(decide_comps)} (0 indexing) which is greater than the number "
61-
f"of components: {len(comptable)}"
61+
f"of components: {len(component_table)}"
6262
)
6363
elif min(decide_comps) < 0:
6464
raise ValueError(
@@ -100,8 +100,8 @@ def change_comptable_classifications(
100100
Parameters
101101
----------
102102
selector : :obj:`tedana.selection.component_selector.ComponentSelector`
103-
The attributes used are component_table, component_status_table, and
104-
current_node_idx_
103+
The attributes used are ``component_table_``, ``component_status_table_``, and
104+
``current_node_idx_``
105105
if_true, if_false : :obj:`str`
106106
If the condition in this step is true or false, give the component
107107
the label in this string. Options are 'accepted', 'rejected',
@@ -123,12 +123,12 @@ def change_comptable_classifications(
123123
Returns
124124
-------
125125
selector : :obj:`tedana.selection.component_selector.ComponentSelector`
126-
component_table["classifications"] will reflect any new
126+
``component_table_["classifications"]`` will reflect any new
127127
classifications.
128-
component_status_table will have a new column titled
129-
"Node current_node_idx_" that is a copy of the updated classifications
128+
``component_status_table_`` will have a new column titled
129+
"Node ``current_node_idx_``" that is a copy of the updated classifications
130130
column.
131-
component_table["classification_tags"] will be updated to include any
131+
``component_table_["classification_tags"]`` will be updated to include any
132132
new tags. Each tag should appear only once in the string and tags will
133133
be separated by commas.
134134
n_true, n_false : :obj:`int`
@@ -178,8 +178,8 @@ def comptable_classification_changer(
178178
Parameters
179179
----------
180180
selector : :obj:`tedana.selection.component_selector.ComponentSelector`
181-
The attributes used are component_table, component_status_table, and
182-
current_node_idx_
181+
The attributes used are ``component_table_``, ``component_status_table_``, and
182+
``current_node_idx_``
183183
boolstate : :obj:`bool`
184184
Change classifications only for True or False components in
185185
decision_boolean based on this variable
@@ -207,12 +207,12 @@ def comptable_classification_changer(
207207
-------
208208
selector : :obj:`tedana.selection.component_selector.ComponentSelector`
209209
Operates on the True OR False components depending on boolstate
210-
component_table["classifications"] will reflect any new
210+
``component_table_["classifications"]`` will reflect any new
211211
classifications.
212-
component_status_table will have a new column titled
213-
"Node current_node_idx_" that is a copy of the updated classifications
212+
``component_status_table_`` will have a new column titled
213+
"Node ``current_node_idx_``" that is a copy of the updated classifications
214214
column.
215-
component_table["classification_tags"] will be updated to include any
215+
component_table_["classification_tags"] will be updated to include any
216216
new tags. Each tag should appear only once in the string and tags will
217217
be separated by commas.
218218
@@ -363,7 +363,7 @@ def log_decision_tree_step(
363363
----------
364364
function_name_idx : :obj:`str`
365365
The name of the function that should be logged. By convention, this
366-
be "Step current_node_idx_: function_name"
366+
should be "Step ``current_node_idx_``: function_name"
367367
comps2use : :obj:`list[int]` or -1
368368
A list of component indices that should be used by a function.
369369
Only used to report no components found if empty and report

tedana/tests/test_component_selector.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def dicts_to_test(treechoice):
3838
"missing_req_param": A missing required param in a decision node function
3939
"missing_function": An undefined decision node function
4040
"missing_key": A dict missing one of the required keys (report)
41+
"null_value": A parameter in one node improperly has a null value
4142
4243
Returns
4344
-------
@@ -267,6 +268,13 @@ def test_check_null_succeeds():
267268
selector = component_selector.ComponentSelector(tree="minimal")
268269
selector.tree = dicts_to_test("null_value")
269270

271+
# "left" is missing from the function definition in node
272+
# but is found as an initialized cross component metric
273+
# so this should execute successfully
274+
selector.cross_component_metrics_ = {
275+
"left": 3,
276+
}
277+
270278
params = selector.tree["nodes"][0]["parameters"]
271279
functionname = selector.tree["nodes"][0]["functionname"]
272280
selector.check_null(params, functionname)
@@ -295,8 +303,11 @@ def test_are_all_components_accepted_or_rejected():
295303
def test_selector_properties_smoke():
296304
"""Tests to confirm properties match expected results."""
297305

306+
# Runs on un-executed component table to smoke test three class
307+
# functions that are used to count various types of component
308+
# classifications in the component table
298309
selector = component_selector.ComponentSelector(tree="minimal")
299-
selector.select(component_table=sample_comptable(), cross_component_metrics={"n_echos": 3})
310+
selector.component_table_ = sample_comptable()
300311

301312
assert selector.n_comps_ == 21
302313

tedana/tests/test_selection_utils.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,17 @@ def sample_selector(options=None):
5555
"test_elbow": 21,
5656
}
5757
selector = ComponentSelector(tree=tree)
58-
selector.select(component_table=component_table, cross_component_metrics=xcomp)
58+
59+
# Add an un-executed component table, cross component metrics, and status table
60+
selector.component_table_ = component_table.copy()
61+
selector.cross_component_metrics_ = xcomp
62+
selector.component_status_table_ = selector.component_table_[
63+
["Component", "classification"]
64+
].copy()
65+
selector.component_status_table_ = selector.component_status_table_.rename(
66+
columns={"classification": "initialized classification"}
67+
)
68+
5969
selector.current_node_idx_ = 0
6070

6171
return selector

tedana/workflows/tedana.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,9 @@ def tedana_workflow(
499499
if isinstance(data, str):
500500
data = [data]
501501

502+
LGR.info("Initializing and validating component selection tree")
503+
selector = ComponentSelector(tree)
504+
502505
LGR.info(f"Loading input data: {[f for f in data]}")
503506
catd, ref_img = io.load_data(data, n_echos=n_echos)
504507

@@ -630,8 +633,8 @@ def tedana_workflow(
630633
# optimally combine data
631634
data_oc = combine.make_optcom(catd, tes, masksum_denoise, t2s=t2s_full, combmode=combmode)
632635

633-
# regress out global signal unless explicitly not desired
634636
if "gsr" in gscontrol:
637+
# regress out global signal
635638
catd, data_oc = gsc.gscontrol_raw(catd, data_oc, n_echos, io_generator)
636639

637640
fout = io_generator.save_file(data_oc, "combined img")
@@ -669,8 +672,6 @@ def tedana_workflow(
669672
# Estimate betas and compute selection metrics for mixing matrix
670673
# generated from dimensionally reduced data using full data (i.e., data
671674
# with thermal noise)
672-
LGR.info("Making second component selection guess from ICA results")
673-
selector = ComponentSelector(tree)
674675
necessary_metrics = selector.necessary_metrics
675676
# The figures require some metrics that might not be used by the decision tree.
676677
extra_metrics = ["variance explained", "normalized variance explained", "kappa", "rho"]
@@ -686,6 +687,7 @@ def tedana_workflow(
686687
"ICA",
687688
metrics=necessary_metrics,
688689
)
690+
LGR.info("Selecting components from ICA results")
689691
selector = selection.automatic_selection(
690692
comptable,
691693
selector,
@@ -704,6 +706,9 @@ def tedana_workflow(
704706
# If we're going to restart, temporarily allow force overwrite
705707
if keep_restarting:
706708
io_generator.overwrite = True
709+
# Create a re-initialized selector object if rerunning
710+
selector = ComponentSelector(tree)
711+
707712
RepLGR.disabled = True # Disable the report to avoid duplicate text
708713
RepLGR.disabled = False # Re-enable the report after the while loop is escaped
709714
io_generator.overwrite = overwrite # Re-enable original overwrite behavior

0 commit comments

Comments
 (0)