Commit 01f8ea0 (2 parents: a974473 + 08f70b8)

MRG: #633 from vocalpy/rename-csv-path-dataset-path
ENH: Rename config option `csv_path` -> `dataset_path`, fix #549

29 files changed: +181 -181 lines
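For users upgrading, the rename means a one-line change per section in any existing config file that still says `csv_path`. A minimal migration sketch, assuming the `toml` package this repo already uses to read and write configs; the filename and the helper itself are hypothetical, not part of this commit:

    # migrate_config.py -- hypothetical one-off helper, not part of this commit.
    # Renames the old 'csv_path' option to 'dataset_path' in a vak .toml config.
    import toml

    CONFIG_PATH = "my_train_config.toml"  # illustrative filename

    config = toml.load(CONFIG_PATH)
    # the option lives in whichever command section the config defines
    # (section names taken from src/vak/config/valid.toml below)
    for section in ("TRAIN", "EVAL", "LEARNCURVE", "PREDICT"):
        if section in config and "csv_path" in config[section]:
            config[section]["dataset_path"] = config[section].pop("csv_path")

    with open(CONFIG_PATH, "w") as fp:
        toml.dump(config, fp)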

src/vak/cli/eval.py (+3 -3)

@@ -46,17 +46,17 @@ def eval(toml_path):
     model_name = cfg.eval.model
     model_config = config.model.config_from_toml_path(toml_path, model_name)
 
-    if cfg.eval.csv_path is None:
+    if cfg.eval.dataset_path is None:
         raise ValueError(
-            "No value is specified for 'csv_path' in this .toml config file."
+            "No value is specified for 'dataset_path' in this .toml config file."
             f"To generate a .csv file that represents the dataset, "
             f"please run the following command:\n'vak prep {toml_path}'"
         )
 
     core.eval(
         model_name=model_name,
         model_config=model_config,
-        csv_path=cfg.eval.csv_path,
+        dataset_path=cfg.eval.dataset_path,
         checkpoint_path=cfg.eval.checkpoint_path,
         labelmap_path=cfg.eval.labelmap_path,
         output_dir=cfg.eval.output_dir,

src/vak/cli/learncurve.py (+3 -3)

@@ -53,9 +53,9 @@ def learning_curve(toml_path):
     model_name = cfg.learncurve.model
     model_config = config.model.config_from_toml_path(toml_path, model_name)
 
-    if cfg.learncurve.csv_path is None:
+    if cfg.learncurve.dataset_path is None:
         raise ValueError(
-            "No value is specified for 'csv_path' in this .toml config file."
+            "No value is specified for 'dataset_path' in this .toml config file."
             f"To generate a .csv file that represents the dataset, "
             f"please run the following command:\n'vak prep {toml_path}'"
         )
@@ -65,7 +65,7 @@ def learning_curve(toml_path):
         model_config=model_config,
         train_set_durs=cfg.learncurve.train_set_durs,
         num_replicates=cfg.learncurve.num_replicates,
-        csv_path=cfg.learncurve.csv_path,
+        dataset_path=cfg.learncurve.dataset_path,
         labelset=cfg.prep.labelset,
         window_size=cfg.dataloader.window_size,
         batch_size=cfg.learncurve.batch_size,

src/vak/cli/predict.py (+3 -3)

@@ -41,17 +41,17 @@ def predict(toml_path):
     model_name = cfg.predict.model
     model_config = config.model.config_from_toml_path(toml_path, model_name)
 
-    if cfg.predict.csv_path is None:
+    if cfg.predict.dataset_path is None:
         raise ValueError(
-            "No value is specified for 'csv_path' in this .toml config file."
+            "No value is specified for 'dataset_path' in this .toml config file."
             f"To generate a .csv file that represents the dataset, "
             f"please run the following command:\n'vak prep {toml_path}'"
         )
 
     core.predict(
         model_name=model_name,
         model_config=model_config,
-        csv_path=cfg.predict.csv_path,
+        dataset_path=cfg.predict.dataset_path,
         checkpoint_path=cfg.predict.checkpoint_path,
         labelmap_path=cfg.predict.labelmap_path,
         window_size=cfg.dataloader.window_size,

src/vak/cli/prep.py (+9 -9)

@@ -81,18 +81,18 @@ def prep(toml_path):
     """
     toml_path = Path(toml_path)
 
-    # open here because need to check for `csv_path` in this function, see #314 & #333
+    # open here because need to check for `dataset_path` in this function, see #314 & #333
     config_toml = _load_toml_from_path(toml_path)
     # ---- figure out purpose of config file from sections; will save csv path in that section -------------------------
     purpose = purpose_from_toml(config_toml, toml_path)
     if (
-        "csv_path" in config_toml[purpose.upper()]
-        and config_toml[purpose.upper()]["csv_path"] is not None
+        "dataset_path" in config_toml[purpose.upper()]
+        and config_toml[purpose.upper()]["dataset_path"] is not None
     ):
         raise ValueError(
-            f"config .toml file already has a 'csv_path' option in the '{purpose.upper()}' section, "
+            f"config .toml file already has a 'dataset_path' option in the '{purpose.upper()}' section, "
             f"and running `prep` would overwrite that value. To `prep` a new dataset, please remove "
-            f"the 'csv_path' option from the '{purpose.upper()}' section in the config file:\n{toml_path}"
+            f"the 'dataset_path' option from the '{purpose.upper()}' section in the config file:\n{toml_path}"
         )
 
     # now that we've checked that, go ahead and parse the sections we want
@@ -127,10 +127,10 @@ def prep(toml_path):
     section = purpose.upper()
     logger.info(
         f"Determined that purpose of config file is: {purpose}.\n"
-        f"Will add 'csv_path' option to '{section}' section."
+        f"Will add 'dataset_path' option to '{section}' section."
     )
 
-    vak_df, csv_path = core.prep(
+    vak_df, dataset_path = core.prep(
         data_dir=cfg.prep.data_dir,
         purpose=purpose,
         audio_format=cfg.prep.audio_format,
@@ -147,8 +147,8 @@ def prep(toml_path):
         test_dur=cfg.prep.test_dur,
     )
 
-    # use config and section from above to add csv_path to config.toml file
-    config_toml[section]["csv_path"] = str(csv_path)
+    # use config and section from above to add dataset_path to config.toml file
+    config_toml[section]["dataset_path"] = str(dataset_path)
 
     with toml_path.open("w") as fp:
         toml.dump(config_toml, fp)
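Taken together, the three hunks above implement a guard-then-write-back flow: refuse to run if the config already has a `dataset_path` (so `prep` never silently overwrites one), run `core.prep`, then record the new path in the appropriate section. A condensed sketch of that flow; `run_prep` and the hard-coded purpose are simplified stand-ins for vak's real `core.prep` and `purpose_from_toml`:

    # Condensed sketch of cli/prep.py's flow; `run_prep` is a hypothetical
    # stand-in for core.prep, and purpose is hard-coded for brevity.
    from pathlib import Path

    import toml

    def prep_sketch(toml_path, run_prep):
        toml_path = Path(toml_path)
        config_toml = toml.load(str(toml_path))
        purpose = "train"  # vak infers this from which sections the file defines
        section = purpose.upper()

        # guard: never silently overwrite a dataset_path from an earlier run
        if config_toml.get(section, {}).get("dataset_path") is not None:
            raise ValueError(
                f"config already has 'dataset_path' in '{section}' section; "
                f"remove it to prep a new dataset: {toml_path}"
            )

        dataset_path = run_prep()  # returns path to the csv that prep created

        # write-back: record where the prepared dataset lives
        config_toml.setdefault(section, {})["dataset_path"] = str(dataset_path)
        with toml_path.open("w") as fp:
            toml.dump(config_toml, fp)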

src/vak/cli/train.py (+3 -3)

@@ -52,9 +52,9 @@ def train(toml_path):
     model_name = cfg.train.model
     model_config = config.model.config_from_toml_path(toml_path, model_name)
 
-    if cfg.train.csv_path is None:
+    if cfg.train.dataset_path is None:
         raise ValueError(
-            "No value is specified for 'csv_path' in this .toml config file."
+            "No value is specified for 'dataset_path' in this .toml config file."
             f"To generate a .csv file that represents the dataset, "
             f"please run the following command:\n'vak prep {toml_path}'"
         )
@@ -67,7 +67,7 @@ def train(toml_path):
     core.train(
         model_name=model_name,
         model_config=model_config,
-        csv_path=cfg.train.csv_path,
+        dataset_path=cfg.train.dataset_path,
         labelset=labelset,
         window_size=cfg.dataloader.window_size,
         batch_size=cfg.train.batch_size,

src/vak/config/eval.py (+5 -5)

@@ -64,8 +64,8 @@ class EvalConfig:
 
     Attributes
     ----------
-    csv_path : str
-        path to where dataset was saved as a csv.
+    dataset_path : str
+        Path to dataset, e.g., a csv file generated by running ``vak prep``.
     checkpoint_path : str
         path to directory with checkpoint files saved by Torch, to reload model
     output_dir : str
@@ -111,9 +111,9 @@ class EvalConfig:
     )
     batch_size = attr.ib(converter=int, validator=instance_of(int))
 
-    # csv_path is actually 'required' but we can't enforce that here because cli.prep looks at
-    # what sections are defined to figure out where to add csv_path after it creates the csv
-    csv_path = attr.ib(
+    # dataset_path is actually 'required' but we can't enforce that here because cli.prep looks at
+    # what sections are defined to figure out where to add dataset_path after it creates the csv
+    dataset_path = attr.ib(
         converter=converters.optional(expanded_user_path),
         default=None,
     )
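The comment in the second hunk describes a pattern that recurs in `config/predict.py` and `config/train.py` below: `dataset_path` is required in practice, but is declared optional with a `None` default so that `cli.prep` can add it to the config after creating the csv. A self-contained sketch of that attrs idiom, with a plain `expanduser` converter standing in for vak's `expanded_user_path`:

    # Self-contained sketch of the attrs idiom used by these config classes:
    # an attribute that is 'required' in practice but declared optional here,
    # so `vak prep` can fill it in later.
    from pathlib import Path

    import attr
    from attr import converters
    from attr.validators import instance_of

    def expand_user_path(value):
        # stand-in for vak's `expanded_user_path` converter
        return Path(value).expanduser()

    @attr.s
    class EvalConfigSketch:
        batch_size = attr.ib(converter=int, validator=instance_of(int))
        # default=None; converters.optional skips the converter for None values
        dataset_path = attr.ib(
            converter=converters.optional(expand_user_path),
            default=None,
        )

    cfg = EvalConfigSketch(batch_size=4)
    assert cfg.dataset_path is None  # left for `vak prep` to fill in later
    cfg = EvalConfigSketch(batch_size=4, dataset_path="~/prep/train.csv")
    assert isinstance(cfg.dataset_path, Path)  # converter expanded '~'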

src/vak/config/learncurve.py (+2 -2)

@@ -16,8 +16,8 @@ class LearncurveConfig(TrainConfig):
     ----------
     model : str
         Model name, e.g., ``model = "TweetyNet"``
-    csv_path : str
-        path to where dataset was saved as a csv.
+    dataset_path : str
+        Path to dataset, e.g., a csv file generated by running ``vak prep``.
     num_epochs : int
         number of training epochs. One epoch = one iteration through the entire
         training set.

src/vak/config/predict.py (+6 -6)

@@ -17,8 +17,8 @@ class PredictConfig:
 
     Attributes
     ----------
-    csv_path : str
-        path to where dataset was saved as a csv.
+    dataset_path : str
+        Path to dataset, e.g., a csv file generated by running ``vak prep``.
     checkpoint_path : str
         path to directory with checkpoint files saved by Torch, to reload model
     labelmap_path : str
@@ -62,7 +62,7 @@ class PredictConfig:
         before they are converted to annotations. Default is False.
         Typically the output will be "logits"
         to which a softmax transform might be applied.
-        For each item in the dataset--each row in the `csv_path` .csv--
+        For each item in the dataset--each row in the `dataset_path` .csv--
         the output will be saved in a separate file in `output_dir`,
         with the extension `{MODEL_NAME}.output.npz`. E.g., if the input is a
         spectrogram with `spect_path` filename `gy6or6_032312_081416.npz`,
@@ -80,9 +80,9 @@ class PredictConfig:
     )
     batch_size = attr.ib(converter=int, validator=instance_of(int))
 
-    # csv_path is actually 'required' but we can't enforce that here because cli.prep looks at
-    # what sections are defined to figure out where to add csv_path after it creates the csv
-    csv_path = attr.ib(
+    # dataset_path is actually 'required' but we can't enforce that here because cli.prep looks at
+    # what sections are defined to figure out where to add dataset_path after it creates the csv
+    dataset_path = attr.ib(
         converter=converters.optional(expanded_user_path),
         default=None,
     )

src/vak/config/train.py (+5 -5)

@@ -16,8 +16,8 @@ class TrainConfig:
     ----------
     model : str
         Model name, e.g., ``model = "TweetyNet"``
-    csv_path : str
-        path to where dataset was saved as a csv.
+    dataset_path : str
+        Path to dataset, e.g., a csv file generated by running ``vak prep``.
     num_epochs : int
         number of training epochs. One epoch = one iteration through the entire
         training set.
@@ -73,9 +73,9 @@ class TrainConfig:
     root_results_dir = attr.ib(converter=expanded_user_path)
 
     # optional
-    # csv_path is actually 'required' but we can't enforce that here because cli.prep looks at
-    # what sections are defined to figure out where to add csv_path after it creates the csv
-    csv_path = attr.ib(
+    # dataset_path is actually 'required' but we can't enforce that here because cli.prep looks at
+    # what sections are defined to figure out where to add dataset_path after it creates the csv
+    dataset_path = attr.ib(
         converter=converters.optional(expanded_user_path),
         default=None,
     )

src/vak/config/valid.toml (+4 -4)

@@ -36,7 +36,7 @@ window_size = 88
 [TRAIN]
 model = 'TweetyNet'
 root_results_dir = './tests/test_data/results/train'
-csv_path = 'tests/test_data/prep/train/032312_prep_191224_225912.csv'
+dataset_path = 'tests/test_data/prep/train/032312_prep_191224_225912.csv'
 num_workers = 4
 device = 'cuda'
 batch_size = 11
@@ -53,7 +53,7 @@ spect_scaler_path = '/home/user/results_181014_194418/spect_scaler'
 
 
 [EVAL]
-csv_path = 'tests/test_data/prep/learncurve/032312_prep_191224_225910.csv'
+dataset_path = 'tests/test_data/prep/learncurve/032312_prep_191224_225910.csv'
 checkpoint_path = '/home/user/results_181014_194418/TweetyNet/checkpoints/'
 labelmap_path = '/home/user/results_181014_194418/labelmap.json'
 output_dir = './tests/test_data/prep/learncurve'
@@ -76,7 +76,7 @@ ckpt_step = 1
 patience = 4
 train_set_durs = [ 4, 6 ]
 num_replicates = 2
-csv_path = 'tests/test_data/prep/learncurve/032312_prep_191224_225910.csv'
+dataset_path = 'tests/test_data/prep/learncurve/032312_prep_191224_225910.csv'
 results_dir_made_by_main_script = '/some/path/to/learncurve/'
 previous_run_path = '/some/path/to/learncurve/results_20210106_132152'
 post_tfm_kwargs = {'majority_vote' = true, 'min_segment_dur' = 0.01}
@@ -85,7 +85,7 @@ device = 'cuda'
 
 
 [PREDICT]
-csv_path = 'tests/test_data/prep/learncurve/032312_prep_191224_225910.csv'
+dataset_path = 'tests/test_data/prep/learncurve/032312_prep_191224_225910.csv'
 checkpoint_path = '/home/user/results_181014_194418/TweetyNet/checkpoints/'
 labelmap_path = '/home/user/results_181014_194418/labelmap.json'
 annot_csv_filename = '032312_prep_191224_225910.annot.csv'
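`valid.toml` appears to act as the reference config that user configs are checked against, which is why the rename must land here too; otherwise `dataset_path` would be rejected as an unknown option. A rough sketch of that kind of option-name check (an illustration of the general idea, not vak's actual validator, whose logic may differ):

    # Illustration of validating user config option names against valid.toml;
    # vak's real validation logic may differ, this is only the general idea.
    import toml

    valid = toml.load("src/vak/config/valid.toml")
    user = toml.load("my_train_config.toml")  # hypothetical user config

    for section, options in user.items():
        if section not in valid:
            raise ValueError(f"invalid section in config: {section}")
        unknown = set(options) - set(valid[section])
        if unknown:
            # before this commit, 'dataset_path' would have tripped this check
            raise ValueError(f"invalid option(s) in [{section}]: {unknown}")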

src/vak/core/eval.py (+9 -9)

@@ -25,7 +25,7 @@
 def eval(
     model_name: str,
     model_config: dict,
-    csv_path,
+    dataset_path,
     checkpoint_path,
     labelmap_path,
     output_dir,
@@ -48,8 +48,8 @@ def eval(
         Model configuration in a ``dict``,
         as loaded from a .toml file,
         and used by the model method ``from_config``.
-    csv_path : str, pathlib.Path
-        path to where dataset was saved as a csv.
+    dataset_path : str, pathlib.Path
+        Path to dataset, e.g., a csv file generated by running ``vak prep``.
     checkpoint_path : str, pathlib.Path
         path to directory with checkpoint files saved by Torch, to reload model
     output_dir : str, pathlib.Path
@@ -105,8 +105,8 @@ def eval(
     """
     # ---- pre-conditions ----------------------------------------------------------------------------------------------
     for path, path_name in zip(
-        (checkpoint_path, csv_path, labelmap_path, spect_scaler_path),
-        ('checkpoint_path', 'csv_path', 'labelmap_path', 'spect_scaler_path'),
+        (checkpoint_path, dataset_path, labelmap_path, spect_scaler_path),
+        ('checkpoint_path', 'dataset_path', 'labelmap_path', 'spect_scaler_path'),
     ):
         if path is not None:  # because `spect_scaler_path` is optional
             if not validators.is_a_file(path):
@@ -148,9 +148,9 @@ def eval(
         window_size=window_size,
         return_padding_mask=True,
     )
-    logger.info(f"creating dataset for evaluation from: {csv_path}")
+    logger.info(f"creating dataset for evaluation from: {dataset_path}")
     val_dataset = VocalDataset.from_csv(
-        csv_path=csv_path,
+        csv_path=dataset_path,
         split=split,
         labelmap=labelmap,
         spect_key=spect_key,
@@ -173,7 +173,7 @@ def eval(
         input_shape = input_shape[1:]
 
     if post_tfm_kwargs:
-        dataset_df = pd.read_csv(csv_path)
+        dataset_df = pd.read_csv(dataset_path)
         # we use the timebins vector from the first spect path to get timebin dur.
         # this is less careful than calling io.dataframe.validate_and_get_timebin_dur
         # but it's also much faster, and we can assume dataframe was validated when it was made
@@ -227,7 +227,7 @@ def eval(
             ("checkpoint_path", checkpoint_path),
             ("labelmap_path", labelmap_path),
             ("spect_scaler_path", spect_scaler_path),
-            ("csv_path", csv_path),
+            ("dataset_path", dataset_path),
         ]
     )
     # TODO: is this still necessary after switching to Lightning? Stop saying "average"?
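The precondition hunk above (`@@ -105,8 +105,8 @@`) pairs each path with its option name so the error message can say which option failed, and `None` entries are skipped because `spect_scaler_path` is optional. The same pattern as a self-contained sketch, with `pathlib.Path.is_file` standing in for vak's `validators.is_a_file`:

    # Sketch of the paired path/name precondition check from core.eval;
    # Path.is_file stands in for vak's validators.is_a_file.
    from pathlib import Path

    def check_paths_are_files(**named_paths):
        for path_name, path in named_paths.items():
            if path is not None:  # optional paths (e.g. spect_scaler_path) may be None
                if not Path(path).is_file():
                    raise FileNotFoundError(
                        f"value for '{path_name}' not recognized as a file: {path}"
                    )

    # usage -- raises FileNotFoundError unless these illustrative paths exist
    check_paths_are_files(
        checkpoint_path="results/TweetyNet/checkpoints/ckpt.pt",
        dataset_path="prep/032312_prep_191224_225910.csv",
        spect_scaler_path=None,  # optional, so None is allowed
    )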
