Commit 01f8ea0 (2 parents: a974473 + 08f70b8)

MRG: #633 from vocalpy/rename-csv-path-dataset-path
ENH: Rename config option `csv_path` -> `dataset_path`, fix #549

29 files changed: +181 -181 lines
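For users upgrading, the rename means a one-line change per section in any existing config file that still says `csv_path`. A minimal migration sketch, assuming the `toml` package this repo already uses to read and write configs; the filename and the helper itself are hypothetical, not part of this commit:

    # migrate_config.py -- hypothetical one-off helper, not part of this commit.
    # Renames the old 'csv_path' option to 'dataset_path' in a vak .toml config.
    import toml

    CONFIG_PATH = "my_train_config.toml"  # illustrative filename

    config = toml.load(CONFIG_PATH)
    # the option lives in whichever command section the config defines
    # (section names taken from src/vak/config/valid.toml below)
    for section in ("TRAIN", "EVAL", "LEARNCURVE", "PREDICT"):
        if section in config and "csv_path" in config[section]:
            config[section]["dataset_path"] = config[section].pop("csv_path")

    with open(CONFIG_PATH, "w") as fp:
        toml.dump(config, fp)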

src/vak/cli/eval.py (+3 -3)

@@ -46,17 +46,17 @@ def eval(toml_path):
     model_name = cfg.eval.model
     model_config = config.model.config_from_toml_path(toml_path, model_name)
 
-    if cfg.eval.csv_path is None:
+    if cfg.eval.dataset_path is None:
         raise ValueError(
-            "No value is specified for 'csv_path' in this .toml config file."
+            "No value is specified for 'dataset_path' in this .toml config file."
             f"To generate a .csv file that represents the dataset, "
             f"please run the following command:\n'vak prep {toml_path}'"
         )
 
     core.eval(
         model_name=model_name,
         model_config=model_config,
-        csv_path=cfg.eval.csv_path,
+        dataset_path=cfg.eval.dataset_path,
         checkpoint_path=cfg.eval.checkpoint_path,
         labelmap_path=cfg.eval.labelmap_path,
         output_dir=cfg.eval.output_dir,

src/vak/cli/learncurve.py (+3 -3)

@@ -53,9 +53,9 @@ def learning_curve(toml_path):
     model_name = cfg.learncurve.model
     model_config = config.model.config_from_toml_path(toml_path, model_name)
 
-    if cfg.learncurve.csv_path is None:
+    if cfg.learncurve.dataset_path is None:
         raise ValueError(
-            "No value is specified for 'csv_path' in this .toml config file."
+            "No value is specified for 'dataset_path' in this .toml config file."
             f"To generate a .csv file that represents the dataset, "
             f"please run the following command:\n'vak prep {toml_path}'"
         )
@@ -65,7 +65,7 @@ def learning_curve(toml_path):
         model_config=model_config,
         train_set_durs=cfg.learncurve.train_set_durs,
         num_replicates=cfg.learncurve.num_replicates,
-        csv_path=cfg.learncurve.csv_path,
+        dataset_path=cfg.learncurve.dataset_path,
         labelset=cfg.prep.labelset,
         window_size=cfg.dataloader.window_size,
         batch_size=cfg.learncurve.batch_size,

src/vak/cli/predict.py (+3 -3)

@@ -41,17 +41,17 @@ def predict(toml_path):
     model_name = cfg.predict.model
     model_config = config.model.config_from_toml_path(toml_path, model_name)
 
-    if cfg.predict.csv_path is None:
+    if cfg.predict.dataset_path is None:
         raise ValueError(
-            "No value is specified for 'csv_path' in this .toml config file."
+            "No value is specified for 'dataset_path' in this .toml config file."
             f"To generate a .csv file that represents the dataset, "
             f"please run the following command:\n'vak prep {toml_path}'"
         )
 
     core.predict(
         model_name=model_name,
         model_config=model_config,
-        csv_path=cfg.predict.csv_path,
+        dataset_path=cfg.predict.dataset_path,
         checkpoint_path=cfg.predict.checkpoint_path,
         labelmap_path=cfg.predict.labelmap_path,
         window_size=cfg.dataloader.window_size,

src/vak/cli/prep.py (+9 -9)

@@ -81,18 +81,18 @@ def prep(toml_path):
     """
     toml_path = Path(toml_path)
 
-    # open here because need to check for `csv_path` in this function, see #314 & #333
+    # open here because need to check for `dataset_path` in this function, see #314 & #333
     config_toml = _load_toml_from_path(toml_path)
     # ---- figure out purpose of config file from sections; will save csv path in that section -------------------------
     purpose = purpose_from_toml(config_toml, toml_path)
     if (
-        "csv_path" in config_toml[purpose.upper()]
-        and config_toml[purpose.upper()]["csv_path"] is not None
+        "dataset_path" in config_toml[purpose.upper()]
+        and config_toml[purpose.upper()]["dataset_path"] is not None
     ):
         raise ValueError(
-            f"config .toml file already has a 'csv_path' option in the '{purpose.upper()}' section, "
+            f"config .toml file already has a 'dataset_path' option in the '{purpose.upper()}' section, "
             f"and running `prep` would overwrite that value. To `prep` a new dataset, please remove "
-            f"the 'csv_path' option from the '{purpose.upper()}' section in the config file:\n{toml_path}"
+            f"the 'dataset_path' option from the '{purpose.upper()}' section in the config file:\n{toml_path}"
         )
 
     # now that we've checked that, go ahead and parse the sections we want
@@ -127,10 +127,10 @@ def prep(toml_path):
     section = purpose.upper()
     logger.info(
         f"Determined that purpose of config file is: {purpose}.\n"
-        f"Will add 'csv_path' option to '{section}' section."
+        f"Will add 'dataset_path' option to '{section}' section."
     )
 
-    vak_df, csv_path = core.prep(
+    vak_df, dataset_path = core.prep(
         data_dir=cfg.prep.data_dir,
         purpose=purpose,
         audio_format=cfg.prep.audio_format,
@@ -147,8 +147,8 @@ def prep(toml_path):
         test_dur=cfg.prep.test_dur,
     )
 
-    # use config and section from above to add csv_path to config.toml file
-    config_toml[section]["csv_path"] = str(csv_path)
+    # use config and section from above to add dataset_path to config.toml file
+    config_toml[section]["dataset_path"] = str(dataset_path)
 
     with toml_path.open("w") as fp:
         toml.dump(config_toml, fp)
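Taken together, the three hunks above implement a guard-then-write-back flow: refuse to run if the config already has a `dataset_path` (so `prep` never silently overwrites one), run `core.prep`, then record the new path in the appropriate section. A condensed sketch of that flow; `run_prep` and the hard-coded purpose are simplified stand-ins for vak's real `core.prep` and `purpose_from_toml`:

    # Condensed sketch of cli/prep.py's flow; `run_prep` is a hypothetical
    # stand-in for core.prep, and purpose is hard-coded for brevity.
    from pathlib import Path

    import toml

    def prep_sketch(toml_path, run_prep):
        toml_path = Path(toml_path)
        config_toml = toml.load(str(toml_path))
        purpose = "train"  # vak infers this from which sections the file defines
        section = purpose.upper()

        # guard: never silently overwrite a dataset_path from an earlier run
        if config_toml.get(section, {}).get("dataset_path") is not None:
            raise ValueError(
                f"config already has 'dataset_path' in '{section}' section; "
                f"remove it to prep a new dataset: {toml_path}"
            )

        dataset_path = run_prep()  # returns path to the csv that prep created

        # write-back: record where the prepared dataset lives
        config_toml.setdefault(section, {})["dataset_path"] = str(dataset_path)
        with toml_path.open("w") as fp:
            toml.dump(config_toml, fp)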

src/vak/cli/train.py (+3 -3)

@@ -52,9 +52,9 @@ def train(toml_path):
     model_name = cfg.train.model
     model_config = config.model.config_from_toml_path(toml_path, model_name)
 
-    if cfg.train.csv_path is None:
+    if cfg.train.dataset_path is None:
         raise ValueError(
-            "No value is specified for 'csv_path' in this .toml config file."
+            "No value is specified for 'dataset_path' in this .toml config file."
             f"To generate a .csv file that represents the dataset, "
             f"please run the following command:\n'vak prep {toml_path}'"
         )
@@ -67,7 +67,7 @@ def train(toml_path):
     core.train(
         model_name=model_name,
         model_config=model_config,
-        csv_path=cfg.train.csv_path,
+        dataset_path=cfg.train.dataset_path,
         labelset=labelset,
         window_size=cfg.dataloader.window_size,
         batch_size=cfg.train.batch_size,

src/vak/config/eval.py (+5 -5)

@@ -64,8 +64,8 @@ class EvalConfig:
 
     Attributes
     ----------
-    csv_path : str
-        path to where dataset was saved as a csv.
+    dataset_path : str
+        Path to dataset, e.g., a csv file generated by running ``vak prep``.
     checkpoint_path : str
         path to directory with checkpoint files saved by Torch, to reload model
     output_dir : str
@@ -111,9 +111,9 @@ class EvalConfig:
     )
     batch_size = attr.ib(converter=int, validator=instance_of(int))
 
-    # csv_path is actually 'required' but we can't enforce that here because cli.prep looks at
-    # what sections are defined to figure out where to add csv_path after it creates the csv
-    csv_path = attr.ib(
+    # dataset_path is actually 'required' but we can't enforce that here because cli.prep looks at
+    # what sections are defined to figure out where to add dataset_path after it creates the csv
+    dataset_path = attr.ib(
         converter=converters.optional(expanded_user_path),
         default=None,
     )
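The comment in the second hunk describes a pattern that recurs in `config/predict.py` and `config/train.py` below: `dataset_path` is required in practice, but is declared optional with a `None` default so that `cli.prep` can add it to the config after creating the csv. A self-contained sketch of that attrs idiom, with a plain `expanduser` converter standing in for vak's `expanded_user_path`:

    # Self-contained sketch of the attrs idiom used by these config classes:
    # an attribute that is 'required' in practice but declared optional here,
    # so `vak prep` can fill it in later.
    from pathlib import Path

    import attr
    from attr import converters
    from attr.validators import instance_of

    def expand_user_path(value):
        # stand-in for vak's `expanded_user_path` converter
        return Path(value).expanduser()

    @attr.s
    class EvalConfigSketch:
        batch_size = attr.ib(converter=int, validator=instance_of(int))
        # default=None; converters.optional skips the converter for None values
        dataset_path = attr.ib(
            converter=converters.optional(expand_user_path),
            default=None,
        )

    cfg = EvalConfigSketch(batch_size=4)
    assert cfg.dataset_path is None  # left for `vak prep` to fill in later
    cfg = EvalConfigSketch(batch_size=4, dataset_path="~/prep/train.csv")
    assert isinstance(cfg.dataset_path, Path)  # converter expanded '~'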

src/vak/config/learncurve.py (+2 -2)

@@ -16,8 +16,8 @@ class LearncurveConfig(TrainConfig):
     ----------
     model : str
         Model name, e.g., ``model = "TweetyNet"``
-    csv_path : str
-        path to where dataset was saved as a csv.
+    dataset_path : str
+        Path to dataset, e.g., a csv file generated by running ``vak prep``.
     num_epochs : int
         number of training epochs. One epoch = one iteration through the entire
         training set.

src/vak/config/predict.py (+6 -6)

@@ -17,8 +17,8 @@ class PredictConfig:
 
     Attributes
     ----------
-    csv_path : str
-        path to where dataset was saved as a csv.
+    dataset_path : str
+        Path to dataset, e.g., a csv file generated by running ``vak prep``.
     checkpoint_path : str
         path to directory with checkpoint files saved by Torch, to reload model
     labelmap_path : str
@@ -62,7 +62,7 @@ class PredictConfig:
         before they are converted to annotations. Default is False.
         Typically the output will be "logits"
         to which a softmax transform might be applied.
-        For each item in the dataset--each row in the `csv_path` .csv--
+        For each item in the dataset--each row in the `dataset_path` .csv--
         the output will be saved in a separate file in `output_dir`,
         with the extension `{MODEL_NAME}.output.npz`. E.g., if the input is a
         spectrogram with `spect_path` filename `gy6or6_032312_081416.npz`,
@@ -80,9 +80,9 @@ class PredictConfig:
     )
     batch_size = attr.ib(converter=int, validator=instance_of(int))
 
-    # csv_path is actually 'required' but we can't enforce that here because cli.prep looks at
-    # what sections are defined to figure out where to add csv_path after it creates the csv
-    csv_path = attr.ib(
+    # dataset_path is actually 'required' but we can't enforce that here because cli.prep looks at
+    # what sections are defined to figure out where to add dataset_path after it creates the csv
+    dataset_path = attr.ib(
         converter=converters.optional(expanded_user_path),
         default=None,
     )

src/vak/config/train.py (+5 -5)

@@ -16,8 +16,8 @@ class TrainConfig:
     ----------
     model : str
         Model name, e.g., ``model = "TweetyNet"``
-    csv_path : str
-        path to where dataset was saved as a csv.
+    dataset_path : str
+        Path to dataset, e.g., a csv file generated by running ``vak prep``.
     num_epochs : int
         number of training epochs. One epoch = one iteration through the entire
         training set.
@@ -73,9 +73,9 @@ class TrainConfig:
     root_results_dir = attr.ib(converter=expanded_user_path)
 
     # optional
-    # csv_path is actually 'required' but we can't enforce that here because cli.prep looks at
-    # what sections are defined to figure out where to add csv_path after it creates the csv
-    csv_path = attr.ib(
+    # dataset_path is actually 'required' but we can't enforce that here because cli.prep looks at
+    # what sections are defined to figure out where to add dataset_path after it creates the csv
+    dataset_path = attr.ib(
         converter=converters.optional(expanded_user_path),
         default=None,
     )

src/vak/config/valid.toml (+4 -4)

@@ -36,7 +36,7 @@ window_size = 88
 [TRAIN]
 model = 'TweetyNet'
 root_results_dir = './tests/test_data/results/train'
-csv_path = 'tests/test_data/prep/train/032312_prep_191224_225912.csv'
+dataset_path = 'tests/test_data/prep/train/032312_prep_191224_225912.csv'
 num_workers = 4
 device = 'cuda'
 batch_size = 11
@@ -53,7 +53,7 @@ spect_scaler_path = '/home/user/results_181014_194418/spect_scaler'
 
 
 [EVAL]
-csv_path = 'tests/test_data/prep/learncurve/032312_prep_191224_225910.csv'
+dataset_path = 'tests/test_data/prep/learncurve/032312_prep_191224_225910.csv'
 checkpoint_path = '/home/user/results_181014_194418/TweetyNet/checkpoints/'
 labelmap_path = '/home/user/results_181014_194418/labelmap.json'
 output_dir = './tests/test_data/prep/learncurve'
@@ -76,7 +76,7 @@ ckpt_step = 1
 patience = 4
 train_set_durs = [ 4, 6 ]
 num_replicates = 2
-csv_path = 'tests/test_data/prep/learncurve/032312_prep_191224_225910.csv'
+dataset_path = 'tests/test_data/prep/learncurve/032312_prep_191224_225910.csv'
 results_dir_made_by_main_script = '/some/path/to/learncurve/'
 previous_run_path = '/some/path/to/learncurve/results_20210106_132152'
 post_tfm_kwargs = {'majority_vote' = true, 'min_segment_dur' = 0.01}
@@ -85,7 +85,7 @@ device = 'cuda'
 
 
 [PREDICT]
-csv_path = 'tests/test_data/prep/learncurve/032312_prep_191224_225910.csv'
+dataset_path = 'tests/test_data/prep/learncurve/032312_prep_191224_225910.csv'
 checkpoint_path = '/home/user/results_181014_194418/TweetyNet/checkpoints/'
 labelmap_path = '/home/user/results_181014_194418/labelmap.json'
 annot_csv_filename = '032312_prep_191224_225910.annot.csv'
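`valid.toml` appears to act as the reference config that user configs are checked against, which is why the rename must land here too; otherwise `dataset_path` would be rejected as an unknown option. A rough sketch of that kind of option-name check (an illustration of the general idea, not vak's actual validator, whose logic may differ):

    # Illustration of validating user config option names against valid.toml;
    # vak's real validation logic may differ, this is only the general idea.
    import toml

    valid = toml.load("src/vak/config/valid.toml")
    user = toml.load("my_train_config.toml")  # hypothetical user config

    for section, options in user.items():
        if section not in valid:
            raise ValueError(f"invalid section in config: {section}")
        unknown = set(options) - set(valid[section])
        if unknown:
            # before this commit, 'dataset_path' would have tripped this check
            raise ValueError(f"invalid option(s) in [{section}]: {unknown}")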

src/vak/core/eval.py (+9 -9)

@@ -25,7 +25,7 @@
 def eval(
     model_name: str,
     model_config: dict,
-    csv_path,
+    dataset_path,
     checkpoint_path,
     labelmap_path,
     output_dir,
@@ -48,8 +48,8 @@ def eval(
         Model configuration in a ``dict``,
         as loaded from a .toml file,
         and used by the model method ``from_config``.
-    csv_path : str, pathlib.Path
-        path to where dataset was saved as a csv.
+    dataset_path : str, pathlib.Path
+        Path to dataset, e.g., a csv file generated by running ``vak prep``.
     checkpoint_path : str, pathlib.Path
         path to directory with checkpoint files saved by Torch, to reload model
     output_dir : str, pathlib.Path
@@ -105,8 +105,8 @@ def eval(
     """
     # ---- pre-conditions ----------------------------------------------------------------------------------------------
     for path, path_name in zip(
-        (checkpoint_path, csv_path, labelmap_path, spect_scaler_path),
-        ('checkpoint_path', 'csv_path', 'labelmap_path', 'spect_scaler_path'),
+        (checkpoint_path, dataset_path, labelmap_path, spect_scaler_path),
+        ('checkpoint_path', 'dataset_path', 'labelmap_path', 'spect_scaler_path'),
     ):
         if path is not None:  # because `spect_scaler_path` is optional
             if not validators.is_a_file(path):
@@ -148,9 +148,9 @@ def eval(
         window_size=window_size,
         return_padding_mask=True,
     )
-    logger.info(f"creating dataset for evaluation from: {csv_path}")
+    logger.info(f"creating dataset for evaluation from: {dataset_path}")
     val_dataset = VocalDataset.from_csv(
-        csv_path=csv_path,
+        csv_path=dataset_path,
         split=split,
         labelmap=labelmap,
         spect_key=spect_key,
@@ -173,7 +173,7 @@ def eval(
         input_shape = input_shape[1:]
 
     if post_tfm_kwargs:
-        dataset_df = pd.read_csv(csv_path)
+        dataset_df = pd.read_csv(dataset_path)
         # we use the timebins vector from the first spect path to get timebin dur.
         # this is less careful than calling io.dataframe.validate_and_get_timebin_dur
         # but it's also much faster, and we can assume dataframe was validated when it was made
@@ -227,7 +227,7 @@ def eval(
             ("checkpoint_path", checkpoint_path),
             ("labelmap_path", labelmap_path),
             ("spect_scaler_path", spect_scaler_path),
-            ("csv_path", csv_path),
+            ("dataset_path", dataset_path),
         ]
     )
     # TODO: is this still necessary after switching to Lightning? Stop saying "average"?
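The precondition hunk above (`@@ -105,8 +105,8 @@`) pairs each path with its option name so the error message can say which option failed, and `None` entries are skipped because `spect_scaler_path` is optional. The same pattern as a self-contained sketch, with `pathlib.Path.is_file` standing in for vak's `validators.is_a_file`:

    # Sketch of the paired path/name precondition check from core.eval;
    # Path.is_file stands in for vak's validators.is_a_file.
    from pathlib import Path

    def check_paths_are_files(**named_paths):
        for path_name, path in named_paths.items():
            if path is not None:  # optional paths (e.g. spect_scaler_path) may be None
                if not Path(path).is_file():
                    raise FileNotFoundError(
                        f"value for '{path_name}' not recognized as a file: {path}"
                    )

    # usage -- raises FileNotFoundError unless these illustrative paths exist
    check_paths_are_files(
        checkpoint_path="results/TweetyNet/checkpoints/ckpt.pt",
        dataset_path="prep/032312_prep_191224_225910.csv",
        spect_scaler_path=None,  # optional, so None is allowed
    )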
