Merge pull request #345 from specifysystems/341-specifying-key_field-parameter-for-split_occurrence_data-causes-exception

341 specifying key field parameter for split occurrence data causes exception
specifysystems · Jun 2, 2022 · 088235f · 088235f
2 parents 0fbc370 + 5066897
commit 088235f
Show file tree

Hide file tree

Showing 4 changed files with 103 additions and 5 deletions.
diff --git a/CITATION.cff b/CITATION.cff
@@ -9,8 +9,8 @@ authors:
   given-names: James
   orcid: https://orcid.org/0000-0001-8684-1764
 cff-version: 1.2.0
-date-released: '2022-05-20'
+date-released: '2022-06-02'
 message: If you use this software, please cite it as below.
 title: Specify Systems Lifemapper Python Library (lmpy)
 url: https://github.com/specifysystems/lmpy
-version: 3.1.17
+version: 3.1.21
diff --git a/lmpy/data_preparation/occurrence_splitter.py b/lmpy/data_preparation/occurrence_splitter.py
@@ -59,7 +59,7 @@ def key_from_fields_func(point):
         Returns:
             Object: An object representing the key for the particular point.
         """
-        writer_key = [point.get_attribute(fld) for fld in key_fields]
+        writer_key = tuple(list(point.get_attribute(fld) for fld in key_fields))
         if len(writer_key) == 1:
             return writer_key[0]
         return writer_key

diff --git a/lmpy/point.py b/lmpy/point.py
@@ -300,21 +300,31 @@ class PointCsvWriter:
     """Class for writing Points to a CSV file."""
 
     # .......................
-    def __init__(self, filename, fields, write_headers=True, mode='w', **kwargs):
+    def __init__(
+        self,
+        filename,
+        fields,
+        write_headers=True,
+        mode='w',
+        encoding='utf8',
+        **kwargs
+    ):
         """Constructor for writing points to csv file.
 
         Args:
             filename (:obj:`str`): A file location to write points to.
             fields (:obj:`list`): A list of fields to include in the csv headers.
             write_headers (:obj:`bool`): Should headers be written.
             mode (:obj:`str`): File write mode.
+            encoding (str): The encoding to use when writing data.
             **kwargs (:obj:`dict`): Keyword parameters that will be passed to the
                 DictWriter instance from the csv module.
         """
         self.filename = filename
         self.file = None
         self.writer = None
         self.field_names = fields
+        self.encoding = encoding
         self.kwargs = kwargs
         self.write_headers = write_headers
         self.file_mode = mode
@@ -346,7 +356,7 @@ def close(self):
     # .......................
     def open(self):
         """Open file for writing."""
-        self.file = open(self.filename, self.file_mode)
+        self.file = open(self.filename, self.file_mode, encoding=self.encoding)
         self.writer = csv.DictWriter(self.file, self.field_names, **self.kwargs)
         if self.write_headers:
             self.writer.writeheader()

diff --git a/tests/test_tools/test_split_occurrence_data.py b/tests/test_tools/test_split_occurrence_data.py
@@ -536,3 +536,91 @@ def test_complex(monkeypatch, generate_temp_filename, temp_directory):
     with open(species_list_filename, mode='rt') as species_in:
         for line in species_in:
             assert line.strip() in list(SPECIES_MAP.values())
+
+
+# .....................................................................................
+def test_multiple_key_fields_config(
+    monkeypatch,
+    generate_temp_filename,
+    temp_directory
+):
+    """Tests specifying the key_field parameter.
+
+    Args:
+        monkeypatch (pytest.fixture): A fixture for monkeypatching.
+        generate_temp_filename (pytest.fixture): A fixture for generating filenames.
+        temp_directory (pytest.fixture): A fixture to get a temporary directory.
+    """
+    # Temporary files
+    dwca_filename = generate_temp_filename()
+    wrangler_config_filename = generate_temp_filename()
+
+    # Generate a DWCA and wranglers
+    dwca_fields = [
+        SimulatedField(
+            'scientificName',
+            'http://rs.tdwg.org/dwc/terms/specificEpithet',
+            get_random_choice_func(list(SPECIES_MAP.keys())),
+            'str'
+        ),
+        SimulatedField(
+            'genus',
+            '',
+            get_random_choice_func(['GenusA', 'GenusB']),
+            'str'
+        ),
+        SimulatedField(
+            'sp',
+            '',
+            get_random_choice_func(['SpeciesA', 'SpeciesB', 'SpeciesB']),
+            'str'
+        ),
+        SimulatedField(
+            'latitude',
+            'http://rs.tdwg.org/dwc/terms/decimalLatitude',
+            get_random_float_func(-90.0, 90.0, 2, 6),
+            'float'
+        ),
+        SimulatedField(
+            'longitude',
+            'http://rs.tdwg.org/dwc/terms/decimalLongitude',
+            get_random_float_func(-180.0, 180.0, 2, 6),
+            'float'
+        )
+    ]
+    generate_dwca(dwca_filename, 1000, dwca_fields)
+    with open(wrangler_config_filename, mode='wt') as json_out:
+        json.dump([], json_out)
+
+    # Create config file for script
+    script_config_filename = generate_temp_filename(suffix='.json')
+    with open(script_config_filename, mode='wt') as json_out:
+        json.dump(
+            {
+                'max_open_writers': 100,
+                'key_field': ['genus', 'scientificName'],
+                'dwca': [
+                    [
+                        dwca_filename,
+                        wrangler_config_filename
+                    ]
+                ],
+                'out_dir': temp_directory
+            },
+            json_out
+        )
+
+    # Run script
+    params = [
+        'split_occurrence_data.py',
+        '--config_file',
+        script_config_filename,
+    ]
+
+    monkeypatch.setattr('sys.argv', params)
+    cli()
+
+    # Check output
+    assert validate_point_csvs(
+        glob.glob(f'{temp_directory}/*.csv'), 'species_name', 'x', 'y'
+    )