Skip to content
This repository has been archived by the owner on Sep 4, 2024. It is now read-only.

Commit

Permalink
Merge pull request #345 from specifysystems/341-specifying-key_field-…
Browse files Browse the repository at this point in the history
…parameter-for-split_occurrence_data-causes-exception

341 specifying key field parameter for split occurrence data causes exception
  • Loading branch information
cjgrady authored Jun 2, 2022
2 parents 0fbc370 + 5066897 commit 088235f
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 5 deletions.
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ authors:
given-names: James
orcid: https://orcid.org/0000-0001-8684-1764
cff-version: 1.2.0
date-released: '2022-05-20'
date-released: '2022-06-02'
message: If you use this software, please cite it as below.
title: Specify Systems Lifemapper Python Library (lmpy)
url: https://github.com/specifysystems/lmpy
version: 3.1.17
version: 3.1.21
2 changes: 1 addition & 1 deletion lmpy/data_preparation/occurrence_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def key_from_fields_func(point):
Returns:
Object: An object representing the key for the particular point.
"""
writer_key = [point.get_attribute(fld) for fld in key_fields]
writer_key = tuple(list(point.get_attribute(fld) for fld in key_fields))
if len(writer_key) == 1:
return writer_key[0]
return writer_key
Expand Down
14 changes: 12 additions & 2 deletions lmpy/point.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,21 +300,31 @@ class PointCsvWriter:
"""Class for writing Points to a CSV file."""

# .......................
def __init__(self, filename, fields, write_headers=True, mode='w', **kwargs):
def __init__(
self,
filename,
fields,
write_headers=True,
mode='w',
encoding='utf8',
**kwargs
):
"""Constructor for writing points to csv file.
Args:
filename (:obj:`str`): A file location to write points to.
fields (:obj:`list`): A list of fields to include in the csv headers.
write_headers (:obj:`bool`): Should headers be written.
mode (:obj:`str`): File write mode.
encoding (str): The encoding to use when writing data.
**kwargs (:obj:`dict`): Keyword parameters that will be passed to the
DictWriter instance from the csv module.
"""
self.filename = filename
self.file = None
self.writer = None
self.field_names = fields
self.encoding = encoding
self.kwargs = kwargs
self.write_headers = write_headers
self.file_mode = mode
Expand Down Expand Up @@ -346,7 +356,7 @@ def close(self):
# .......................
def open(self):
"""Open file for writing."""
self.file = open(self.filename, self.file_mode)
self.file = open(self.filename, self.file_mode, encoding=self.encoding)
self.writer = csv.DictWriter(self.file, self.field_names, **self.kwargs)
if self.write_headers:
self.writer.writeheader()
Expand Down
88 changes: 88 additions & 0 deletions tests/test_tools/test_split_occurrence_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,3 +536,91 @@ def test_complex(monkeypatch, generate_temp_filename, temp_directory):
with open(species_list_filename, mode='rt') as species_in:
for line in species_in:
assert line.strip() in list(SPECIES_MAP.values())


# .....................................................................................
def test_multiple_key_fields_config(
    monkeypatch,
    generate_temp_filename,
    temp_directory
):
    """Run the occurrence splitter with a list-valued ``key_field`` setting.

    A simulated DWCA of 1000 records and an empty wrangler configuration are
    written to temporary files, the tool is driven through a JSON config file
    whose ``key_field`` entry names two fields, and the CSV files found in the
    output directory are then validated.

    Args:
        monkeypatch (pytest.fixture): A fixture for monkeypatching.
        generate_temp_filename (pytest.fixture): A fixture for generating filenames.
        temp_directory (pytest.fixture): A fixture to get a temporary directory.
    """
    # Scratch files for the simulated archive and its wrangler configuration
    archive_path = generate_temp_filename()
    wranglers_path = generate_temp_filename()

    # Describe each simulated column, in the order it appears in the archive
    name_field = SimulatedField(
        'scientificName',
        'http://rs.tdwg.org/dwc/terms/specificEpithet',
        get_random_choice_func(list(SPECIES_MAP.keys())),
        'str'
    )
    genus_field = SimulatedField(
        'genus',
        '',
        get_random_choice_func(['GenusA', 'GenusB']),
        'str'
    )
    species_field = SimulatedField(
        'sp',
        '',
        get_random_choice_func(['SpeciesA', 'SpeciesB', 'SpeciesB']),
        'str'
    )
    lat_field = SimulatedField(
        'latitude',
        'http://rs.tdwg.org/dwc/terms/decimalLatitude',
        get_random_float_func(-90.0, 90.0, 2, 6),
        'float'
    )
    lon_field = SimulatedField(
        'longitude',
        'http://rs.tdwg.org/dwc/terms/decimalLongitude',
        get_random_float_func(-180.0, 180.0, 2, 6),
        'float'
    )
    simulated_fields = [name_field, genus_field, species_field, lat_field, lon_field]

    # Write the archive and an empty list of wranglers
    generate_dwca(archive_path, 1000, simulated_fields)
    with open(wranglers_path, mode='wt') as wranglers_out:
        json.dump([], wranglers_out)

    # Script configuration; key_field is a list so the key spans two fields
    script_config_filename = generate_temp_filename(suffix='.json')
    script_config = {
        'max_open_writers': 100,
        'key_field': ['genus', 'scientificName'],
        'dwca': [[archive_path, wranglers_path]],
        'out_dir': temp_directory
    }
    with open(script_config_filename, mode='wt') as config_out:
        json.dump(script_config, config_out)

    # Invoke the command line entry point as if called with --config_file
    monkeypatch.setattr(
        'sys.argv',
        ['split_occurrence_data.py', '--config_file', script_config_filename]
    )
    cli()

    # Every CSV written to the output directory must validate
    output_csvs = glob.glob(f'{temp_directory}/*.csv')
    assert validate_point_csvs(output_csvs, 'species_name', 'x', 'y')

0 comments on commit 088235f

Please sign in to comment.