From 2cc420e784ce624ff1c432c7f8954dff8999b7fb Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Wed, 17 Jul 2024 09:30:43 +0200 Subject: [PATCH 1/7] wip: bare bones of a calendar daterange class --- src/pymorize/calendar.py | 61 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 src/pymorize/calendar.py diff --git a/src/pymorize/calendar.py b/src/pymorize/calendar.py new file mode 100644 index 0000000..3685762 --- /dev/null +++ b/src/pymorize/calendar.py @@ -0,0 +1,61 @@ +""" +Yet another calendar implementation... + +This module provides functions for listing files for a specific date range +""" + +import datetime + + +class CalendarRange: + + def __init__( + self, + start: datetime.datetime, + end: datetime.datetime, + freq: datetime.timedelta = datetime.timedelta("1 month"), + periods: int = None, + ): + # Determine which 3 are given + # If freq is given, calculate periods + if freq: + if periods: + raise ValueError("Cannot specify both freq and periods") + periods = (end - start) // freq + # If periods is given, calculate freq + elif periods: + freq = (end - start) // periods + # If none are given, raise an error + else: + raise valueerror("must specify either freq or periods") + # Create range + self._range = [start + i * freq for i in range(periods)] + self._start = start + self._end = end + self._periods = periods + + @property + def start(self): + return self._start + + @property + def end(self): + return self._end + + def __contains__(self, date_to_check): + return date_to_check in self._range + + def __len__(self): + return len(self._range) + + def __iter__(self): + return iter(self._range) + + def __getitem__(self, index): + return self._range[index] + + def __repr__(self): + return f"CalendarRange(start={self._start}, end={self._end}, periods={self._periods})" + + def __str__(self): + return f"{self._start} to {self._end} in {self._periods} periods" From 8806350a3ad8c063fb9ef6461bd6acb669824796 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Wed, 17 Jul 2024 11:21:45 +0200 Subject: [PATCH 2/7] fix: minor syntax errors --- setup.py | 1 + src/pymorize/calendar.py | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index c4e643d..b26b9d0 100644 --- a/setup.py +++ b/setup.py @@ -30,6 +30,7 @@ def read(filename): install_requires=[ "cf_xarray", "click-loguru", + "pendulum", "pint-xarray", "pyyaml", "questionary", diff --git a/src/pymorize/calendar.py b/src/pymorize/calendar.py index 3685762..e7e4525 100644 --- a/src/pymorize/calendar.py +++ b/src/pymorize/calendar.py @@ -4,16 +4,16 @@ This module provides functions for listing files for a specific date range """ -import datetime +import pendulum class CalendarRange: def __init__( self, - start: datetime.datetime, - end: datetime.datetime, - freq: datetime.timedelta = datetime.timedelta("1 month"), + start: pendulum.datetime, + end: pendulum.datetime, + freq: pendulum.Duration = pendulum.duration(months=1), periods: int = None, ): # Determine which 3 are given @@ -27,7 +27,7 @@ def __init__( freq = (end - start) // periods # If none are given, raise an error else: - raise valueerror("must specify either freq or periods") + raise ValueError("must specify either freq or periods") # Create range self._range = [start + i * freq for i in range(periods)] self._start = start From 795cc2f18e91240e944857df48f80021a1e5d529 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Wed, 17 Jul 2024 15:43:57 +0200 Subject: [PATCH 3/7] feat(calendar): allow generation of year bounds Closes #15 --- src/pymorize/calendar.py | 89 ++++++++++++++++++++++++++++++++++++++++ tests/test_calendar.py | 70 +++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+) create mode 100644 tests/test_calendar.py diff --git a/src/pymorize/calendar.py b/src/pymorize/calendar.py index e7e4525..4a43267 100644 --- a/src/pymorize/calendar.py +++ b/src/pymorize/calendar.py @@ -4,7 +4,96 @@ This module provides functions for listing files for a specific date range """ +import time + import pendulum +from loguru import logger + + +def year_bounds_major_digits(first, last, step, binning_digit): + """ + Generate year ranges with a specific first digit. + + This function generates a list of year ranges (bounds) where each range starts with a specific digit (binning_digit). + The ranges are generated from a given start year (first) to an end year (last) with a specific step size. + + Parameters + ---------- + first : int + The first year in the range. + last : int + The last year in the range. + step : int + The step size for the range. + binning_digit : int + The digit that each range should start with. + + Returns + ------- + list + A list of lists where each inner list is a range of years. + + Raises + ------ + ValueError + If the binning_digit is greater than 10. + + Examples + -------- + >>> year_bounds_major_digits(2000, 2010, 2, 2) + [[2000, 2001], [2002, 2003], [2004, 2005], [2006, 2007], [2008, 2009], [2010, 2010]] + + >>> year_bounds_major_digits(2000, 2010, 3, 3) + [[2000, 2002], [2003, 2005], [2006, 2008], [2009, 2010]] + + Notes + ----- + This function uses a while loop to iterate through the years from first to last. + It checks the ones digit of the current year and compares it with the binning_digit to determine the start of a new range. + If the first range is undersized (i.e., the binning_digit is in the ones digit of the first few years), + the function will continue to increment the current year until it hits the binning_digit. + If the first range is not undersized, the function will continue to increment the current year until it hits the next binning_digit. + Once a range is completed, it is appended to the bounds list and the process continues until the last year is reached. + """ + # NOTE(PG): This is a bit hacky and difficult to read, but all the tests pass... + if binning_digit >= 10: + raise ValueError("Give a binning_digit less than 10") + bounds = [] + current_location = bin_start = first + first_bin_is_undersized = binning_digit in [ + i % 10 for i in range(first, first + step) + ] + bin_end = "underfull bin" if first_bin_is_undersized else bin_start + step + first_bin_empty = True + + while current_location <= last: + ones_digit = current_location % 10 + + if first_bin_empty: + if first_bin_is_undersized: + # Go until you hit the binning digit + if ones_digit != binning_digit: + current_location += 1 + ones_digit = current_location % 10 + else: + bounds.append([bin_start, current_location - 1]) + first_bin_empty = False + bin_start = current_location + else: + # Go until you hit the next binning digit + if ones_digit == binning_digit: + bounds.append([bin_start, current_location - 1]) + first_bin_empty = False + bin_start = current_location + else: + current_location += 1 + else: + bin_end = bin_start + step + current_location += 1 + if current_location == bin_end or current_location > last: + bounds.append([bin_start, min(current_location - 1, last)]) + bin_start = current_location + return bounds class CalendarRange: diff --git a/tests/test_calendar.py b/tests/test_calendar.py new file mode 100644 index 0000000..208c48a --- /dev/null +++ b/tests/test_calendar.py @@ -0,0 +1,70 @@ +import pymorize.calendar + + +def test_year_bounds_major_digits_first_can_end_with_binning_digit(): + bounds = pymorize.calendar.year_bounds_major_digits( + first=2700, last=2720, step=10, binning_digit=1 + ) + assert [[2700, 2700], [2701, 2710], [2711, 2720]] == bounds + + +def test_year_bounds_major_digits_can_start_1before_major_digit1(): + bounds = pymorize.calendar.year_bounds_major_digits( + first=2050, last=2070, step=10, binning_digit=1 + ) + assert [[2050, 2050], [2051, 2060], [2061, 2070]] == bounds + + +def test_year_bounds_major_digits_can_have_no_complete_range(): + bounds = pymorize.calendar.year_bounds_major_digits( + first=2050, last=2055, step=10, binning_digit=1 + ) + assert [[2050, 2050], [2051, 2055]] == bounds + + +def test_year_bounds_major_digits_can_start_3before_major_digit3(): + bounds = pymorize.calendar.year_bounds_major_digits( + first=2050, last=2070, step=10, binning_digit=3 + ) + assert [[2050, 2052], [2053, 2062], [2063, 2070]] == bounds + + +def test_year_bounds_major_digits_can_start_9before_major_digit1(): + bounds = pymorize.calendar.year_bounds_major_digits( + first=2042, last=2070, step=10, binning_digit=1 + ) + assert [[2042, 2050], [2051, 2060], [2061, 2070]] == bounds + + +def test_year_bounds_major_digits_can_start_1before_major_digit1_with_step20(): + bounds = pymorize.calendar.year_bounds_major_digits( + first=2050, last=2080, step=20, binning_digit=1 + ) + assert [[2050, 2050], [2051, 2070], [2071, 2080]] == bounds + + +def test_year_bounds_major_digits_can_start_3before_major_digit3_with_step5(): + bounds = pymorize.calendar.year_bounds_major_digits( + first=2050, last=2070, step=5, binning_digit=3 + ) + assert [ + [2050, 2052], + [2053, 2057], + [2058, 2062], + [2063, 2067], + [2068, 2070], + ] == bounds + + +def test_year_bounds_major_digits_can_start_1before_major_digit1_with_step1(): + bounds = pymorize.calendar.year_bounds_major_digits( + first=2050, last=2055, step=1, binning_digit=1 + ) + assert [ + [2050, 2050], + [2051, 2051], + [2052, 2052], + [2053, 2053], + [2054, 2054], + [2055, 2055], + ] == bounds From 867a73074cec4026cc787922439220248f7c669f Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Tue, 23 Jul 2024 09:10:09 +0200 Subject: [PATCH 4/7] wip: date ranges with tests --- src/pymorize/calendar.py | 10 +++++ tests/test_calendar.py | 87 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/src/pymorize/calendar.py b/src/pymorize/calendar.py index 4a43267..b3ca8a1 100644 --- a/src/pymorize/calendar.py +++ b/src/pymorize/calendar.py @@ -148,3 +148,13 @@ def __repr__(self): def __str__(self): return f"{self._start} to {self._end} in {self._periods} periods" + + def __list__(self) -> list: + return self._range + + @classmethod + def from_bounds(cls, bounds, freq=pendulum.duration(months=1), periods=None): + clses = [] + for start, end in bounds: + clses.append(cls(start, end, freq, periods)) + return *clses diff --git a/tests/test_calendar.py b/tests/test_calendar.py index 208c48a..903302c 100644 --- a/tests/test_calendar.py +++ b/tests/test_calendar.py @@ -1,4 +1,91 @@ +import pendulum + import pymorize.calendar +from pymorize.calendar import CalendarRange + + +def setup_calendar_range(): + start = pendulum.datetime(2022, 1, 1) + end = pendulum.datetime(2022, 12, 31) + freq = pendulum.duration(months=1) + return CalendarRange(start, end, freq) + + +def test_init(): + calendar_range = setup_calendar_range() + assert calendar_range.start == pendulum.datetime(2022, 1, 1) + assert calendar_range.end == pendulum.datetime(2022, 12, 31) + assert len(calendar_range) == 12 + + +def test_contains(): + calendar_range = setup_calendar_range() + assert pendulum.datetime(2022, 5, 1) in calendar_range + assert pendulum.datetime(2023, 1, 1) not in calendar_range + + +def test_len(): + calendar_range = setup_calendar_range() + assert len(calendar_range) == 12 + + +def test_iter(): + calendar_range = setup_calendar_range() + dates = [date for date in calendar_range] + assert len(dates) == 12 + + +def test_getitem(): + calendar_range = setup_calendar_range() + assert calendar_range[0] == pendulum.datetime(2022, 1, 1) + assert calendar_range[-1] == pendulum.datetime(2022, 12, 31) + + +def test_repr(): + calendar_range = setup_calendar_range() + assert ( + repr(calendar_range) + == "CalendarRange(start=2022-01-01T00:00:00+00:00, end=2022-12-31T00:00:00+00:00, periods=12)" + ) + + +def test_str(): + calendar_range = setup_calendar_range() + assert ( + str(calendar_range) + == "2022-01-01T00:00:00+00:00 to 2022-12-31T00:00:00+00:00 in 12 periods" + ) + + +def test_from_bounds(): + bounds = [(pendulum.datetime(2022, 1, 1), pendulum.datetime(2022, 12, 31))] + freq = pendulum.duration(months=1) + calendar_ranges = CalendarRange.from_bounds(bounds, freq) + assert len(calendar_ranges) == 1 + assert calendar_ranges[0].start == pendulum.datetime(2022, 1, 1) + assert calendar_ranges[0].end == pendulum.datetime(2022, 12, 31) + + +def test_from_bounds_multiple(): + bounds = [ + (pendulum.datetime(2022, 1, 1), pendulum.datetime(2022, 6, 30)), + (pendulum.datetime(2022, 7, 1), pendulum.datetime(2022, 12, 31)), + ] + freq = pendulum.duration(months=1) + calendar_ranges = CalendarRange.from_bounds(bounds, freq) + assert len(calendar_ranges) == 2 + assert calendar_ranges[0].start == pendulum.datetime(2022, 1, 1) + assert calendar_ranges[0].end == pendulum.datetime(2022, 6, 30) + assert calendar_ranges[1].start == pendulum.datetime(2022, 7, 1) + assert calendar_ranges[1].end == pendulum.datetime(2022, 12, 31) + + +def test_from_bounds_integers(): + bounds = [(2700, 2720)] + calendar_ranges = CalendarRange.from_bounds(bounds) + assert len(calendar_ranges) == 1 + assert calendar_ranges[0].start == pendulum.datetime(2700, 1, 1) + assert calendar_ranges[0].end == pendulum.datetime(2720, 12, 31) def test_year_bounds_major_digits_first_can_end_with_binning_digit(): From 4c52dd4c4ac1de4330e4112bad863996dd96ee36 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Tue, 23 Jul 2024 13:40:33 +0200 Subject: [PATCH 5/7] feat: generation of calendar ranges --- src/pymorize/calendar.py | 165 +++++++++++++++++++++++++-------------- tests/test_calendar.py | 106 ++++++++----------------- 2 files changed, 138 insertions(+), 133 deletions(-) diff --git a/src/pymorize/calendar.py b/src/pymorize/calendar.py index b3ca8a1..9c7e693 100644 --- a/src/pymorize/calendar.py +++ b/src/pymorize/calendar.py @@ -1,16 +1,36 @@ """ -Yet another calendar implementation... +Yet another calendar implementation. -This module provides functions for listing files for a specific date range -""" +This module provides functions for creating date ranges. + +The main components of this module are: + +- ``year_bounds_major_digits``: generates a list of year ranges (bounds) where each range starts with a specific digit. +- ``date_ranges_from_bounds``: creates a list of date indexes from bounds +- ``date_ranges_from_year_bounds``: creates a list of date indexes from year bounds +- ``simple_ranges_from_bounds``: creates a list of simple ranges from bounds -import time +Examples +-------- +>>> year_bounds = year_bounds_major_digits(2000, 2010, 2, 2) +>>> print(year_bounds) +[[2000, 2001], [2002, 2003], [2004, 2005], [2006, 2007], [2008, 2009], [2010, 2010]] + +>>> date_range = date_range_from_bounds(year_bounds, freq="Y") +>>> print(date_range) +([Timestamp('2000-12-31 00:00:00', freq='A-DEC'), Timestamp('2001-12-31 00:00:00', freq='A-DEC')], [Timestamp('2002-12-31 00:00:00', freq='A-DEC'), Timestamp('2003-12-31 00:00:00', freq='A-DEC')], [Timestamp('2004-12-31 00:00:00', freq='A-DEC'), Timestamp('2005-12-31 00:00:00', freq='A-DEC')], [Timestamp('2006-12-31 00:00:00', freq='A-DEC'), Timestamp('2007-12-31 00:00:00', freq='A-DEC')], [Timestamp('2008-12-31 00:00:00', freq='A-DEC'), Timestamp('2009-12-31 00:00:00', freq='A-DEC')], [Timestamp('2010-12-31 00:00:00', freq='A-DEC')]) + +>>> date_range = date_range_from_year_bounds(year_bounds, freq="Y") +>>> print(date_range) +([Timestamp('2000-12-31 00:00:00', freq='A-DEC'), Timestamp('2001-12-31 00:00:00', freq='A-DEC')], [Timestamp('2002-12-31 00:00:00', freq='A-DEC'), Timestamp('2003-12-31 00:00:00', freq='A-DEC')], [Timestamp('2004-12-31 00:00:00', freq='A-DEC'), Timestamp('2005-12-31 00:00:00', freq='A-DEC')], [Timestamp('2006-12-31 00:00:00', freq='A-DEC'), Timestamp('2007-12-31 00:00:00', freq='A-DEC')], [Timestamp('2008-12-31 00:00:00', freq='A-DEC'), Timestamp('2009-12-31 00:00:00', freq='A-DEC')], [Timestamp('2010-12-31 00:00:00', freq='A-DEC')]) +""" import pendulum +import xarray as xr from loguru import logger -def year_bounds_major_digits(first, last, step, binning_digit): +def year_bounds_major_digits(first, last, step, binning_digit, return_type=int): """ Generate year ranges with a specific first digit. @@ -27,6 +47,8 @@ def year_bounds_major_digits(first, last, step, binning_digit): The step size for the range. binning_digit : int The digit that each range should start with. + return_type : type, optional + The type of the elements in the returned list, either int or pendulum.DateTime. Defaults to int. Returns ------- @@ -56,6 +78,9 @@ def year_bounds_major_digits(first, last, step, binning_digit): Once a range is completed, it is appended to the bounds list and the process continues until the last year is reached. """ # NOTE(PG): This is a bit hacky and difficult to read, but all the tests pass... + logger.debug( + f"Running year_bounds_major_digits({first=}, {last=}, {step=}, {binning_digit=})" + ) if binning_digit >= 10: raise ValueError("Give a binning_digit less than 10") bounds = [] @@ -64,6 +89,7 @@ def year_bounds_major_digits(first, last, step, binning_digit): i % 10 for i in range(first, first + step) ] bin_end = "underfull bin" if first_bin_is_undersized else bin_start + step + logger.debug(f"first_bin_is_undersized: {first_bin_is_undersized}") first_bin_empty = True while current_location <= last: @@ -77,12 +103,18 @@ def year_bounds_major_digits(first, last, step, binning_digit): ones_digit = current_location % 10 else: bounds.append([bin_start, current_location - 1]) + logger.debug( + f"Appending bounds {bin_start=}, {current_location-1=}" + ) first_bin_empty = False bin_start = current_location else: # Go until you hit the next binning digit if ones_digit == binning_digit: bounds.append([bin_start, current_location - 1]) + logger.debug( + f"Appending bounds {bin_start=}, {current_location-1=}" + ) first_bin_empty = False bin_start = current_location else: @@ -92,69 +124,82 @@ def year_bounds_major_digits(first, last, step, binning_digit): current_location += 1 if current_location == bin_end or current_location > last: bounds.append([bin_start, min(current_location - 1, last)]) + logger.debug( + f"Appending bounds {bin_start=}, {min(current_location-1, last)=}" + ) bin_start = current_location - return bounds - - -class CalendarRange: - - def __init__( - self, - start: pendulum.datetime, - end: pendulum.datetime, - freq: pendulum.Duration = pendulum.duration(months=1), - periods: int = None, - ): - # Determine which 3 are given - # If freq is given, calculate periods - if freq: - if periods: - raise ValueError("Cannot specify both freq and periods") - periods = (end - start) // freq - # If periods is given, calculate freq - elif periods: - freq = (end - start) // periods - # If none are given, raise an error - else: - raise ValueError("must specify either freq or periods") - # Create range - self._range = [start + i * freq for i in range(periods)] - self._start = start - self._end = end - self._periods = periods + if return_type is int: + return [[int(i) for i in bound] for bound in bounds] + elif return_type is pendulum.DateTime: + return [[pendulum.datetime(int(i), 1, 1) for i in bound] for bound in bounds] + else: + raise ValueError("return_type must be either int or pendulum.DateTime") - @property - def start(self): - return self._start - @property - def end(self): - return self._end +def date_ranges_from_bounds(bounds, freq: str = "M", **kwargs): + """ + Class method to create a list of instances from a list of start and end bounds. - def __contains__(self, date_to_check): - return date_to_check in self._range + Parameters + ---------- + bounds : list of tuple of str or datetime-like + A list of strings or datetime-like tuples each containing a start and end bound. + freq : str, optional + The frequency of the periods. Defaults to one month. + **kwargs : + Additional keyword arguments to pass to the date_range function. - def __len__(self): - return len(self._range) + Returns + ------- + tuple + A tuple containing instances of the class for each provided bound. - def __iter__(self): - return iter(self._range) + Examples + -------- + >>> bounds = [("2020-01-01", "2020-01-31"), ("2020-02-01", "2020-02-29")] + >>> date_ranges = date_ranges_from_bounds(bounds) + >>> print(date_ranges) + (DatetimeIndex(['2020-01-01', '2020-01-02', ..., '2020-01-31'], dtype='datetime64[ns]', freq='D'), + DatetimeIndex(['2020-02-01', '2020-02-02', ..., '2020-02-29'], dtype='datetime64[ns]', freq='D')) + + >>> bounds = [("2020-01-01", "2020-12-31")] + >>> date_ranges = date_ranges_from_bounds(bounds, freq="M") + >>> print(date_ranges) + (DatetimeIndex(['2020-01-31', '2020-02-29', ..., '2020-12-31'], dtype='datetime64[ns]', freq='M'),) + """ + objs = [] + for start, end in bounds: + objs.append(xr.date_range(start=start, end=end, freq=freq, **kwargs)) + if len(objs) == 1: + return objs[0] + return (*objs,) - def __getitem__(self, index): - return self._range[index] - def __repr__(self): - return f"CalendarRange(start={self._start}, end={self._end}, periods={self._periods})" +def date_ranges_from_year_bounds(year_bounds, freq: str = "M", **kwargs): + """ + Class method to create a list of instances from a list of year bounds. - def __str__(self): - return f"{self._start} to {self._end} in {self._periods} periods" + Parameters + ---------- + year_bounds : list of lists or tuples + A list of lists, each containing a start and end year. + freq : str, optional + The frequency of the periods. Defaults to one month. + **kwargs : + Additional keyword arguments to pass to the date_range function. + """ + bounds = [ + (pendulum.datetime(start, 1, 1), pendulum.datetime(end, 12, 31)) + for start, end in year_bounds + ] + return date_ranges_from_bounds(bounds, freq, **kwargs) - def __list__(self) -> list: - return self._range - @classmethod - def from_bounds(cls, bounds, freq=pendulum.duration(months=1), periods=None): - clses = [] - for start, end in bounds: - clses.append(cls(start, end, freq, periods)) - return *clses +def simple_ranges_from_bounds(bounds): + """ + Create a list of simple ranges from a list of bounds. + """ + if len(bounds) == 1: + start, end = bounds[0] + return range(start, end + 1) + return [range(start, end + 1) for start, end in bounds] diff --git a/tests/test_calendar.py b/tests/test_calendar.py index 903302c..8ef18d0 100644 --- a/tests/test_calendar.py +++ b/tests/test_calendar.py @@ -1,91 +1,51 @@ -import pendulum +import xarray as xr import pymorize.calendar -from pymorize.calendar import CalendarRange -def setup_calendar_range(): - start = pendulum.datetime(2022, 1, 1) - end = pendulum.datetime(2022, 12, 31) - freq = pendulum.duration(months=1) - return CalendarRange(start, end, freq) +def test_simple_ranges_from_bounds(): + bounds = [(1, 5), (10, 15)] + result = list(pymorize.calendar.simple_ranges_from_bounds(bounds)) + expected = [range(1, 6), range(10, 16)] + assert result == expected -def test_init(): - calendar_range = setup_calendar_range() - assert calendar_range.start == pendulum.datetime(2022, 1, 1) - assert calendar_range.end == pendulum.datetime(2022, 12, 31) - assert len(calendar_range) == 12 +def test_single_range(): + bounds = [(1, 5)] + result = pymorize.calendar.simple_ranges_from_bounds(bounds) + expected = range(1, 6) + assert result == expected -def test_contains(): - calendar_range = setup_calendar_range() - assert pendulum.datetime(2022, 5, 1) in calendar_range - assert pendulum.datetime(2023, 1, 1) not in calendar_range +def test_single_range_single_element(): + bounds = [(3, 3)] + result = pymorize.calendar.simple_ranges_from_bounds(bounds) + expected = range(3, 4) + assert result == expected -def test_len(): - calendar_range = setup_calendar_range() - assert len(calendar_range) == 12 +def test_single_range_negative(): + bounds = [(-5, -1)] + result = pymorize.calendar.simple_ranges_from_bounds(bounds) + expected = range(-5, 0) + assert result == expected -def test_iter(): - calendar_range = setup_calendar_range() - dates = [date for date in calendar_range] - assert len(dates) == 12 - - -def test_getitem(): - calendar_range = setup_calendar_range() - assert calendar_range[0] == pendulum.datetime(2022, 1, 1) - assert calendar_range[-1] == pendulum.datetime(2022, 12, 31) - - -def test_repr(): - calendar_range = setup_calendar_range() - assert ( - repr(calendar_range) - == "CalendarRange(start=2022-01-01T00:00:00+00:00, end=2022-12-31T00:00:00+00:00, periods=12)" - ) - - -def test_str(): - calendar_range = setup_calendar_range() - assert ( - str(calendar_range) - == "2022-01-01T00:00:00+00:00 to 2022-12-31T00:00:00+00:00 in 12 periods" +def test_date_ranges_from_bounds(): + bounds = [("2020-01-01", "2020-01-31"), ("2020-02-01", "2020-02-29")] + result = pymorize.calendar.date_ranges_from_bounds(bounds) + expected = ( + xr.date_range(start="2020-01-01", end="2020-01-31", freq="M"), + xr.date_range(start="2020-02-01", end="2020-02-29", freq="M"), ) + assert result == expected -def test_from_bounds(): - bounds = [(pendulum.datetime(2022, 1, 1), pendulum.datetime(2022, 12, 31))] - freq = pendulum.duration(months=1) - calendar_ranges = CalendarRange.from_bounds(bounds, freq) - assert len(calendar_ranges) == 1 - assert calendar_ranges[0].start == pendulum.datetime(2022, 1, 1) - assert calendar_ranges[0].end == pendulum.datetime(2022, 12, 31) - - -def test_from_bounds_multiple(): - bounds = [ - (pendulum.datetime(2022, 1, 1), pendulum.datetime(2022, 6, 30)), - (pendulum.datetime(2022, 7, 1), pendulum.datetime(2022, 12, 31)), - ] - freq = pendulum.duration(months=1) - calendar_ranges = CalendarRange.from_bounds(bounds, freq) - assert len(calendar_ranges) == 2 - assert calendar_ranges[0].start == pendulum.datetime(2022, 1, 1) - assert calendar_ranges[0].end == pendulum.datetime(2022, 6, 30) - assert calendar_ranges[1].start == pendulum.datetime(2022, 7, 1) - assert calendar_ranges[1].end == pendulum.datetime(2022, 12, 31) - - -def test_from_bounds_integers(): - bounds = [(2700, 2720)] - calendar_ranges = CalendarRange.from_bounds(bounds) - assert len(calendar_ranges) == 1 - assert calendar_ranges[0].start == pendulum.datetime(2700, 1, 1) - assert calendar_ranges[0].end == pendulum.datetime(2720, 12, 31) +def test_date_ranges_from_bounds_single_range(): + bounds = [("2020-01-01", "2020-12-31")] + result = pymorize.calendar.date_ranges_from_bounds(bounds) + expected = xr.date_range(start="2020-01-01", end="2020-12-31", freq="M") + assert (result == expected).all() def test_year_bounds_major_digits_first_can_end_with_binning_digit(): From 5870af8f43e66cc4e060ad014ce4825d2aa35335 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Tue, 23 Jul 2024 13:51:00 +0200 Subject: [PATCH 6/7] feat: calendar features --- src/pymorize/calendar.py | 4 ++ tests/test_calendar.py | 108 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+) diff --git a/src/pymorize/calendar.py b/src/pymorize/calendar.py index 9c7e693..66dae51 100644 --- a/src/pymorize/calendar.py +++ b/src/pymorize/calendar.py @@ -203,3 +203,7 @@ def simple_ranges_from_bounds(bounds): start, end = bounds[0] return range(start, end + 1) return [range(start, end + 1) for start, end in bounds] + + +def assign_time_axis(da: xr.DataArray, taxis): + return da.assign_coords(time=taxis) diff --git a/tests/test_calendar.py b/tests/test_calendar.py index 8ef18d0..84d7571 100644 --- a/tests/test_calendar.py +++ b/tests/test_calendar.py @@ -1,6 +1,114 @@ +import numpy as np +import pytest import xarray as xr import pymorize.calendar +from pymorize.calendar import assign_time_axis, date_ranges_from_year_bounds + + +@pytest.fixture +def fake_multidim_data(): + np.random.seed(0) + data = np.random.rand(10, 5, 5) + da = xr.DataArray(data, dims=("time", "x", "y")) + return da + + +def test_assign_time_axis_matching_length_multidim(fake_multidim_data): + year_bounds = [[2000, 2009]] + time_axis = date_ranges_from_year_bounds(year_bounds, freq="Y") + result = assign_time_axis(fake_multidim_data, time_axis) + assert "time" in result.coords + assert len(result.time) == 10 + assert result.time[0].dt.year == 2000 + assert result.time[-1].dt.year == 2009 + + +def test_assign_time_axis_mismatching_length_multidim(fake_multidim_data): + year_bounds = [[2000, 2011]] + time_axis = date_ranges_from_year_bounds(year_bounds, freq="Y") + with pytest.raises(ValueError): + assign_time_axis(fake_multidim_data, time_axis) + + +@pytest.fixture +def fake_multidim_data_diff_dims(): + np.random.seed(0) + data = np.random.rand(10, 4, 6) + da = xr.DataArray(data, dims=("time", "x", "y")) + return da + + +def test_assign_time_axis_matching_length_multidim_diff_dims( + fake_multidim_data_diff_dims, +): + year_bounds = [[2000, 2009]] + time_axis = date_ranges_from_year_bounds(year_bounds, freq="Y") + result = assign_time_axis(fake_multidim_data_diff_dims, time_axis) + assert "time" in result.coords + assert len(result.time) == 10 + assert result.time[0].dt.year == 2000 + assert result.time[-1].dt.year == 2009 + + +def test_assign_time_axis_mismatching_length_multidim_diff_dims( + fake_multidim_data_diff_dims, +): + year_bounds = [[2000, 2011]] + time_axis = date_ranges_from_year_bounds(year_bounds, freq="Y") + with pytest.raises(ValueError): + assign_time_axis(fake_multidim_data_diff_dims, time_axis) + + +@pytest.fixture +def fake_data_three(): + np.random.seed(0) + data1 = np.random.rand(10) + data2 = np.random.rand(20) + data3 = np.random.rand(30) + da1 = xr.DataArray(data1, dims="time") + da2 = xr.DataArray(data2, dims="time") + da3 = xr.DataArray(data3, dims="time") + return da1, da2, da3 + + +def test_assign_time_axis_matching_length_three(fake_data_three): + year_bounds1 = [[2000, 2009]] + year_bounds2 = [[2000, 2019]] + year_bounds3 = [[2000, 2029]] + time_axis1 = date_ranges_from_year_bounds(year_bounds1, freq="Y") + time_axis2 = date_ranges_from_year_bounds(year_bounds2, freq="Y") + time_axis3 = date_ranges_from_year_bounds(year_bounds3, freq="Y") + result1 = assign_time_axis(fake_data_three[0], time_axis1) + result2 = assign_time_axis(fake_data_three[1], time_axis2) + result3 = assign_time_axis(fake_data_three[2], time_axis3) + assert "time" in result1.coords + assert "time" in result2.coords + assert "time" in result3.coords + assert len(result1.time) == 10 + assert len(result2.time) == 20 + assert len(result3.time) == 30 + assert result1.time[0].dt.year == 2000 + assert result1.time[-1].dt.year == 2009 + assert result2.time[0].dt.year == 2000 + assert result2.time[-1].dt.year == 2019 + assert result3.time[0].dt.year == 2000 + assert result3.time[-1].dt.year == 2029 + + +def test_assign_time_axis_mismatching_length_three(fake_data_three): + year_bounds1 = [[2000, 2011]] + year_bounds2 = [[2000, 2021]] + year_bounds3 = [[2000, 2031]] + time_axis1 = date_ranges_from_year_bounds(year_bounds1, freq="Y") + time_axis2 = date_ranges_from_year_bounds(year_bounds2, freq="Y") + time_axis3 = date_ranges_from_year_bounds(year_bounds3, freq="Y") + with pytest.raises(ValueError): + assign_time_axis(fake_data_three[0], time_axis1) + with pytest.raises(ValueError): + assign_time_axis(fake_data_three[1], time_axis2) + with pytest.raises(ValueError): + assign_time_axis(fake_data_three[2], time_axis3) def test_simple_ranges_from_bounds(): From f234e607dfb1bb6d79fa17f05d8fca2239e5a34f Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Tue, 23 Jul 2024 13:54:20 +0200 Subject: [PATCH 7/7] style: sorts requires alphabetically --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 52aba6f..e854cb4 100644 --- a/setup.py +++ b/setup.py @@ -29,16 +29,16 @@ def read(filename): packages=find_packages(where="src", exclude=("tests",)), install_requires=[ "cf_xarray", + "chemicals", "click-loguru", - "pendulum", "dill", + "pendulum", "pint-xarray", "pyyaml", "questionary", "rich-click", "versioneer", "xarray", - "chemicals", ], development_requires=[ "black",