From ab52fbf130e92ce92bf7861a450244c7f7e006b3 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Sat, 28 Oct 2023 12:50:32 +0200 Subject: [PATCH 1/8] Fix _check_time_overlaps --- esmvalcore/preprocessor/_io.py | 161 ++++++++++++++++++++------------- 1 file changed, 96 insertions(+), 65 deletions(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index b9c59c1adc..ee8d5ff97d 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -9,10 +9,10 @@ from typing import Optional from warnings import catch_warnings, filterwarnings +import cftime import iris import iris.aux_factory import iris.exceptions -import isodate import numpy as np import yaml from cf_units import suppress_errors @@ -22,7 +22,6 @@ from esmvalcore.iris_helpers import merge_cube_attributes from .._task import write_ncl_settings -from ._time import clip_timerange logger = logging.getLogger(__name__) @@ -206,70 +205,102 @@ def _concatenate_cubes(cubes, check_level): return concatenated +class _TimesHelper: + def __init__(self, time): + self.times = time.core_points() + self.units = str(time.units) + + def __getattr__(self, name): + return getattr(self.times, name) + + def __len__(self): + return len(self.times) + + def __getitem__(self, key): + return self.times[key] + + def _check_time_overlaps(cubes): - """Handle time overlaps.""" - times = [cube.coord('time').core_points() for cube in cubes] - for index, _ in enumerate(times[:-1]): - overlap = np.intersect1d(times[index], times[index + 1]) - if overlap.size != 0: - overlapping_cubes = cubes[index:index + 2] - time_1 = overlapping_cubes[0].coord('time').core_points() - time_2 = overlapping_cubes[1].coord('time').core_points() - - # case 1: both cubes start at the same time -> return longer cube - if time_1[0] == time_2[0]: - if time_1[-1] <= time_2[-1]: - cubes.pop(index) - discarded_cube_index = 0 - used_cube_index = 1 - else: - cubes.pop(index + 1) - discarded_cube_index = 1 - used_cube_index = 0 - logger.debug( - "Both cubes start at the same time but cube %s " - "ends before %s", - overlapping_cubes[discarded_cube_index], - overlapping_cubes[used_cube_index], - ) - logger.debug( - "Cube %s contains all needed data so using it fully", - overlapping_cubes[used_cube_index], - ) - - # case 2: cube1 starts before cube2 - # case 2.1: cube1 ends after cube2 -> return cube1 - elif time_1[-1] > time_2[-1]: - cubes.pop(index + 1) - logger.debug("Using only data from %s", overlapping_cubes[0]) - - # case 2.2: cube1 ends before cube2 -> use full cube2 - # and shorten cube1 - else: - new_time = np.delete( - time_1, - np.argwhere(np.in1d(time_1, overlap)), - ) - new_dates = overlapping_cubes[0].coord('time').units.num2date( - new_time) - logger.debug( - "Extracting time slice between %s and %s from cube %s " - "to use it for concatenation with cube %s", - new_dates[0], - new_dates[-1], - overlapping_cubes[0], - overlapping_cubes[1], - ) - - start_point = isodate.datetime_isoformat( - new_dates[0], format=isodate.isostrf.DT_BAS_COMPLETE) - end_point = isodate.datetime_isoformat( - new_dates[-1], format=isodate.isostrf.DT_BAS_COMPLETE) - new_cube = clip_timerange(overlapping_cubes[0], - f'{start_point}/{end_point}') - - cubes[index] = new_cube - return cubes + """Handle time overlaps. + + Parameters + ---------- + cubes : iris.cube.CubeList + A list of cubes belonging to a single timeseries, + ordered by starting point with possible overlaps. + + Returns + ------- + iris.cube.CubeList + A list of cubes belonging to a single timeseries, + ordered by starting point with no overlaps. + """ + if (number_of_cubes := len(cubes)) < 2: + return cubes + new_cubes = iris.cube.CubeList() + current_cube = cubes[0] + current_times = current_cube.coord("time") + current_start, current_end = current_times.core_points()[[0, -1]] + for index in range(1, number_of_cubes): + new_cube = cubes[index] + new_times = new_cube.coord("time") + new_start, new_end = new_times.core_points()[[0, -1]] + if new_start > current_end: + # no overlap, use current cube and start again from new cube + new_cubes.append(current_cube) + current_cube = new_cube + current_times = new_times + current_start = new_start + current_end = new_end + continue + # overlap + if current_end >= new_end: + # current cube ends after new one, just forget new cube + logger.debug("Using only data from %s", current_cube) + continue + if new_start == current_start: + # new cube completely covers current one + # forget current cube + current_cube = new_cube + current_times = new_times + current_end = new_end + # logger.debug( + # "Both cubes start at the same time but cube %s " + # "ends before %s", + # overlapping_cubes[discarded_cube_index], + # overlapping_cubes[used_cube_index], + # ) + # logger.debug( + # "Cube %s contains all needed data so using it fully", + # overlapping_cubes[used_cube_index], + # ) + continue + # new cube ends after current one, + # use all of new cube, and shorten current cube to + # eliminate overlap with new cube + cut_index = cftime.time2index( + new_start, + _TimesHelper(current_times), + current_times.units.calendar, + select="before", + ) + 1 + new_cubes.append(current_cube[:cut_index]) + current_cube = new_cube + current_times = new_times + current_start = new_start + current_end = new_end + # logger.debug( + # "Extracting time slice between %s and %s from cube %s " + # "to use it for concatenation with cube %s", + # new_dates[0], + # new_dates[-1], + # overlapping_cubes[0], + # overlapping_cubes[1], + # ) + + new_cubes.append(current_cube) + + return new_cubes def _fix_calendars(cubes): From 02041bbfa7144ec3ec4768e89a5ccadba6566a89 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Mon, 30 Oct 2023 09:59:54 +0100 Subject: [PATCH 2/8] Add logging --- esmvalcore/preprocessor/_io.py | 36 +++++++++++++--------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index ee8d5ff97d..62f4dc0190 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -206,6 +206,7 @@ def _concatenate_cubes(cubes, check_level): class _TimesHelper: + def __init__(self, time): self.times = time.core_points() self.units = str(time.units) @@ -223,6 +224,8 @@ def __getitem__(self, key): def _check_time_overlaps(cubes): """Handle time overlaps. + + Parameters ---------- cubes : iris.cube.CubeList @@ -235,18 +238,18 @@ def _check_time_overlaps(cubes): A list of cubes belonging to a single timeseries, ordered by starting point with no overlaps. """ - if (number_of_cubes := len(cubes)) < 2: + if len(cubes) < 2: return cubes new_cubes = iris.cube.CubeList() current_cube = cubes[0] current_times = current_cube.coord("time") current_start, current_end = current_times.core_points()[[0, -1]] - for index in range(1, number_of_cubes): - new_cube = cubes[index] + for new_cube in cubes[1:]: new_times = new_cube.coord("time") new_start, new_end = new_times.core_points()[[0, -1]] if new_start > current_end: # no overlap, use current cube and start again from new cube + logger.debug("Using %s", current_cube) new_cubes.append(current_cube) current_cube = new_cube current_times = new_times @@ -256,7 +259,9 @@ def _check_time_overlaps(cubes): # overlap if current_end >= new_end: # current cube ends after new one, just forget new cube - logger.debug("Using only data from %s", current_cube) + logger.debug( + "Discarding %s because the time rangs is already covered by %s", + new_cube, current_cube) continue if new_start == current_start: # new cube completely covers current one @@ -264,16 +269,9 @@ def _check_time_overlaps(cubes): current_cube = new_cube current_times = new_times current_end = new_end - # logger.debug( - # "Both cubes start at the same time but cube %s " - # "ends before %s", - # overlapping_cubes[discarded_cube_index], - # overlapping_cubes[used_cube_index], - # ) - # logger.debug( - # "Cube %s contains all needed data so using it fully", - # overlapping_cubes[used_cube_index], - # ) + logger.debug( + "Discarding %s because the time range is covered by %s", + current_cube, new_cube) continue # new cube ends after current one, # use all of new cube, and shorten current cube to @@ -284,20 +282,14 @@ def _check_time_overlaps(cubes): current_times.units.calendar, select="before", ) + 1 + logger.debug("Using %s shortened to %s due to overlap", current_cube, current_times.cell(cut_index)) new_cubes.append(current_cube[:cut_index]) current_cube = new_cube current_times = new_times current_start = new_start current_end = new_end - # logger.debug( - # "Extracting time slice between %s and %s from cube %s " - # "to use it for concatenation with cube %s", - # new_dates[0], - # new_dates[-1], - # overlapping_cubes[0], - # overlapping_cubes[1], - # ) + logger.debug("Using %s", current_cube) new_cubes.append(current_cube) return new_cubes From e45801bffcbba325e56a6d7af7b84c94afd2201c Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Mon, 30 Oct 2023 10:57:56 +0100 Subject: [PATCH 3/8] Fix formatting problems --- esmvalcore/preprocessor/_io.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 62f4dc0190..c2d8ab8e34 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -224,8 +224,6 @@ def __getitem__(self, key): def _check_time_overlaps(cubes): """Handle time overlaps. - - Parameters ---------- cubes : iris.cube.CubeList @@ -260,8 +258,8 @@ def _check_time_overlaps(cubes): if current_end >= new_end: # current cube ends after new one, just forget new cube logger.debug( - "Discarding %s because the time rangs is already covered by %s", - new_cube, current_cube) + "Discarding %s because the time range " + "is already covered by %s", new_cube, current_cube) continue if new_start == current_start: # new cube completely covers current one @@ -282,7 +280,8 @@ def _check_time_overlaps(cubes): current_times.units.calendar, select="before", ) + 1 - logger.debug("Using %s shortened to %s due to overlap", current_cube, current_times.cell(cut_index)) + logger.debug("Using %s shortened to %s due to overlap", current_cube, + current_times.cell(cut_index)) new_cubes.append(current_cube[:cut_index]) current_cube = new_cube current_times = new_times From 741b61fb5ec75f76ea65ff615efb9bdedcdfb7dd Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Mon, 30 Oct 2023 11:20:28 +0100 Subject: [PATCH 4/8] Simplify tracking --- esmvalcore/preprocessor/_io.py | 62 ++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index c2d8ab8e34..95a1048fe0 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -6,7 +6,7 @@ import os from itertools import groupby from pathlib import Path -from typing import Optional +from typing import Optional, NamedTuple from warnings import catch_warnings, filterwarnings import cftime @@ -238,58 +238,60 @@ def _check_time_overlaps(cubes): """ if len(cubes) < 2: return cubes + + class TrackedCube(NamedTuple): + cube: iris.cube.Cube + times: iris.coords.DimCoord + start: float + end: float + + @classmethod + def from_cube(cls, cube): + times = cube.coord("time") + start, end = times.core_points()[[0, -1]] + return cls(cube, times, start, end) + new_cubes = iris.cube.CubeList() - current_cube = cubes[0] - current_times = current_cube.coord("time") - current_start, current_end = current_times.core_points()[[0, -1]] - for new_cube in cubes[1:]: - new_times = new_cube.coord("time") - new_start, new_end = new_times.core_points()[[0, -1]] - if new_start > current_end: + current_cube = TrackedCube.from_cube(cubes[0]) + for new_cube in map(TrackedCube.from_cube, cubes[1:]): + if new_cube.start > current_cube.end: # no overlap, use current cube and start again from new cube - logger.debug("Using %s", current_cube) - new_cubes.append(current_cube) + logger.debug("Using %s", current_cube.cube) + new_cubes.append(current_cube.cube) current_cube = new_cube - current_times = new_times - current_start = new_start - current_end = new_end continue # overlap - if current_end >= new_end: + if current_cube.end >= new_cube.end: # current cube ends after new one, just forget new cube logger.debug( "Discarding %s because the time range " - "is already covered by %s", new_cube, current_cube) + "is already covered by %s", new_cube.cube, current_cube.cube) continue - if new_start == current_start: + if new_cube.start == current_cube.start: # new cube completely covers current one # forget current cube current_cube = new_cube - current_times = new_times - current_end = new_end logger.debug( "Discarding %s because the time range is covered by %s", - current_cube, new_cube) + current_cube.cube, new_cube.cube) continue # new cube ends after current one, # use all of new cube, and shorten current cube to # eliminate overlap with new cube cut_index = cftime.time2index( - new_start, - _TimesHelper(current_times), - current_times.units.calendar, + new_cube.start, + _TimesHelper(current_cube.times), + current_cube.times.units.calendar, select="before", ) + 1 - logger.debug("Using %s shortened to %s due to overlap", current_cube, - current_times.cell(cut_index)) - new_cubes.append(current_cube[:cut_index]) + logger.debug("Using %s shortened to %s due to overlap", + current_cube.cube, + current_cube.times.cell(cut_index).point) + new_cubes.append(current_cube.cube[:cut_index]) current_cube = new_cube - current_times = new_times - current_start = new_start - current_end = new_end - logger.debug("Using %s", current_cube) - new_cubes.append(current_cube) + logger.debug("Using %s", current_cube.cube) + new_cubes.append(current_cube.cube) return new_cubes From 86b33fe148663ac9ca437aa8ea9d3664eed44bb6 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Mon, 30 Oct 2023 11:23:09 +0100 Subject: [PATCH 5/8] Restore edge case behavior to fix test --- esmvalcore/preprocessor/_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 95a1048fe0..42b9e6e101 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -261,7 +261,7 @@ def from_cube(cls, cube): current_cube = new_cube continue # overlap - if current_cube.end >= new_cube.end: + if current_cube.end > new_cube.end: # current cube ends after new one, just forget new cube logger.debug( "Discarding %s because the time range " From bb80ada2d6ff409a563f03a75d43b079736ee67f Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Mon, 30 Oct 2023 11:37:41 +0100 Subject: [PATCH 6/8] Add type annotations --- esmvalcore/preprocessor/_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 42b9e6e101..a443b0e7b0 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -221,7 +221,7 @@ def __getitem__(self, key): return self.times[key] -def _check_time_overlaps(cubes): +def _check_time_overlaps(cubes: iris.cube.CubeList) -> iris.cube.CubeList: """Handle time overlaps. Parameters From 997ba45fc1b90e1575b82feff99d5bfcc5c6a0ee Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Mon, 30 Oct 2023 11:39:09 +0100 Subject: [PATCH 7/8] Mark _TrackedCube as private --- esmvalcore/preprocessor/_io.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index a443b0e7b0..18d552d671 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -239,7 +239,7 @@ def _check_time_overlaps(cubes: iris.cube.CubeList) -> iris.cube.CubeList: if len(cubes) < 2: return cubes - class TrackedCube(NamedTuple): + class _TrackedCube(NamedTuple): cube: iris.cube.Cube times: iris.coords.DimCoord start: float @@ -252,8 +252,8 @@ def from_cube(cls, cube): return cls(cube, times, start, end) new_cubes = iris.cube.CubeList() - current_cube = TrackedCube.from_cube(cubes[0]) - for new_cube in map(TrackedCube.from_cube, cubes[1:]): + current_cube = _TrackedCube.from_cube(cubes[0]) + for new_cube in map(_TrackedCube.from_cube, cubes[1:]): if new_cube.start > current_cube.end: # no overlap, use current cube and start again from new cube logger.debug("Using %s", current_cube.cube) From 962a3676733e73d0a4fa6cae2add202dd2a14781 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Mon, 30 Oct 2023 11:48:57 +0100 Subject: [PATCH 8/8] Add docstring stub --- esmvalcore/preprocessor/_io.py | 1 + 1 file changed, 1 insertion(+) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 18d552d671..f1586a9f17 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -247,6 +247,7 @@ class _TrackedCube(NamedTuple): @classmethod def from_cube(cls, cube): + """Construct tracked cube.""" times = cube.coord("time") start, end = times.core_points()[[0, -1]] return cls(cube, times, start, end)