Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bugfix in save_gdal for boolean type & cluster.split_box2sub_boxes for tiny last step #1024

Merged
merged 4 commits into from
Jun 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/mintpy/dem_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ def correct_dem_error(inps):

# split in row/line direction based on the input memory limit
num_box = int(np.ceil((num_epoch * length * width * 4) * 2.5 / (inps.maxMemory * 1024**3)))
box_list = cluster.split_box2sub_boxes(
box_list, num_box = cluster.split_box2sub_boxes(
box=(0, 0, width, length),
num_split=num_box,
dimension='y',
Expand Down
4 changes: 2 additions & 2 deletions src/mintpy/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def diff_timeseries(file1, file2, out_file, force_diff=False, max_num_pixel=2e8)
# block-by-block IO
length, width = int(atr1['LENGTH']), int(atr1['WIDTH'])
num_box = int(np.ceil(len(date_list1) * length * width / max_num_pixel))
box_list = cluster.split_box2sub_boxes(
box_list, num_box = cluster.split_box2sub_boxes(
box=(0, 0, width, length),
num_split=num_box,
dimension='y',
Expand Down Expand Up @@ -189,7 +189,7 @@ def diff_timeseries_and_velocity(file1, file2, out_file, max_num_pixel=2e8):
# block-by-block IO
length, width = int(atr1['LENGTH']), int(atr1['WIDTH'])
num_box = int(np.ceil(len(date_list) * length * width / max_num_pixel))
box_list = cluster.split_box2sub_boxes(
box_list, num_box = cluster.split_box2sub_boxes(
box=(0, 0, width, length),
num_split=num_box,
dimension='y',
Expand Down
31 changes: 22 additions & 9 deletions src/mintpy/objects/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,16 @@ def split_box2sub_boxes(box, num_split, dimension='x', print_msg=False):
"""Divide the input box into `num_split` different sub_boxes.

:param box: [x0, y0, x1, y1]: list[int] of size 4
:param num_split: int, the number of sub_boxes to split a box into
:param num_split: int, the initial number of sub_boxes to split a box into
:param dimension: str = 'y' or 'x', the dimension along which to split the boxes
:return: sub_boxes: list(list(4 int)), the split sub boxes
:return: num_split: int, the final number of split sub_boxes
"""
import numpy as np

dimension = dimension.lower()
if num_split <= 1:
return [box]
return [box], num_split

# basic info
x0, y0, x1, y1 = box
Expand All @@ -50,29 +51,37 @@ def split_box2sub_boxes(box, num_split, dimension='x', print_msg=False):
else:
dim_size = width
step = int(np.ceil(dim_size / num_split))
step = max(step, 10) # constrain the min step size
num_split = int(np.ceil(dim_size / step)) # trim the final number of boxes
# condition: step >= 10
step = max(step, 10)
# update num_split based on the final step size
num_split = int(np.ceil(dim_size / step))
# if the last step is too small, merge it into the 2nd last one
last_step = dim_size - step * (num_split - 1)
if last_step < step * 0.05 or last_step < 5:
num_split -= 1

# get list of boxes
sub_boxes = []
for i in range(num_split):
if dimension == 'y':
r0 = y0 + step * i
r1 = y0 + step * (i + 1)
r1 = min(r1, y1)
if i == num_split - 1:
r1 = y1
sub_boxes.append([x0, r0, x1, r1])

else:
c0 = x0 + step * i
c1 = x0 + step * (i + 1)
c1 = min(c1, x1)
if i == num_split - 1:
c1 = x1
sub_boxes.append([c0, y0, c1, y1])

if print_msg:
print(f'split along {dimension} dimension ({dim_size:d}) into {num_split:d} boxes')
print(f' with each box up to {step:d} in {dimension} dimension')

return sub_boxes
return sub_boxes, num_split


def set_num_threads(num_threads=None, print_msg=True):
Expand Down Expand Up @@ -231,8 +240,12 @@ def run(self, func, func_data, results):
# split the primary box into sub boxes for workers AND
# update the number of workers based on split result
box = func_data["box"]
sub_boxes = split_box2sub_boxes(box, num_split=self.num_worker, dimension='x', print_msg=False)
self.num_worker = len(sub_boxes)
sub_boxes, self.num_worker = split_box2sub_boxes(
box,
num_split=self.num_worker,
dimension='x',
print_msg=False,
)
print(f'split patch into {self.num_worker} sub boxes in x direction for workers to process')

# start a bunch of workers from the cluster
Expand Down
3 changes: 1 addition & 2 deletions src/mintpy/objects/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,13 +457,12 @@ def find_valid_lat_lon(lat, lon):

# split dest_box (in grid)
# and update num_box based on the actual dest_box_list
self.dest_box_list = split_box2sub_boxes(
self.dest_box_list, self.num_box = split_box2sub_boxes(
box=(0, 0, self.width, self.length),
num_split=self.num_box,
dimension='y',
print_msg=True,
)
self.num_box = len(self.dest_box_list)

# dest_box --> src_box / src_def / dest_def
for i, dest_box in enumerate(self.dest_box_list):
Expand Down
2 changes: 1 addition & 1 deletion src/mintpy/reference_date.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def change_timeseries_ref_date(ts_file, ref_date, outfile=None, max_memory=4.0,

# get list of boxes for block-by-block IO
num_box = int(np.ceil((num_date * length * width * 4 * 2) / (max_memory * 1024**3)))
box_list = split_box2sub_boxes(
box_list, num_box = split_box2sub_boxes(
box=(0, 0, width, length),
num_split=num_box,
dimension='y',
Expand Down
6 changes: 6 additions & 0 deletions src/mintpy/save_gdal.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import os
import warnings

import numpy as np
from osgeo import gdal, osr

from mintpy.utils import plot as pp, readfile, utils0 as ut
Expand Down Expand Up @@ -55,6 +56,11 @@ def write_gdal(data, meta, out_file, out_fmt='GTiff'):
msg += 'Assume EPSG = 4326 (WGS84) and continue.'
warnings.warn(msg)

# convert boolean to uint8, as GDAL does not have a direct analogue to boolean
if data.dtype == 'bool':
print('convert data from boolean to uint8, as GDAL does not support boolean')
data = np.array(data, dtype=np.uint8)

# write file
driver = gdal.GetDriverByName(out_fmt)
print(f'initiate GDAL driver: {driver.LongName}')
Expand Down
10 changes: 6 additions & 4 deletions src/mintpy/timeseries2velocity.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,10 +207,12 @@ def run_timeseries2time_func(inps):
if inps.uncertaintyQuantification == 'bootstrap':
memoryAll += inps.bootstrapCount * num_param * length * width * 4
num_box = int(np.ceil(memoryAll * 3 / (inps.maxMemory * 1024**3)))
box_list = cluster.split_box2sub_boxes(box=(0, 0, width, length),
num_split=num_box,
dimension='y',
print_msg=True)
box_list, num_box = cluster.split_box2sub_boxes(
box=(0, 0, width, length),
num_split=num_box,
dimension='y',
print_msg=True,
)

# loop for block-by-block IO
for i, box in enumerate(box_list):
Expand Down