wrap up for version 1.4.1 (#829)

+ version: add Tag for version 1.4.1 + readfile.read_hdf5_file(): speed up the 3D matrix reading when slicing a small fraction of the 1st dimension, by using integer indexing for 3D HDF5 datasets instead of 1D boolean array indexing. + view.read_data4figure(): fix a bug in referencing unwrapPhase while plotting mixed dataset types + move the following plotting functions to utils/plot.py for a more compact module import, to simplify the UNAVCO notebook: - unwrap_error_phase_closure.plot_num_triplet_with_nonzero_integer_ambiguity() - timeseries_rms.plot_rms_bar() - objects.insar_vs_gps.plot_insar_vs_gps_scatter() + plot.plot_insar_vs_gps_scatter(): add preliminary outlier detection
insarlab · Aug 16, 2022 · 36a0835 · 36a0835
1 parent 47bb7f6
commit 36a0835
Show file tree

Hide file tree

Showing 8 changed files with 361 additions and 310 deletions.
diff --git a/mintpy/objects/insar_vs_gps.py b/mintpy/objects/insar_vs_gps.py
@@ -13,144 +13,14 @@
 from scipy.interpolate import griddata
 from datetime import datetime as dt
 from dateutil.relativedelta import relativedelta
-from matplotlib import pyplot as plt
 
 from mintpy.objects import timeseries, giantTimeseries
-from mintpy.utils import ptime, readfile, plot as pp, utils as ut
+from mintpy.utils import readfile, plot as pp, utils as ut
 from mintpy.objects.gps import GPS
 from mintpy.defaults.plot import *
 
 
 
-############################## utilities functions ##########################################
-
-def plot_insar_vs_gps_scatter(vel_file, csv_file='gps_enu2los.csv', msk_file=None, ref_gps_site=None,
-                              xname='InSAR', vlim=None, ex_gps_sites=[], display=True):
-    """Scatter plot to compare the velocities between SAR/InSAR and GPS.
-
-    Parameters: vel_file     - str, path of InSAR LOS velocity HDF5 file.
-                ref_gps_site - str, reference GNSS site name
-                csv_file     - str, path of GNSS CSV file, generated after running view.py --gps-comp
-                msk_file     - str, path of InSAR mask file.
-                xname        - str, xaxis label
-                vlim         - list of 2 float, display value range in the unit of cm/yr
-                               Default is None to grab from data
-                               If set, the range will be used to prune the SAR and GPS observations
-                ex_gps_sites - list of str, exclude GNSS sites for analysis and plotting.
-    Example:
-        from mintpy.objects.insar_vs_gps import plot_insar_vs_gps_scatter
-        csv_file = os.path.join(work_dir, 'geo/gps_enu2los.csv')
-        vel_file = os.path.join(work_dir, 'geo/geo_velocity.h5')
-        msk_file = os.path.join(work_dir, 'geo/geo_maskTempCoh.h5')
-        plot_insar_vs_gps_scatter(vel_file, ref_gps_site='CACT', csv_file=csv_file, msk_file=msk_file, vlim=[-2.5, 2])
-    """
-
-    disp_unit = 'cm/yr'
-    unit_fac = 100.
-
-    # read GPS velocity from CSV file (generated by gps.get_gps_los_obs())
-    col_names = ['Site', 'Lon', 'Lat', 'Displacement', 'Velocity']
-    num_col = len(col_names)
-    col_types = ['U10'] + ['f8'] * (num_col - 1)
-
-    print('read GPS velocity from file: {}'.format(csv_file))
-    fc = np.genfromtxt(csv_file, dtype=col_types, delimiter=',', names=True)
-    sites = fc['Site']
-    lats = fc['Lat']
-    lons = fc['Lon']
-    gps_obs = fc[col_names[-1]] * unit_fac
-
-    if ex_gps_sites:
-        ex_flag = np.array([x in ex_gps_sites for x in sites], dtype=np.bool_)
-        if np.sum(ex_flag) > 0:
-            sites = sites[~ex_flag]
-            lats = lats[~ex_flag]
-            lons = lons[~ex_flag]
-            gps_obs = gps_obs[~ex_flag]
-
-    # read InSAR velocity
-    print('read InSAR velocity from file: {}'.format(vel_file))
-    atr = readfile.read_attribute(vel_file)
-    length, width = int(atr['LENGTH']), int(atr['WIDTH'])
-    coord = ut.coordinate(atr)
-    ys, xs = coord.geo2radar(lats, lons)[:2]
-
-    msk = readfile.read(msk_file)[0] if msk_file else np.ones((length, width), dtype=np.bool_)
-
-    num_site = sites.size
-    insar_obs = np.zeros(num_site, dtype=np.float32) * np.nan
-    prog_bar = ptime.progressBar(maxValue=num_site)
-    for i in range(num_site):
-        x, y = xs[i], ys[i]
-        if (0 <= x < width) and (0 <= y < length) and msk[y, x]:
-            box = (x, y, x+1, y+1)
-            insar_obs[i] = readfile.read(vel_file, datasetName='velocity', box=box)[0] * unit_fac
-        prog_bar.update(i+1, suffix='{}/{} {}'.format(i+1, num_site, sites[i]))
-    prog_bar.close()
-
-    off_med = np.nanmedian(insar_obs - gps_obs)
-    print(f'median offset between InSAR and GPS [before common referencing]: {off_med:.2f} cm/year')
-
-    # reference site
-    if ref_gps_site:
-        print(f'referencing both InSAR and GPS data to site: {ref_gps_site}')
-        ref_ind = sites.tolist().index(ref_gps_site)
-        gps_obs -= gps_obs[ref_ind]
-        insar_obs -= insar_obs[ref_ind]
-
-    # remove NaN value
-    print('removing sites with NaN values in GPS or {}'.format(xname))
-    flag = np.multiply(~np.isnan(insar_obs), ~np.isnan(gps_obs))
-    if vlim is not None:
-        print('pruning sites with value range: {} {}'.format(vlim, disp_unit))
-        flag *= gps_obs >= vlim[0]
-        flag *= gps_obs <= vlim[1]
-        flag *= insar_obs >= vlim[0]
-        flag *= insar_obs <= vlim[1]
-
-    gps_obs = gps_obs[flag]
-    insar_obs = insar_obs[flag]
-    sites = sites[flag]
-
-    # stats
-    print('GPS   min/max: {:.2f} / {:.2f}'.format(np.nanmin(gps_obs), np.nanmax(gps_obs)))
-    print('InSAR min/max: {:.2f} / {:.2f}'.format(np.nanmin(insar_obs), np.nanmax(insar_obs)))
-
-    rmse = np.sqrt(np.sum((insar_obs - gps_obs)**2) / (gps_obs.size - 1))
-    r2 = stats.linregress(insar_obs, gps_obs)[2]
-    print('RMSE = {:.1f} cm'.format(rmse))
-    print('R^2 = {:.2f}'.format(r2))
-
-    # plot
-    if display:
-        plt.rcParams.update({'font.size': 12})
-        if vlim is None:
-            vlim = [np.min(insar_obs), np.max(insar_obs)]
-            buffer = (vlim[1] - vlim[0]) * 0.1
-            vlim = [vlim[0] - buffer, vlim[1] + buffer]
-
-        fig, ax = plt.subplots(figsize=[4, 4])
-        ax.plot((vlim[0], vlim[1]), (vlim[0], vlim[1]), 'k--')
-        ax.plot(insar_obs, gps_obs, '.', ms=15)
-
-        # axis format
-        ax.set_xlim(vlim)
-        ax.set_ylim(vlim)
-        ax.set_xlabel(f'{xname} [{disp_unit}]')
-        ax.set_ylabel(f'GNSS [{disp_unit}]')
-        ax.set_aspect('equal', 'box')
-        fig.tight_layout()
-
-        # output
-        out_fig = '{}_vs_gps_scatter.pdf'.format(xname.lower())
-        plt.savefig(out_fig, bbox_inches='tight', transparent=True, dpi=300)
-        print('save figure to file', out_fig)
-        plt.show()
-
-    return sites, insar_obs, gps_obs
-
-
-
 ############################## beginning of insar_vs_gps class ##############################
 class insar_vs_gps:
     """ Comparing InSAR time-series with GPS time-series in LOS direction

diff --git a/mintpy/timeseries2velocity.py b/mintpy/timeseries2velocity.py
@@ -487,7 +487,7 @@ def run_timeseries2time_func(inps):
             # Bootstrapping is a resampling method which can be used to estimate properties
             # of an estimator. The method relies on independently sampling the data set with
             # replacement.
-            print('estimating time function STD with bootstrap resampling ({} times) ...'.format(
+            print('estimating time functions STD with bootstrap resampling ({} times) ...'.format(
                 inps.bootstrapCount))
 
             # calc model of all bootstrap sampling
@@ -559,7 +559,7 @@ def run_timeseries2time_func(inps):
                 # TO DO: save the full covariance matrix of the time function parameters
                 # only the STD is saved right now
                 covar_flag = True if len(ts_cov.shape) == 3 else False
-                msg = 'estimating time function STD from time-serries '
+                msg = 'estimating time functions STD from time-serries '
                 msg += 'covariance pixel-by-pixel ...' if covar_flag else 'variance pixel-by-pixel ...'
                 print(msg)
 
@@ -583,7 +583,7 @@ def run_timeseries2time_func(inps):
 
             elif inps.uncertaintyQuantification == 'residue':
                 # option 2.3 - assume obs errors following normal dist. in time
-                print('estimating time function STD from time-series fitting residual ...')
+                print('estimating time functions STD from time-series fitting residual ...')
                 G_inv = linalg.inv(np.dot(G.T, G))
                 m_var = e2.reshape(1, -1) / (num_date - num_param)
                 m_std[:, mask] = np.sqrt(np.dot(np.diag(G_inv).reshape(-1, 1), m_var))

diff --git a/mintpy/timeseries_rms.py b/mintpy/timeseries_rms.py
@@ -9,10 +9,9 @@
 import os
 import sys
 import numpy as np
-import matplotlib.pyplot as plt
-from mpl_toolkits.axes_grid1 import make_axes_locatable
+
 from mintpy.defaults.template import get_template_content
-from mintpy.utils import readfile, ptime, utils as ut, plot as pp
+from mintpy.utils import readfile, utils as ut, plot as pp
 from mintpy.utils.arg_utils import create_argument_parser
 
 
@@ -103,15 +102,14 @@ def analyze_rms(date_list, rms_list, inps):
         print('save date to file: '+ref_date_file)
 
     # exclude date(s) - outliers
-    try:
-        rms_threshold = ut.median_abs_deviation_threshold(rms_list, center=0., cutoff=inps.cutoff)
-    except:
-        # equivalent calculation using numpy assuming Gaussian distribution
-        rms_threshold = np.median(rms_list) / .6745 * inps.cutoff
+    # equivalent calculation using numpy assuming Gaussian distribution as:
+    # rms_threshold = np.median(rms_list) / .6745 * inps.cutoff
+    rms_threshold = ut.median_abs_deviation_threshold(rms_list, center=0., cutoff=inps.cutoff)
 
     ex_idx = [rms_list.index(i) for i in rms_list if i > rms_threshold]
-    print(('-'*50+'\ndate(s) with RMS > {} * median RMS'
-           ' ({:.4f})'.format(inps.cutoff, rms_threshold)))
+    print('-'*50)
+    print(f'date(s) with RMS > {inps.cutoff} * median RMS ({rms_threshold:.4f})')
+
     ex_date_file = 'exclude_date.txt'
     if ex_idx:
         # print
@@ -127,110 +125,37 @@ def analyze_rms(date_list, rms_list, inps):
         if os.path.isfile(ex_date_file):
             os.remove(ex_date_file)
 
-    # plot bar figure and save
-    fig_file = os.path.splitext(inps.rms_file)[0]+'.pdf'
-    fig, ax = plt.subplots(figsize=inps.fig_size)
-    print('create figure in size:', inps.fig_size)
-    ax = plot_rms_bar(ax, date_list, np.array(rms_list)*1000., cutoff=inps.cutoff)
-    fig.savefig(fig_file, bbox_inches='tight', transparent=True)
-    print('save figure to file: '+fig_file)
     return inps
 
 
-def plot_rms_bar(ax, date_list, rms, cutoff=3., font_size=12,
-                 tick_year_num=1, legend_loc='best',
-                 disp_legend=True, disp_side_plot=True, disp_thres_text=False,
-                 ylabel='Residual phase RMS [mm]'):
-    """ Bar plot Phase Residual RMS
-    Parameters: ax : Axes object
-                date_list : list of string in YYYYMMDD format
-                rms    : 1D np.array of float for RMS value in mm
-                cutoff : cutoff value of MAD outlier detection
-                tick_year_num : int, number of years per major tick
-                legend_loc : 'upper right' or (0.5, 0.5)
-    Returns:    ax : Axes object
-    """
-    dates, datevector = ptime.date_list2vector(date_list)
-    dates = np.array(dates)
-    try:
-        bar_width = min(ut.most_common(np.diff(dates).tolist(), k=2))*3/4
-    except:
-        bar_width = np.min(np.diff(dates).tolist())*3/4
-    rms = np.array(rms)
-
-    # Plot all dates
-    ax.bar(dates, rms, bar_width.days, color=pp.mplColors[0])
-
-    # Plot reference date
-    ref_idx = np.argmin(rms)
-    ax.bar(dates[ref_idx], rms[ref_idx], bar_width.days, color=pp.mplColors[1], label='Reference date')
-
-    # Plot exclude dates
-    rms_threshold = ut.median_abs_deviation_threshold(rms, center=0., cutoff=cutoff)
-    ex_idx = rms > rms_threshold
-    if not np.all(ex_idx==False):
-        ax.bar(dates[ex_idx], rms[ex_idx], bar_width.days, color='darkgray', label='Exclude date')
-
-    # Plot rms_threshold line
-    (ax, xmin, xmax) = pp.auto_adjust_xaxis_date(ax, datevector, font_size, every_year=tick_year_num)
-    ax.plot(np.array([xmin, xmax]), np.array([rms_threshold, rms_threshold]), '--k',
-            label='Median Abs Dev * {}'.format(cutoff))
-
-    # axis format
-    ax = pp.auto_adjust_yaxis(ax, np.append(rms, rms_threshold), font_size, ymin=0.0)
-    #ax.set_xlabel('Time [years]', fontsize=font_size)
-    ax.set_ylabel(ylabel, fontsize=font_size)
-    ax.tick_params(which='both', direction='in', labelsize=font_size,
-                   bottom=True, top=True, left=True, right=True)
-
-    # 2nd axes for circles
-    if disp_side_plot:
-        divider = make_axes_locatable(ax)
-        ax2 = divider.append_axes("right", "10%", pad="2%")
-        ax2.plot(np.ones(rms.shape, np.float32) * 0.5, rms, 'o', mfc='none', color=pp.mplColors[0])
-        ax2.plot(np.ones(rms.shape, np.float32)[ref_idx] * 0.5, rms[ref_idx], 'o', mfc='none', color=pp.mplColors[1])
-        if not np.all(ex_idx==False):
-            ax2.plot(np.ones(rms.shape, np.float32)[ex_idx] * 0.5, rms[ex_idx], 'o', mfc='none', color='darkgray')
-        ax2.plot(np.array([0, 1]), np.array([rms_threshold, rms_threshold]), '--k')
-
-        ax2.set_ylim(ax.get_ylim())
-        ax2.set_xlim([0, 1])
-        ax2.tick_params(which='both', direction='in', labelsize=font_size,
-                        bottom=True, top=True, left=True, right=True)
-        ax2.get_xaxis().set_ticks([])
-        ax2.get_yaxis().set_ticklabels([])
-
-    if disp_legend:
-        ax.legend(loc=legend_loc, frameon=False, fontsize=font_size)
-
-    # rms_threshold text
-    if disp_thres_text:
-        ymin, ymax = ax.get_ylim()
-        yoff = (ymax - ymin) * 0.1
-        if (rms_threshold - ymin) > 0.5 * (ymax - ymin):
-            yoff *= -1.
-        ax.annotate('Median Abs Dev * {}'.format(cutoff),
-                    xy=(xmin + (xmax-xmin)*0.05, rms_threshold + yoff ),
-                    color='k', xycoords='data', fontsize=font_size)
-    return ax
-
-
 ######################################################################################################
 def main(iargs=None):
-    plt.switch_backend('Agg')  # Backend setting
 
+    # read inputs
     inps = cmd_line_parse(iargs)
     if inps.template_file:
         inps = read_template2inps(inps.template_file, inps)
 
     # calculate timeseries of residual Root Mean Square
-    (inps.rms_list,
-     inps.date_list,
-     inps.rms_file) = ut.get_residual_rms(inps.timeseries_file,
-                                          mask_file=inps.maskFile,
-                                          ramp_type=inps.deramp)
+    inps.rms_list, inps.date_list, inps.rms_file = ut.get_residual_rms(
+        inps.timeseries_file,
+        mask_file=inps.maskFile,
+        ramp_type=inps.deramp,
+    )
 
+    # analyze RMS: generate reference/exclude_date.txt files
     analyze_rms(inps.date_list, inps.rms_list, inps)
+
+    # plot RMS
+    pp.plot_timeseries_rms(
+        rms_file=inps.rms_file,
+        cutoff=inps.cutoff,
+        out_fig=os.path.splitext(inps.rms_file)[0]+'.pdf',
+        disp_fig=False,
+        fig_size=inps.fig_size,
+        tick_year_num=inps.tick_year_num,
+    )
+
     return