diff --git a/ush/met_util.py b/ush/met_util.py
index 90d981b25c..904f7b5bb2 100755
--- a/ush/met_util.py
+++ b/ush/met_util.py
@@ -1,16 +1,13 @@
 #!/usr/bin/env python
 
 from __future__ import print_function
-import constants_pdef as P
-import os
+import logging, os, shutil, sys, datetime
+from produtil.run import batchexe, run, checkrun
 import errno
-import logging
 import time
 import calendar
 import re
-import sys
 import string_template_substitution as sts
-import subprocess
 import run_tc_stat as tcs
 
 ''' A collection of utility functions used to perform necessary series
@@ -81,21 +78,60 @@ def mkdir_p(path):
         else:
             raise
 
+def _rmtree_onerr(function,path,exc_info,logger=None):
+    """!Internal function used to log errors.
+
+    This is an internal implementation function called by
+    shutil.rmtree when an underlying function call failed. See
+    the Python documentation of shutil.rmtree for details.
+    @param function the function that failed
+    @param path the path that caused the problem
+    @param exc_info the exception information
+    @protected"""
+    if logger:
+        logger.warning('%s: %s failed: %s'%(
+            str(path),str(function),str(exc_info)))
+
+def rmtree(tree,logger=None):
+    """!Deletes the tree, if possible.
+    @protected
+    @param tree the directory tree to delete"""
+    try:
+        # If it is a file, special file or symlink we can just
+        # delete it via unlink:
+        os.unlink(tree)
+        return
+    except EnvironmentError:
+        pass
+    # We get here for directories.
+    if logger:
+        logger.info('%s: rmtree'%(tree,))
+    #shutil.rmtree(tree,ignore_errors=False,onerror=_rmtree_onerr)
+    shutil.rmtree(tree,ignore_errors=False)
+
 def get_logger(p):
     '''Gets a logger
 
        Args:
-           p:   the ConfigMaster constants param file
+           p:   the METplus produtil.ProdConfig object
 
        Returns:
            logger: the logger
     '''
 
     # Retrieve all logging related parameters from the param file
-    log_dir = p.opt["LOG_DIR"]
-    log_level = p.opt["LOG_LEVEL"]
-    log_filename = p.opt["LOG_FILENAME"]
+    log_dir = p.getdir('LOG_DIR')
+    log_level = p.getstr('config', 'LOG_LEVEL')
+    log_path_basename = os.path.splitext(p.getstr('config','LOG_FILENAME'))[0]
+    log_ext = os.path.splitext(p.getstr('config','LOG_FILENAME'))[1]
+    log_filename = log_path_basename+'.'\
+                   +datetime.datetime.now().strftime("%Y%m%d")\
+                   +log_ext.strip()
+
+    #TODO review, use builtin produtil.fileop vs. mkdir_p ?
+    #import produtil.fileop
+    #produtil.fileop.makedirs(log_dir,logger=None)
 
     # Check if the directory path for the log exists, if
     # not create it.
@@ -262,17 +298,11 @@ def get_storm_ids(filter_filename, logger):
         return empty_list
     if os.stat(filter_filename).st_size == 0:
         return empty_list
-    with open(filter_filename) as fileobj:
-        # skip the first line as it contains the header
-        next(fileobj)
-        for line in fileobj:
-            # split the columns, which are separated by one or
-            # more whitespace, hence the line.split() without any
-            # args
-            cols = line.split()
-
-            # we are only interested in the 4th column, STORM_ID
-            storm_id_list.add(str(cols[3]))
+    with open(filter_filename, "r") as fileobj:
+        header = fileobj.readline().split()
+        header_colnum = header.index('STORM_ID')
+        for line in fileobj:
+            storm_id_list.add(str(line.split()[header_colnum]))
 
     # sort the unique storm ids, copy the original
     # set by using sorted rather than sort.
@@ -461,31 +491,27 @@ def retrieve_and_regrid(tmp_filename, cur_init, cur_storm, out_dir, logger, p):
     cur_function = sys._getframe().f_code.co_name
 
     # Get variables, etc. from constants_pdef.py param/config file.
-    gfs_dir = p.opt["GFS_DIR"]
-    regrid_data_plane_exe = p.opt["REGRID_DATA_PLANE_EXE"]
-    wgrib2_exe = p.opt["WGRIB2"]
-    egrep_exe = p.opt["EGREP_EXE"]
-    regrid_with_MET_tool = p.opt["REGRID_USING_MET_TOOL"]
-    overwrite_flag = p.opt["OVERWRITE_TRACK"]
-
+    gfs_dir = p.getdir('GFS_DIR')
+    regrid_data_plane_exe = p.getexe('REGRID_DATA_PLANE_EXE')
+    wgrib2_exe = p.getexe('WGRIB2')
+    egrep_exe = p.getexe('EGREP_EXE')
+    regrid_with_MET_tool = p.getbool('config','REGRID_USING_MET_TOOL')
+    overwrite_flag = p.getbool('config','OVERWRITE_TRACK')
+
     # Extract the columns of interest: init time, lead time,
     # valid time lat and lon of both tropical cyclone tracks, etc.
     # Then calculate the forecast hour and other things.
     with open(tmp_filename, "r") as tf:
-        # read header
-        header = tf.readline().split()
-        # get column number for columns on interest
-        header_colnum_init, header_colnum_lead, header_colnum_valid = header.index('INIT'), header.index(
-            'LEAD'), header.index('VALID')
-        header_colnum_alat, header_colnum_alon = header.index('ALAT'), header.index('ALON')
-        header_colnum_blat, header_colnum_blon = header.index('BLAT'), header.index('BLON')
-        for line in tf:
+        #read header
+        header = tf.readline().split()
+        #get column number for columns of interest
+        header_colnum_init, header_colnum_lead, header_colnum_valid = header.index('INIT'), header.index('LEAD'), header.index('VALID')
+        header_colnum_alat, header_colnum_alon = header.index('ALAT'), header.index('ALON')
+        header_colnum_blat, header_colnum_blon = header.index('BLAT'), header.index('BLON')
+        for line in tf:
             col = line.split()
-            init, lead, valid, alat, alon, blat, blon = col[header_colnum_init], col[header_colnum_lead], \
-                                                        col[header_colnum_valid], col[header_colnum_alat],\
-                                                        col[header_colnum_alon], col[header_colnum_blat], \
-                                                        col[header_colnum_blon]
-
+            init, lead, valid, alat, alon, blat, blon = col[header_colnum_init], col[header_colnum_lead], col[header_colnum_valid], col[header_colnum_alat], col[header_colnum_alon], col[header_colnum_blat], col[header_colnum_blon]
+
             # integer division for both Python 2 and 3
             lead_time = int(lead)
             fcst_hr = lead_time // 10000
@@ -528,12 +554,12 @@ def retrieve_and_regrid(tmp_filename, cur_init, cur_storm, out_dir, logger, p):
 
         # Create the filename for the regridded file, which is a
         # grib2 file.
         fcstSTS = sts.StringTemplateSubstitution(logger,
-                                                 p.opt["GFS_FCST_FILE_TMPL"],
+                                                 p.getraw('filename_templates','GFS_FCST_FILE_TMPL'),
                                                  init=init_YYYYmmddHH,
                                                  lead=lead_str)
 
         anlySTS = sts.StringTemplateSubstitution(logger,
-                                                 p.opt["GFS_ANLY_FILE_TMPL"],
+                                                 p.getraw('filename_templates', 'GFS_ANLY_FILE_TMPL'),
                                                  valid=valid_YYYYmmddHH,
                                                  lead=lead_str)
@@ -589,9 +615,9 @@ def retrieve_and_regrid(tmp_filename, cur_init, cur_storm, out_dir, logger, p):
         tile_dir = os.path.join(out_dir, cur_init, cur_storm)
         fcst_hr_str = str(fcst_hr).zfill(3)
 
-        fcst_regridded_filename = p.opt["FCST_TILE_PREFIX"] + fcst_hr_str + "_" + fcst_anly_base
+        fcst_regridded_filename = p.getstr('regex_pattern','FCST_TILE_PREFIX') + fcst_hr_str + "_" + fcst_anly_base
         fcst_regridded_file = os.path.join(tile_dir, fcst_regridded_filename)
-        anly_regridded_filename = p.opt["ANLY_TILE_PREFIX"] + fcst_hr_str + "_" + fcst_anly_base
+        anly_regridded_filename = p.getstr('regex_pattern','ANLY_TILE_PREFIX') + fcst_hr_str + "_" + fcst_anly_base
         anly_regridded_file = os.path.join(tile_dir, anly_regridded_filename)
 
         # Regrid the fcst file only if a fcst tile
@@ -614,13 +640,12 @@ def retrieve_and_regrid(tmp_filename, cur_init, cur_storm, out_dir, logger, p):
                                  var_level_string,
                                  ' -method NEAREST ']
                 regrid_cmd_fcst = ''.join(fcst_cmd_list)
-                regrid_fcst_out = subprocess.check_output(regrid_cmd_fcst,
-                                                          stderr=
-                                                          subprocess.STDOUT,
-                                                          shell=True)
+                regrid_cmd_fcst = batchexe('sh')['-c',regrid_cmd_fcst].err2out()
+                #regrid_cmd_fcst = batchexe(regrid_cmd_fcst.split()[0])[regrid_cmd_fcst.split()[1:]].err2out()
                 msg = ("INFO|[regrid]| regrid_data_plane regrid command:" +
-                       regrid_cmd_fcst)
+                       regrid_cmd_fcst.to_shell())
                 logger.debug(msg)
+                regrid_fcst_out = run(regrid_cmd_fcst)
 
             else:
                 # Perform regridding via wgrib2
@@ -631,13 +656,13 @@ def retrieve_and_regrid(tmp_filename, cur_init, cur_storm, out_dir, logger, p):
                                  ' -new_grid ', fcst_grid_spec, ' ',
                                  fcst_regridded_file]
                 wgrb_cmd_fcst = ''.join(fcst_cmd_list)
+                wgrb_cmd_fcst = batchexe('sh')['-c',wgrb_cmd_fcst].err2out()
+                #wgrb_cmd_fcst = batchexe(wgrb_cmd_fcst.split()[0])[wgrb_cmd_fcst.split()[1:]].err2out()
                 msg = ("INFO|[wgrib2]| wgrib2 regrid command:" +
-                       wgrb_cmd_fcst)
+                       wgrb_cmd_fcst.to_shell())
                 logger.debug(msg)
-                wgrb_fcst_out = subprocess.check_output(wgrb_cmd_fcst,
-                                                        stderr=
-                                                        subprocess.STDOUT,
-                                                        shell=True)
+                wgrb_fcst_out = run(wgrb_cmd_fcst)
+
 
         # Create new gridded file for anly tile
         if file_exists(anly_regridded_file) and not overwrite_flag:
@@ -657,10 +682,9 @@ def retrieve_and_regrid(tmp_filename, cur_init, cur_storm, out_dir, logger, p):
                                  var_level_string, ' ',
                                  ' -method NEAREST ']
                 regrid_cmd_anly = ''.join(anly_cmd_list)
-                regrid_anly_out = subprocess.check_output(regrid_cmd_anly,
-                                                          stderr=
-                                                          subprocess.STDOUT,
-                                                          shell=True)
+                regrid_cmd_anly = batchexe('sh')['-c',regrid_cmd_anly].err2out()
+                #regrid_cmd_anly = batchexe(regrid_cmd_anly.split()[0])[regrid_cmd_anly.split()[1:]].err2out()
+                regrid_anly_out = run(regrid_cmd_anly)
                 msg = ("INFO|[regrid]| on anly file:" + anly_regridded_file)
                 logger.debug(msg)
             else:
@@ -672,12 +696,11 @@ def retrieve_and_regrid(tmp_filename, cur_init, cur_storm, out_dir, logger, p):
                                  ' -new_grid ', anly_grid_spec, ' ',
                                  anly_regridded_file]
                 wgrb_cmd_anly = ''.join(anly_cmd_list)
-                wgrb_anly_out = subprocess.check_output(wgrb_cmd_anly,
-                                                        stderr=
-                                                        subprocess.STDOUT,
-                                                        shell=True)
+                wgrb_cmd_anly = batchexe('sh')['-c',wgrb_cmd_anly].err2out()
+                #wgrb_cmd_anly = batchexe(wgrb_cmd_anly.split()[0])[wgrb_cmd_anly.split()[1:]].err2out()
                 msg = ("INFO|[wgrib2]| Regridding via wgrib2:" +
-                       wgrb_cmd_anly)
+                       wgrb_cmd_anly.to_shell())
+                wgrb_anly_out = run(wgrb_cmd_anly)
                 logger.debug(msg)
@@ -710,9 +733,9 @@ def retrieve_var_info(p, logger):
     cur_filename = sys._getframe().f_code.co_filename
     cur_function = sys._getframe().f_code.co_name
 
-    var_list = p.opt["VAR_LIST"]
-    extra_var_list = p.opt["EXTRACT_TILES_VAR_LIST"]
-    regrid_with_MET_tool = p.opt["REGRID_USING_MET_TOOL"]
+    var_list = getlist(p.getstr('config','VAR_LIST'))
+    extra_var_list = getlist(p.getstr('config','EXTRACT_TILES_VAR_LIST'))
+    regrid_with_MET_tool = p.getbool('config','REGRID_USING_MET_TOOL')
     full_list = []
 
     # Append the extra_var list to the var_list
@@ -789,17 +812,17 @@ def create_grid_specification_string(lat, lon, logger, p):
     # Useful for logging
     cur_filename = sys._getframe().f_code.co_filename
     cur_function = sys._getframe().f_code.co_name
-    regrid_by_MET = p.opt["REGRID_USING_MET_TOOL"]
+    regrid_by_MET = p.getbool('config','REGRID_USING_MET_TOOL')
 
     # Initialize the tile grid string
     # and get the other values from the parameter file
     tile_grid_str = ' '
-    nlat = str(p.opt["NLAT"])
-    nlon = str(p.opt["NLON"])
-    dlat = str(p.opt["DLAT"])
-    dlon = str(p.opt["DLON"])
-    lon_subtr = p.opt["LON_ADJ"]
-    lat_subtr = p.opt["LAT_ADJ"]
+    nlat = p.getstr('config','NLAT')
+    nlon = p.getstr('config','NLON')
+    dlat = p.getstr('config','DLAT')
+    dlon = p.getstr('config','DLON')
+    lon_subtr = p.getfloat('config','LON_ADJ')
+    lat_subtr = p.getfloat('config','LAT_ADJ')
 
     # Format for regrid_data_plane:
     # latlon Nx Ny lat_ll lon_ll delta_lat delta_lon
     adj_lon = float(lon) - lon_subtr
@@ -986,10 +1009,10 @@ def apply_series_filters(tile_dir, init_times, series_output_dir, p, logger):
 
     # Retrieve any necessary values from the param/config file,
     # constants_pdef.py.
-    tc_stat_exe = p.opt["TC_STAT"]
+    tc_stat_exe = p.getexe('TC_STAT')
     cur_pid = str(os.getpid())
-    tmp_dir = os.path.join(p.opt["TMP_DIR"], cur_pid)
-    filter_opts = p.opt["SERIES_ANALYSIS_FILTER_OPTS"]
+    tmp_dir = os.path.join(p.getdir('TMP_DIR'), cur_pid)
+    filter_opts = p.getstr('config','SERIES_ANALYSIS_FILTER_OPTS')
 
     for cur_init in init_times:
         # Create the ASCII file with the storms that meet the
@@ -1031,8 +1054,7 @@ def apply_series_filters(tile_dir, init_times, series_output_dir, p, logger):
         # storm ids that resulted from filtering.
         sorted_storm_ids = get_storm_ids(filter_filename, logger)
 
-        # Retrieve the header from filter_filename (which is output from
-        # MET TC-STAT) to be used in creating the temporary files.
+        # Retrieve the header from filter_filename to be used in creating the temporary files.
         with open(filter_filename, 'r') as ff:
             header = ff.readline()
 
@@ -1066,7 +1088,7 @@ def apply_series_filters(tile_dir, init_times, series_output_dir, p, logger):
     prune_empty(series_output_dir, p, logger)
 
     # Clean up the tmp dir
-    subprocess.call(["rm", "-rf", tmp_dir])
+    rmtree(tmp_dir)
 
 
 def create_filter_tmp_files(filtered_files_list, filter_output_dir, p, logger):
@@ -1176,6 +1198,24 @@ def get_dirs(base_dir, p, logger):
 
     return dir_list
 
+def getlist(s,logger=None):
+
+    # returns a list of string elements from a comma or space
+    # separated string of values; returns an empty list
+    # if s is ''
+    # '4,4,2,4,2,4,2, ' or '4,4,2,4,2,4,2 ' or
+    # '4, 4, 4, 4, ' or '4, 4, 4, 4 '
+
+    # removes surrounding commas and spaces, if present.
+    s = s.strip().strip(',').strip()
+
+    if ',' in s:
+        s = s.split(',')
+        s = [item.strip() for item in s]
+    else:
+        s = s.split()
+
+    return s
 
 if __name__ == "__main__":
     # test grep
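
Note: the recurring substitution in this patch replaces subprocess.check_output calls with produtil runner objects. Below is a minimal sketch of that pattern in isolation, assuming NCEP's produtil package is importable; the echo command is a placeholder, not part of the patch. Unlike check_output, run() returns the command's exit status rather than its captured output.

    # Sketch of the produtil.run pattern adopted above (placeholder command).
    from produtil.run import batchexe, run

    cmd_string = 'echo regridding would happen here'      # stands in for a wgrib2/regrid pipeline
    cmd = batchexe('sh')['-c', cmd_string].err2out()      # build runner; merge stderr into stdout
    print(cmd.to_shell())                                 # loggable shell form, as used in the patch
    status = run(cmd)                                     # execute; returns the exit status (0 on success)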
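The behavior of the new getlist() helper follows directly from its comments: strip surrounding commas and whitespace, then split on commas if any are present, otherwise on whitespace. A few illustrative calls (argument values are made up):

    getlist('4,4,2,4,2,4,2, ')    # -> ['4', '4', '2', '4', '2', '4', '2']
    getlist('4, 4, 4, 4 ')        # -> ['4', '4', '4', '4']
    getlist('one two three')      # space-separated also works -> ['one', 'two', 'three']
    getlist('')                   # -> []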
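get_logger() now date-stamps the log file name via os.path.splitext. The same construction shown standalone, with 'metplus.log' as a hypothetical LOG_FILENAME value:

    import datetime, os

    log_filename = 'metplus.log'                  # hypothetical LOG_FILENAME setting
    base, ext = os.path.splitext(log_filename)    # ('metplus', '.log')
    dated = base + '.' + datetime.datetime.now().strftime("%Y%m%d") + ext.strip()
    # e.g. 'metplus.20170614.log'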