Feature 728 update python embedding #876

Merged (7 commits) on Apr 13, 2021
@@ -9,10 +9,10 @@
# Scientific Objective
# --------------------
#
# Once this method is complete, a forecast and reference track analysis file
# for the valid date of interest (YYYYMMDDHH) will have been created {OUTPUT_BASE}/decks,
# forecast and reference tracks paired up {OUTPUT_BASE}/tc_pairs and global storm tracks
# for the valid date of interest will be plotted {OUTPUT_BASE}/cyclone (PlateCaree projection)
# Once this use case is complete, an extra TC track file created by the user's Python script
# for the valid date of interest (YYYYMMDDHH) will have been generated,
# paired up by TCPairs, and the global storm tracks
# for that valid date will be plotted by CyclonePlotter (PlateCarree projection)

##############################################################################
# Datasets
@@ -44,17 +44,17 @@
# METplus Components
# ------------------
#
# This use case utilizes the METplus TCPairs wrapper to search for
# files that are valid at a given run time and generate a command to run
# the MET tool tc_pairs. It then uses the CyclonePlotter wrapper to create
# a global plot of storm tracks for the desired day of interest (YYYYMMDDHH)
# This use case utilizes output files created by a user-provided Python script, which are then accessible via the TCPairs wrapper.
# Because the source file already contains tracked extra TCs, the TCPairs wrapper is passed the "adeck" file for each storm twice:
# once as the adeck or forecast file, and once as the bdeck or analysis file. Essentially, TCPairs is matching a forecast to itself.
# It then uses the CyclonePlotter wrapper to create a global plot of storm tracks for the desired day of interest (YYYYMMDDHH).

##############################################################################
# METplus Workflow
# ----------------
#
# TCPairs is the first tool called in this example. It processes the following
# run times:
# run times for each storm file:
#
# | **Init/Valid:** 2020100700
# |
@@ -95,7 +95,11 @@
# Python Embedding
# ----------------
#
# This use case uses a Python embedding script to read input data
# This use case uses a Python embedding script to read input data.
# Because the source file already contains "analysis" tracks for the extra TCs,
# this Python script only needs to output storm tracks that have a valid time matching
# the user input. These storms are put into separate storm files, to better mimic how TC storms are
# typically passed to TCPairs.
#
# parm/use_cases/model_applications/tc_and_extra_tc/CyclonePlotter_fcstGFS_obsGFS_OPC/extract_opc_decks.py
#
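#
# As a condensed illustration of what this embedding script does, a minimal sketch using
# toy data is shown below (the real script reads the full ATCF-format track file with its
# complete header list and also contains the currently commented-out bdeck logic)::
#
#    import pandas as pd
#
#    # toy stand-in for the combined extra-TC track file (real columns come from the ATCF headers)
#    tracks = pd.DataFrame({
#        "STORMNAME": ["2020100618_35N145W", "2020100618_35N145W", "2020100700_40N160E"],
#        "YYYYMMDDHH": [2020100700, 2020100706, 2020100700],
#        "TAU": [0, 6, 0],
#    })
#
#    search_date = "2020100700"
#
#    # keep only rows whose warning time matches the requested date
#    adeck = tracks[tracks["YYYYMMDDHH"].astype(str).str.strip() == search_date]
#
#    # write one file per storm so the TCPairs wrapper sees each storm separately
#    for i, (name, storm) in enumerate(adeck.groupby("STORMNAME")):
#        storm.to_csv(f"adeck.{search_date}.{str(i).zfill(4)}.dat", header=False, index=False)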
@@ -142,11 +146,12 @@
# Output for this use case will be found in **decks**, **tc_pairs**, and **cyclone** (relative to **OUTPUT_BASE**)
# and will contain the following files:
#
# * decks/adeck.2020100700.dat
# * decks/bdeck.2020100700.dat
# * tc_pairs/tc_pairs.2020100700.dat
# * decks/adeck/adeck.2020100700.xxxx.dat
# * tc_pairs/tc_pairs.2020100700.xxxx.tcst
# * cyclone/20201007.png
# * cyclone/20201007.txt
#
# where "xxxx" is the unique four digit storm identifier for TCPairs wrapper to use.

##############################################################################
# Keywords
@@ -29,10 +29,7 @@ USER_SCRIPT_RUNTIME_FREQ = RUN_ONCE_PER_INIT_OR_VALID
USER_SCRIPT_PATH = {PARM_BASE}/use_cases/model_applications/tc_and_extra_tc/CyclonePlotter_fcstGFS_obsGFS_OPC/extract_opc_decks.py

USER_SCRIPT_INPUT_PATH = {INPUT_BASE}/model_applications/tc_and_extra_tc/CyclonePlotter_fcstGFS_obsGFS_OPC/trak.gfso.atcf_gen.glbl.{init?fmt=%Y}
#USER_SCRIPT_INPUT_PATH = /d2/projects/extra-tc_verif/gpfs/dell1/nco/ops/com/gentracks/prod/gentracks/{init?fmt=%Y}/trak.gfso.atcf_gen.glbl.{init?fmt=%Y}


USER_SCRIPT_OUTPUT_DIR = {OUTPUT_BASE}/decks
USER_SCRIPT_COMMAND = {USER_SCRIPT_PATH} {USER_SCRIPT_INPUT_PATH} {USER_SCRIPT_OUTPUT_DIR} {init?fmt=%Y%m%d%H}
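# For the sample init time 2020100700, this command expands to something like (illustrative;
# actual paths depend on INPUT_BASE and OUTPUT_BASE):
#   extract_opc_decks.py .../trak.gfso.atcf_gen.glbl.2020 .../decks 2020100700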

# A list of times to include, in format YYYYMMDD_hh
@@ -54,7 +51,7 @@ TC_PAIRS_VALID_END =
#
# Run MET tc_pairs by indicating the top-level directories for the A-deck and B-deck files. Set to 'yes' to
# run using top-level directories, 'no' if you want to run tc_pairs on files paired by the wrapper.
TC_PAIRS_READ_ALL_FILES = yes
TC_PAIRS_READ_ALL_FILES = no

# set to true or yes to reformat track data into ATCF format expected by tc_pairs
TC_PAIRS_REFORMAT_DECK = no
@@ -93,6 +90,8 @@ TC_PAIRS_STORM_NAME =
# minimum distance from land.
TC_PAIRS_DLAND_FILE = MET_BASE/tc_data/dland_global_tenth_degree.nc

# setting this so that when verifying against the analysis track, the union of points is written
TC_PAIRS_MET_CONFIG_OVERRIDES = match_points = FALSE;
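# (the override string above is presumably passed through by METplus to the wrapped tc_pairs
# MET configuration, so match_points is set to FALSE when the tool runs)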

##
# only 00, 06, 12, and 18z init times are supported on the NOAA website,
@@ -130,15 +129,15 @@ CYCLONE_PLOTTER_ADD_WATERMARK = False

USER_SCRIPT_OUTPUT_DIR = {OUTPUT_BASE}/decks

TC_PAIRS_ADECK_INPUT_DIR = {USER_SCRIPT_OUTPUT_DIR}
TC_PAIRS_BDECK_INPUT_DIR = {USER_SCRIPT_OUTPUT_DIR}
TC_PAIRS_ADECK_INPUT_DIR = {USER_SCRIPT_OUTPUT_DIR}/adeck
TC_PAIRS_BDECK_INPUT_DIR = {USER_SCRIPT_OUTPUT_DIR}/adeck
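# note: both deck directories intentionally point at the same user-script output;
# the track file is matched against itself (see the METplus Components discussion above)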

TC_PAIRS_OUTPUT_DIR = {OUTPUT_BASE}/tc_pairs

CYCLONE_PLOTTER_INPUT_DIR = {TC_PAIRS_OUTPUT_DIR}
CYCLONE_PLOTTER_OUTPUT_DIR = {OUTPUT_BASE}/cyclone

[filename_templates]
TC_PAIRS_ADECK_TEMPLATE = adeck.{init?fmt=%Y%m%d%H}.dat
TC_PAIRS_BDECK_TEMPLATE = bdeck.{init?fmt=%Y%m%d%H}.dat
TC_PAIRS_OUTPUT_TEMPLATE = tc_pairs.{init?fmt=%Y%m%d%H}
TC_PAIRS_ADECK_TEMPLATE = adeck.{init?fmt=%Y%m%d%H}.{cyclone}.dat
TC_PAIRS_BDECK_TEMPLATE = adeck.{init?fmt=%Y%m%d%H}.{cyclone}.dat
TC_PAIRS_OUTPUT_TEMPLATE = tc_pairs.{init?fmt=%Y%m%d%H}.{cyclone}
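# e.g. for init 2020100700 and cyclone id 0000 these resolve to adeck.2020100700.0000.dat
# and tc_pairs.2020100700.0000 (tc_pairs appends the .tcst suffix to its output)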
@@ -34,49 +34,104 @@
if num_args < 3:
print("ERROR: Not enough arguments")
sys.exit(1)

# function to extract start date from stormname (stormname contains date 1st observed, lat-lon 1st observed)
def startswith_date(storm_name, search_date):
    storm_date = str(storm_name).split('_')[0].strip()
    return storm_date.startswith(search_date)
debug = 'debug' in sys.argv
# function to compare storm warning time to search time
def is_equal(column_val, search_string):
    return str(column_val).strip() == search_string
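# illustrative example: is_equal(' 2020100700 ', '2020100700') returns True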

input_file = sys.argv[1]
output_dir = sys.argv[2]
search_date = sys.argv[3]

# name of ADECK & BDECK files contain search date
adeck_filename = f'adeck.{search_date}.dat'
bdeck_filename = f'bdeck.{search_date}.dat'
if debug:
print(f"Running {__file__}\nSearch date: {search_date}")

# get 2 digit year to use in CYCLONE column substitute value
search_year = search_date[2:4]

# string to use in output file names for filtered adeck and bdeck files
file_prefix = f'deck.{search_date}.'

# an intermediate directory path for the separate files
adeck_base = os.path.join(output_dir, "adeck")
#bdeck_base = os.path.join(output_dir, "bdeck")

adeck_path = os.path.join(output_dir, adeck_filename)
bdeck_path = os.path.join(output_dir, bdeck_filename)
# create output directories if not already there
if not os.path.exists(adeck_base):
print(f"Creating output directory: {adeck_base}")
os.makedirs(adeck_base)

#if not os.path.exists(bdeck_base):
# print(f"Creating output directory: {bdeck_base}")
# os.makedirs(bdeck_base)

# using pandas (pd), read input file
print(f"Reading input file: {input_file}")
pd_data = pd.read_csv(input_file, names=atcf_headers_trak)

# get adeck - all lines that match the desired date for YYYYMMDDHH (init time)
init_matches = pd_data['YYYYMMDDHH'].apply(startswith_date, args=(search_date,))
adeck = pd_data[init_matches]
print(f"Filtering data...")

# get all 0 hour analyses data
print(f"Filtering data 0 (hr) in TAU (forecast hour) column for bdeck")
pd_0hr_data = pd_data[pd_data['TAU'] == 0]
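# (TAU == 0 rows are the 0-hour positions, i.e. the analysis locations that stand in for the bdeck)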

# get adeck - all lines that match the desired date for YYYYMMDDHH (init time)
print(f"Filtering data with {search_date} in YYYYMMDDHH column for adeck")
init_matches = pd_data['YYYYMMDDHH'].apply(is_equal,
                                           args=(search_date,))
adeck = pd_data[init_matches]

# get list of STORMNAMEs from adeck data
all_storms = adeck.STORMNAME.unique()

# get lines where forecast hour is 0 and STORMNAME is in ADECK list
only_adeck_storms = pd_0hr_data['STORMNAME'].isin(all_storms)
bdeck = pd_0hr_data[only_adeck_storms]

# create output directory if not already there
if not os.path.exists(output_dir):
print(f"Creating output directory: {output_dir}")
os.makedirs(output_dir)

# write ADECK
print(f"Writing adeck to {adeck_path}")
adeck.to_csv(adeck_path, header=False, index=False)

# write BDECK
print(f"Writing bdeck to {bdeck_path}")
bdeck.to_csv(bdeck_path, header=False, index=False)
# initialize counter to use to set output filenames with "cyclone" number
# to keep storms in separate files
index = 0

# loop over storms
for storm_name in all_storms:
    index_pad = str(index).zfill(4)

    # remove whitespace at beginning of storm name
    storm_name = storm_name.strip()

    # get 0hr data for given storm to use as bdeck
    storm_b_match = pd_0hr_data['STORMNAME'].apply(is_equal,
                                                   args=(storm_name,))
    storm_bdeck = pd_0hr_data[storm_b_match]
    if debug:
        print(f"Processing storm: {storm_name}")
    wrote_a = wrote_b = False

    # Logic for writing out Analysis files. Currently commented out,
    # but left in for possible future use
    if not storm_bdeck.empty:
    #    bdeck_filename = f'b{file_prefix}{index_pad}.dat'
    #    bdeck_path = os.path.join(bdeck_base, bdeck_filename)

    #    print(f"Writing bdeck to {bdeck_path}")
    #    storm_bdeck.to_csv(bdeck_path, header=False, index=False)
        wrote_b = True
    #else:
    #    print(f"BDECK for {storm_name} is empty. Skipping")

    # filter out adeck data for given storm
    storm_a_match = adeck['STORMNAME'].apply(is_equal,
                                             args=(storm_name,))
    storm_adeck = adeck[storm_a_match]

    if not storm_adeck.empty:
        adeck_filename = f'a{file_prefix}{index_pad}.dat'
        adeck_path = os.path.join(adeck_base, adeck_filename)
        if debug:
            print(f"Writing adeck to {adeck_path}")
        storm_adeck.to_csv(adeck_path, header=False, index=False)
        wrote_a = True
    else:
        if debug:
            print(f"ADECK for {storm_name} is empty. Skipping")

    if wrote_a or wrote_b:
        index += 1

print("Finished processing all storms")