
Commit

Some tidying, and getting the tidying-up of files correct
robertsmalcolm committed Sep 6, 2024
1 parent e78811b commit 37edb5d
Showing 4 changed files with 10 additions and 118 deletions.
51 changes: 1 addition & 50 deletions inline_model_storms/tempest_common.py
@@ -191,14 +191,13 @@ def _tidy_data_files(self, timestamp, timestamp_end, var_list,
"""
self.logger.info(f"Tidy up input files")
files_remove = []
-        #source_files, processed_files = self._generate_file_names(timestamp,
-        #                                                           timestamp_end)

if f_remove == 'processed':
for var in var_list:
f = self._file_pattern_processed(timestamp, timestamp_end, var,
frequency=self.data_frequency)
if os.path.exists(os.path.join(self.outdir, f)):
+                    self.logger.info(f"Deleting {os.path.join(self.outdir, f)}")
os.remove(os.path.join(self.outdir, f))

def _tidy_track_files(
@@ -427,54 +426,6 @@ def _check_time_coord(self, fnames):
self.logger.debug(f"cmd {cmd}")
subprocess.call(cmd, shell=True)

def _generate_file_names(self, time_start, time_end):
"""
Generate a list of input and output filenames.
:param str time_start: The timestep of the start of the data period to process
:param str time_end: The timestep of the end of the data period to process
:returns: A dictionary of the files found for this period and a string
containing the period between samples in the input data.
:rtype: dict
"""
source_filenames = {}
processed_filenames = {}

variables_required = {}
# these variables need to have a new var_name, either because the default
# from the UM is confusing or unknown, and these names are needed for the
# variable name inputs for the TempestExtremes scripts
for var in self.variables_input:
variables_required[var] = {'fname': var}
if var in self.variables_rename:
variables_required[var].update({'varname_new': var})

for var in self.variables_input:
filename = self._file_pattern(self.time_range.split('-')[0],
self.time_range.split('-')[1],
variables_required[var]["fname"],
um_stream='pt')

input_path = os.path.join(self.input_directory, filename)

# make the output path filename similar to CMIP6 naming, will be standard
# regardless of the input filename structure
# varname_new, freq, time
var_name = var
if "varname_new" in variables_required[var]:
var_name = variables_required[var]["varname_new"]
output_path = self._file_pattern_processed(time_start,
time_end,
var_name,
self.frequency)

output_path = os.path.join(self.outdir, output_path)

source_filenames[var] = input_path
processed_filenames[var] = output_path

return source_filenames, processed_filenames

def _identify_processed_files(self, time_start, time_end, grid_resol="native"):
"""
Identify the processed input files to be used by tracking.
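Note on tempest_common.py: the first hunk adds a log message before each file removal in _tidy_data_files and drops a commented-out call to _generate_file_names; the second hunk then deletes the now-unused _generate_file_names helper itself. A minimal, self-contained sketch of the log-then-delete pattern the surviving code follows; the function name, logger setup, and arguments are illustrative, not taken from the repository:

    import logging
    import os

    logger = logging.getLogger(__name__)

    def tidy_processed_files(outdir, filenames):
        # Log each path before removing it so the cleanup step leaves
        # an audit trail; skip anything that has already been deleted.
        for f in filenames:
            path = os.path.join(outdir, f)
            if os.path.exists(path):
                logger.info(f"Deleting {path}")
                os.remove(path)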
7 changes: 5 additions & 2 deletions inline_model_storms/tempest_cyclone.py
@@ -99,6 +99,7 @@ def run(self, *args, **kwargs):
timestamp_next = self.next_cycle[:8]
startdate = self.startdate[:8]
enddate = self.enddate[:8]
+        timestamp_tm2 = self.tm2_cycle[:8]
end_of_year = False
if timestamp_current[:4] != timestamp_next[:4]:
end_of_year = True
@@ -108,6 +109,7 @@ def run(self, *args, **kwargs):
self.logger.debug(f"lastcycle {self.lastcycle}")
self.logger.debug(f"previous_cycle {self.previous_cycle}")
self.logger.debug(f"next_cycle {self.next_cycle}")
+        self.logger.debug(f"tm2_cycle {timestamp_tm2}")
self.logger.debug(f"is_last_cycle {self.is_last_cycle}")
self.logger.debug(f"inline_tracking {self.inline_tracking}")
self.logger.debug(f"track_by_year {self.track_by_year}")
@@ -189,7 +191,7 @@ def run(self, *args, **kwargs):
timestamp_next)

self._tidy_files(self.outdir,
-                         self.tm2_cycle[:8],
+                         timestamp_tm2[:8],
self.previous_cycle[:8])

def _run_tracking_and_editing(
@@ -365,7 +367,7 @@ def _archive_track_data(self, outdir, timestamp, timestamp_next,
with open(f_archive, "a"):
os.utime(f_archive, None)

-    def _tidy_files(self, outdir, timestamp_previous, timestamp_tm2):
+    def _tidy_files(self, outdir, timestamp_tm2, timestamp_previous):
"""
Find all files that will not be needed on next step, and remove them
@@ -393,6 +395,7 @@ def _tidy_files(self, outdir, timestamp_previous, timestamp_tm2):
variables_to_delete.remove(var)
else:
variables_to_delete = self.variables_rename.copy()
+
self._tidy_data_files(timestamp_tm2,
timestamp_previous, variables_to_delete)

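Note on tempest_cyclone.py: the substantive fix is argument order. The definition of _tidy_files previously named its parameters (outdir, timestamp_previous, timestamp_tm2), while run() passes the tm2 cycle first and the previous cycle second, so the deletion window forwarded to _tidy_data_files appears to have been reversed. The commit swaps the names in the signature and reuses the pre-sliced timestamp_tm2 at the call site. Passing such bounds by keyword makes this class of bug visible where the call is made; a small illustrative sketch in which every name is hypothetical, not taken from the repository:

    def tidy_data_files(timestamp, timestamp_end):
        # Stand-in for the real cleanup; just report the window received.
        print(f"tidying window {timestamp} -> {timestamp_end}")

    def tidy_files(outdir, timestamp_tm2, timestamp_previous):
        # Forward both bounds by keyword so a swapped parameter order in
        # either signature cannot silently reverse the deletion window.
        tidy_data_files(timestamp=timestamp_tm2,
                        timestamp_end=timestamp_previous)

    tidy_files("outdir", timestamp_tm2="20240901",
               timestamp_previous="20240906")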
4 changes: 4 additions & 0 deletions inline_model_storms/um_postprocess.py
@@ -96,16 +96,20 @@ def run(self, *args, **kwargs):
if not self.is_last_cycle == "true":
if self.track_at_end:
# if we're tracking at end, we may need to retain all nodeedit files
# this deletion is done in tempest_cyclone
pass
#self._tidy_data_files(timestamp_tm2,
# timestamp_previous, self.variables_rename)

# delete source data if required
self.logger.info(f"delete source {self.delete_source}")
if self.delete_source:
for var in self.variables_input:
fname = self._file_pattern(timestamp_tm2 + "*", "*", var,
stream=self.um_stream, frequency="*")

file_name = os.path.join(self.input_directory, self.ensemble, fname)
self.logger.info(f"deleting source file_name {file_name}")
files_exist = glob.glob(file_name)
if len(files_exist) > 0:
for f in files_exist:
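Note on um_postprocess.py: when tracking at the end of the run, deletion of the processed files is left to tempest_cyclone (hence the pass branch), and the source-data deletion path logs each wildcard pattern before the matching files are removed. The surviving logic is a glob-expand-then-delete step; a self-contained sketch under assumed names (delete_source_files, input_directory, and pattern are illustrative):

    import glob
    import logging
    import os

    logger = logging.getLogger(__name__)

    def delete_source_files(input_directory, pattern):
        # Expand the wildcard pattern, log the target, and remove every
        # match; glob.glob returns an empty list when nothing matches.
        file_name = os.path.join(input_directory, pattern)
        logger.info(f"deleting source file_name {file_name}")
        for f in glob.glob(file_name):
            os.remove(f)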
66 changes: 0 additions & 66 deletions inline_model_storms/um_preprocess.py
@@ -120,71 +120,6 @@ def run(self, *args, **kwargs):
psl_input_var, grid_resol=regrid_resol)
#self._produce_derived_diagnostics(source_files, processed_files)

# def _file_pattern(self, timestart, timeend, varname,
# frequency="6h", stream="pt"):
# """
# Derive the input nc filenames from the file pattern, assuming a
# um model filenaming pattern as here, or could be other patterns
# for other models/platforms (which would need to be added)
#
# :param str timestart: The timestep of the start of the data period to process
# :param str timeend: The timestep of the end of the data period to process
# :param str um_stream: The name of the um output stream (output file
# : identification)
# :param str frequency: The frequency of the input data (in hours, needs to
# : include "h"), used to determine file naming
# :returns: a filename given the inputs to the pattern
# :rtype: str
# """
# if self.frequency is None:
# file_freq = frequency
# else:
# file_freq = str(self.frequency)+"h"
#
# if self.input_file_pattern != '':
# # file format based on input pattern
# fname = self.input_file_pattern.format(
# runid=self.runid,
# frequency=file_freq,
# date_start=timestart,
# date_end=timeend,
# variable=varname,
# stream=stream
# )
# self.logger.info(f"fname from _file_pattern {fname} {self.um_stream} {timestart}" +\
# f" {timeend} {varname}")
# return fname.strip('"')
#
# def _file_pattern_processed(self, timestart, timeend, varname,
# frequency="6h"):
# """
# For processed files, we know what the filenames look like, so
# search specifically
#
# :param str timestart: The timestep of the start of the data period to process
# :param str timeend: The timestep of the end of the data period to process
# :param str frequency: The frequency of the input data (in hours, needs to
# : include "h"), used to determine file naming
# :returns: a filename given the inputs to the pattern
# :rtype: str
# """
# if self.frequency is None:
# file_freq = frequency
# else:
# file_freq = str(self.frequency)+"h"
#
# fname = self.file_pattern_processed.format(
# runid=self.runid,
# frequency=file_freq,
# date_start=timestart,
# date_end=timeend,
# variable=varname
# )
#
# self.logger.info(f"fname from _file_pattern_processed {fname} {timestart} " + \
# "{timeend} {varname}")
# return fname.strip('"')

def _generate_data_files(self, timestamp, timestamp_end,
psl_var, grid_resol="native"):
"""
@@ -469,7 +404,6 @@ def _produce_derived_diagnostics(self, dir_proc, processed_filenames):
fname_sf_850 = fname_ua_850.replace("ua_850", "sf_850")
iris.save(sf, fname_sf_850)


def _get_app_options(self):
"""Get commonly used configuration items from the config file"""

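Note on um_preprocess.py: the removed block was a commented-out duplicate of the _file_pattern and _file_pattern_processed helpers; the live versions presumably remain elsewhere in the package (tempest_common.py still calls _file_pattern_processed above). Both helpers fill a configurable str.format template and strip stray quotes from the configured pattern. A runnable sketch using the same placeholder names as the removed code, with a hypothetical template and values:

    def processed_filename(pattern, runid, frequency, date_start,
                           date_end, variable):
        # Fill the filename template; quotes carried over from the app
        # config are stripped from the result.
        fname = pattern.format(runid=runid, frequency=frequency,
                               date_start=date_start, date_end=date_end,
                               variable=variable)
        return fname.strip('"')

    # Hypothetical CMIP6-like template:
    print(processed_filename(
        "{variable}_{frequency}_{runid}_{date_start}-{date_end}.nc",
        "u-ab123", "6h", "20240901", "20240906", "psl"))
    # -> psl_6h_u-ab123_20240901-20240906.nc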
