From 20ff5663216190680e9a608c5e21a75cdfbf0754 Mon Sep 17 00:00:00 2001
From: ManshaP <pmanshausen@gmail.com>
Date: Tue, 17 May 2022 11:14:42 +0100
Subject: [PATCH] update headers

---
 LICENSE                                       |   2 +-
 README.md                                     |   3 +
 matt_traj_code/ASCDATA_JASMIN.CFG             |   6 -
 matt_traj_code/SETUP_traj_lowcloud_15min.CFG  |  23 --
 matt_traj_code/traj_example.py                | 363 ------------------
 processing_pipeline/1_track_emissions.py      |  12 +
 .../2_advect_tracks_hysplit.py                |   4 +-
 .../3_interpolate_collocate_cfaw_hy.py        |  15 +-
 processing_pipeline/4_change_file_format.py   |  22 +-
 9 files changed, 52 insertions(+), 398 deletions(-)
 delete mode 100644 matt_traj_code/ASCDATA_JASMIN.CFG
 delete mode 100644 matt_traj_code/SETUP_traj_lowcloud_15min.CFG
 delete mode 100644 matt_traj_code/traj_example.py

diff --git a/LICENSE b/LICENSE
index 0c6fd1d..1865e91 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2022 Peter Manshausen
+Copyright (c) 2022 Peter Manshausen, Matt Christensen
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index 32156c4..2f07e61 100644
--- a/README.md
+++ b/README.md
@@ -5,8 +5,11 @@ Code to track ship trajectories from AIS data, advect them with ERA5 winds using
 ## Data set production
 Below are examples of which keywords the python scripts expect to run the analysis for the month of 01/2016. 
 In order to run it, maps of emission locations are needed as well as a HYSPLIT installation, meteorological (e.g. ERA5) data, as well as MODIS cloud product data. The filepaths that need replacing on a different system are listed for each python script.  
+For an explanation of what each step does, see the header comments of the scripts in `processing_pipeline/`.
 
+Pipeline:
 `python 0_modis_list.py 1 2016`\
+
 files in: MODIS files: `/neodc/modis/data/MOD06_L2/collection61/`\
 files out: MODIS that overlaps with Region of Interest\
 filepath0: `/home/users/pete_nut/IV_shiptracks/modis_tiles/atlantic_modis_swaths_intersect_{}_{}.pkl`
diff --git a/matt_traj_code/ASCDATA_JASMIN.CFG b/matt_traj_code/ASCDATA_JASMIN.CFG
deleted file mode 100644
index 9670a26..0000000
--- a/matt_traj_code/ASCDATA_JASMIN.CFG
+++ /dev/null
@@ -1,6 +0,0 @@
--90.0   -180.0  lat/lon of lower left corner
-1.0     1.0     lat/lon spacing in degrees
-180     360     lat/lon number of data points
-2               default land use category
-0.2             default roughness length (m)
-'/home/users/pete_nut/hysplit.v5.0.0_CentOS/bdyfiles/'  directory of files
diff --git a/matt_traj_code/SETUP_traj_lowcloud_15min.CFG b/matt_traj_code/SETUP_traj_lowcloud_15min.CFG
deleted file mode 100644
index 17823d8..0000000
--- a/matt_traj_code/SETUP_traj_lowcloud_15min.CFG
+++ /dev/null
@@ -1,23 +0,0 @@
- &SETUP
- tratio = 0.75,
- mgmin = 10,
- khmax = 9999,
- kmixd = 0,
- kmsl = 0,
- nstr = 0,
- mhrs = 9999,
- nver = 0,
- tout = 5,
- tm_tpot = 0,
- tm_tamb = 0,
- tm_rain = 0,
- tm_mixd = 0,
- tm_relh = 0,
- tm_sphu = 0,
- tm_mixr = 0,
- tm_dswf = 0,
- tm_terr = 0,
- dxf = 1.0,
- dyf = 1.0,
- dzf = 0.01,
- /
diff --git a/matt_traj_code/traj_example.py b/matt_traj_code/traj_example.py
deleted file mode 100644
index aba10d4..0000000
--- a/matt_traj_code/traj_example.py
+++ /dev/null
@@ -1,363 +0,0 @@
-# Name: HYSPLIT Lagrangian Trajectory Calculation
-#
-# Description: Simple python code to run HYSPLIT and output trajectory file
-#
-# Modes:
-# FORWARD trajectory, BACKWARD trajectory, and ALLWARDS trajectory (combines them)
-#
-# Inputs:
-# latitude, longitude, height, trajectory duration, outputpath, HYSPLIT executable path, meteorology path
-#
-# Output: tdump file
-#
-# Libraries: numpy, os and datetime
-#
-# Notes: This version outputs a trajectory location every 15 minutes and uses
-# NOAA ARL generated meteorological files for RP, GDAS, ERA5 or MERRA2.
-# For example of running code see RUN HYSPLIT trajectory and make modifications as necessary
-#
-# Current example produces a 12 hour backward and forward trajectory from 45 N 120 W on June 12th 2016 at 15:30 UTC
-# The two trajectories are "stitched" together. It is straightforward to run just a forward or back trajectory by negating the 'A' in trajHours variable.
-#-------------------------------------------------------------------------------------------
-import numpy as np
-import os
-import datetime
-
-def create_hysplit_control_file_multi(year,month,day,hour,minute,lats,lons,MLHeight,trajHours,path,workingpath,metpath):
-
-    nTraj = len(lats)
-    print('YEAR: ',year)
-    print('MONTH: ',month)
-    print('DAY: ',day)
-    print('HOUR: ',hour)
-    print('MINUTE: ',minute)
-    print('HYSPLIT Path: ',path)
-    print('Number of trajectories: ',nTraj)
-    #print('Height: ',MLHeight)
-    #print('Longitude: ',lons)
-    #print('Latitude: ',lats)
-    
-    #Fetch reanalysis files
-    if metpath.find("RP") > 0:  #RP reanalysis path used
-        metfiles = fetch_rp_reanalysis(month,year,metpath)
-
-    if metpath.find("GDAS") > 0:  #GDAS reanalysis path used
-        metfiles = fetch_gdas_reanalysis(day,month,year,metpath)
-
-    if metpath.find("ERA5") > 0:  #ERA5 reanalysis path
-        metfiles = fetch_era5_reanalysis(day,month,year,metpath,trajHours)
-    
-    if metpath.find("MERRA2") > 0:  #ERA5 reanalysis path
-        metfiles = fetch_merra2_reanalysis(day,month,year,metpath,trajHours)
-    
-    nmetfiles = len(metfiles)
-    
-    #---------------------------------------------------------------
-    # Create HYSPLIT CONTROL FILE
-    #---------------------------------------------------------------
-    fName = workingpath+'CONTROL'
-    print('CONTROL FILE: ',fName)
-
-    f = open(fName, 'w')
-    f.writelines( str(year-2000).zfill(2)+' '+str(month).zfill(2)+' '+str(day).zfill(2)+' '+str(hour).zfill(2) + "\n" ) #time
-    f.writelines( str(nTraj) +"\n") #number of trajectories
-    #starting location of each trajectory
-    for i in range(nTraj):
-        f.writelines("{:6.2f}".format((float(lats[i])))+'  '+"{:6.2f}".format((float(lons[i])))+'  '+"{:6.2f}".format(float(MLHeight[i])) +"\n")
-    f.writelines( str(int(trajHours)) +"\n")
-    f.writelines( str(1) + "\n") #isobaric trajectory
-    f.writelines( str(10000.0) + "\n") #top of model in meters
-    
-    #Meteorological files
-    f.writelines( str(nmetfiles) + "\n") #number of meteorologial files
-    for i in range(nmetfiles):
-        if len(os.path.dirname(metfiles[0])) == 0:
-            f.writelines(metpath + "\n")
-            f.writelines(metfiles[i] + "\n")
-        if len(os.path.dirname(metfiles[0])) > 0:
-            f.writelines(os.path.dirname(metfiles[i])+'/' + "\n")
-            f.writelines(os.path.basename(metfiles[i]) + "\n")
-    f.writelines(workingpath + "\n")
-    f.writelines('tdump' + "\n")
-    f.close()
-
-    errval=0
-    return errval
-
-#---------------------------------------------------------------
-#Function to fetch RP reanalysis files
-#---------------------------------------------------------------
-def fetch_rp_reanalysis(month,year,metpath):
-    metfiles = []
-    metjday  = []
-    for file in os.listdir(metpath):
-        if file.endswith(".gbl"):
-            metfiles.append(file)
-            metjday.append(datetime.datetime(int(file[2:6]),int(file[6:8]),15,0,0))
-
-    #Sort by time
-    sortID = np.asarray(np.argsort(metjday))
-    metfiles = np.asarray(metfiles)
-    metfiles = metfiles[sortID]
-    fct = sortID.shape[0]
-    metyears = [metjday[sortID[i]].year for i in range(len(metjday))]
-    metmonths= [metjday[sortID[i]].month for i in range(len(metjday))]
-
-    #Select 3 met files closest in time to the input time
-    nmetfiles = 3
-    index = (np.where((np.asarray(metyears) == year) & (np.asarray(metmonths) == month))[0])[0]
-    file0 = metfiles[ index-1] #prior month
-    file1 = metfiles[ index  ]
-    file2 = metfiles[ index+1] #next month
-    metfiles = [file0,file1,file2]
-    return metfiles
-
-
-#---------------------------------------------------------------
-#Function to fetch GDAS reanalysis files
-#---------------------------------------------------------------
-def fetch_gdas_reanalysis(day,month,year,metpath):
-    monSTR = ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']
-    monVal = [  1  ,  2  ,  3  ,  4  ,  5  ,  6  , 7   , 8   , 9   , 10  , 11  , 12  ]
-    weekSTR = ['w1' ,'w2' ,'w3' ,'w4','w5']
-    weekVal = [ 1   , 7   , 14   , 21  , 28]
-    metfiles = []
-    metjday  = []
-    for file in os.listdir(metpath):
-        if file.startswith("gdas1."):
-            #print(file)
-            monthName = file[6:9]
-            TMPyear = int(file[9:11])+2000
-            week = file[12:14]
-            mValID=monSTR.index(monthName)
-            TMPmonth=monVal[mValID]
-            wValID=weekSTR.index(week)
-            TMPday=weekVal[wValID]
-            metfiles.append(file)
-            metjday.append(datetime.datetime(TMPyear,TMPmonth,TMPday,0,0))
-            #print(monthName,' ',week,' ',year,' ',month,' ',day)
-            #print(datetime.datetime(year,month,day,0,0))
-
-
-    #Sort by time
-    sortID = np.asarray(np.argsort(metjday))
-    metfiles = np.asarray(metfiles)
-    metfiles = metfiles[sortID]
-    fct = sortID.shape[0]
-    metyears = [metjday[sortID[i]].year for i in range(len(metjday))]
-    metmonths= [metjday[sortID[i]].month for i in range(len(metjday))]
-    metdays  = [metjday[sortID[i]].day for i in range(len(metjday))]
-    metjdays = [metjday[sortID[i]] for i in range(len(metjday))]
-
-    #Select 5 met files closest in time to the input time
-    nmetfiles = 5
-    jdayRef= datetime.datetime(year,month,day)
-    diffTime = np.asarray([(jdayRef-metjdays[i]).total_seconds()  for i in range(len(metjday))])
-    index = (np.where( abs(diffTime) == min(abs(diffTime)) ))[0][0]
-    file0 = metfiles[ index-2] #prior weeks
-    file1 = metfiles[ index-1] #prior week
-    file2 = metfiles[ index]   
-    file3 = metfiles[ index+1] #next week
-    file4 = metfiles[ index+2] #next weeks
-    metfiles = [file0,file1,file2,file3,file4]
-    return metfiles
-
-
-#---------------------------------------------------------------
-#Function to fetch ERA5 reanalysis files
-#---------------------------------------------------------------
-def fetch_era5_reanalysis(day,month,year,metpath,trajHours):
-    if trajHours > 0: t0 = datetime.datetime(year,month,day)
-    if trajHours > 0: t1 = t0+datetime.timedelta(hours=trajHours)
-    if trajHours < 0: t1 = datetime.datetime(year,month,day)
-    if trajHours < 0: t0 = t1-datetime.timedelta(hours=abs(trajHours))
-    nDays = int(np.ceil(((t1-t0).total_seconds())/86400.))+1
-    metfiles = []
-    metjday  = []
-    for file in os.listdir(metpath):
-        if file.endswith(".ARL"):
-            metfiles.append(file)
-            metjday.append(datetime.datetime(int(file[5:9]),int(file[9:11]),int(file[11:13]),0,0))
-    #Sort by time
-    sortID = np.asarray(np.argsort(metjday))
-    metfiles = np.asarray(metfiles)
-    metfiles = metfiles[sortID]
-    fct = sortID.shape[0]
-    metyears = [metjday[sortID[i]].year for i in range(len(metjday))]
-    metmonths= [metjday[sortID[i]].month for i in range(len(metjday))]
-    metdays= [metjday[sortID[i]].day for i in range(len(metjday))]
-    metjday2 = [metjday[sortID[i]] for i in range(len(metjday))]
-    index, = np.where( (np.asarray(metjday2) >= t0) & (np.asarray(metjday2) <= t1) )
-    indices = [index[0]-1,index[len(index)-1]+1]
-    return metfiles[index[0]-1:index[len(index)-1]+2]
-
-
-#---------------------------------------------------------------
-#Function to fetch MERRA2 reanalysis files
-#---------------------------------------------------------------
-def fetch_merra2_reanalysis(day,month,year,metpath,trajHours):
-    if trajHours > 0: t0 = datetime.datetime(year,month,day)
-    if trajHours > 0: t1 = t0+datetime.timedelta(hours=trajHours)
-    if trajHours < 0: t1 = datetime.datetime(year,month,day)
-    if trajHours < 0: t0 = t1-datetime.timedelta(hours=abs(trajHours))
-    nDays = int(np.ceil(((t1-t0).total_seconds())/86400.))+1
-    metfiles = []
-    metjday  = []
-    for iD in range(nDays):
-        t = t0 + datetime.timedelta(days=iD)
-        f = metpath + str(t.year).zfill(4) +'/'+ str(t.month).zfill(2) +'/'+ str(t.day).zfill(2) + '/' + 'MERRA2_'+str(t.year).zfill(4)+str(t.month).zfill(2)+str(t.day).zfill(2)+'.ARL'
-        if os.path.exists(f): 
-            metfiles.append(f)
-            metjday.append( t )
-        print(t.year,t.month,t.day)
-    return metfiles
-
-
-
-#Function to run HYSPLIT particle trajectory code
-def traj(latPt,lonPt,hPt,year,month,day,hour,minute,trajHours,path_hysplit_code,metpath,outpath,setupFile,ASCFile,tName):
-    cwd = os.getcwd()
-    path_working = outpath+'working/'
-    os.system('cp '+setupFile+' '+path_working+'/SETUP.CFG')
-    os.system('cp '+ASCFile+' '+path_working+'/ASCDATA.CFG')
-    os.chdir(path_working)
-    
-    lat  = np.asarray([latPt])
-    lon  = np.asarray([lonPt])
-    MLHeight = np.asarray([hPt])
-    create_hysplit_control_file_multi(int(year),int(month),int(day),int(hour),int(minute),lat,lon,MLHeight,trajHours,path_hysplit_code,path_working,metpath)
-    
-    #Run HYSPLIT code
-    os.system(path_hysplit_code+'/exec/hyts_std')
-    os.system('chmod 777 '+path_working+'tdump')
-    print('mv '+path_working+'tdump '+outpath+tName)
-    os.system('mv '+path_working+'tdump '+outpath+tName)
-    os.chdir(cwd)
-
-def run_trajectory(latPt,lonPt,yyyy,mm,dd,hh,mn,hPt,trajHours,outpath,path_hysplit_code,metpath,setupFile,ASCFile,tName):
-    os.system('mkdir -p '+outpath)
-    path_working = outpath+'working/'
-    os.system('mkdir -p '+path_working)
-    
-    #Run both forward and backward trajectories
-    if trajHours.find('A') == 0:
-        #Backward
-        BACK_trajHours = int( (-1.)*abs( int( trajHours[1:] ) ) )
-        BACK_outpath = outpath + '/bwd/'
-        os.system('mkdir -p '+BACK_outpath)
-        path_working = BACK_outpath+'working/'
-        os.system('mkdir -p '+path_working)
-        BACK_tName = 'tdump_traj_BACKWARD_test.txt'
-        traj(latPt,lonPt,hPt,yyyy,mm,dd,hh,mn,BACK_trajHours,path_hysplit_code,metpath,BACK_outpath,setupFile,ASCFile,BACK_tName)
-        
-        #Forward
-        FOR_trajHours = int( abs( int( trajHours[1:] ) ) )
-        FOR_outpath = outpath + '/fwd/'
-        os.system('mkdir -p '+FOR_outpath)
-        path_working = FOR_outpath+'working/'
-        os.system('mkdir -p '+path_working)
-        FOR_tName = 'tdump_traj_FORWARD_test.txt'
-        traj(latPt,lonPt,hPt,yyyy,mm,dd,hh,mn,FOR_trajHours,path_hysplit_code,metpath,FOR_outpath,setupFile,ASCFile,FOR_tName)
-        
-        #Combine files
-        cFile = outpath+tName
-        fw = open(cFile,'w')
-        
-        file1 = open(BACK_outpath+BACK_tName,'r')
-        BWD_Lines = file1.readlines()
-        file1.close()
-        file1 = open(FOR_outpath+FOR_tName,'r')
-        FWD_Lines = file1.readlines()
-        file1.close()
-        
-        fwdI = 0
-        for i in range(len(FWD_Lines)):
-            fw.write(FWD_Lines[i])
-            if FWD_Lines[i].find('FORWARD') >= 0:
-                fw.write(FWD_Lines[i+1])
-                fw.write(FWD_Lines[i+2])
-                fwdI = i+2
-                break
-        bwdI = 0
-        for i in range(len(BWD_Lines)):
-            if BWD_Lines[i].find('BACKWARD') >= 0:
-                bwdI= i + 2
-                break
-        #Fetch lines
-        for i in reversed(range(bwdI+1,len(BWD_Lines))):
-            fw.write(BWD_Lines[i])
-        for i in range(fwdI+2,len(FWD_Lines)):
-            fw.write(FWD_Lines[i])
-        fw.close()
-        print('combined_file: '+cFile)
-    else:
-        traj(latPt,lonPt,hPt,yyyy,mm,dd,hh,mn,int(trajHours),path_hysplit_code,metpath,outpath,setupFile,ASCFile,tName)
-
-
-
-def read_trajectory_file(TRAJECTORY_FILE,fileFlag=0, rmfile=False):
-    file1 = open(TRAJECTORY_FILE,'r')
-    L = file1.readlines()
-    file1.close()
-    traj = {'n':[],'year':[],'month':[],'day':[],'hour':[],'minute':[],'jday':[],'lat':[],'lon':[],'alt':[],'timestep':[]}
-    for i in range(len(L)):
-        if (L[i].find('BACKWARD') > 0) | (L[i].find('FORWARD') > 0):
-            break
-    for i in range(i+3,len(L)):
-        traj['n'].append    ( int( (L[i].split())[0] ) )
-        traj['year'].append    ( int( (L[i].split())[2] )+2000 )
-        traj['month'].append   ( int( (L[i].split())[3] ) )
-        traj['day'].append     ( int( (L[i].split())[4] ) )
-        traj['hour'].append    ( int( (L[i].split())[5] ) )
-        traj['minute'].append  ( int( (L[i].split())[6] ) )
-        traj['timestep'].append( float( L[i].split()[8] ) )
-        traj['lat'].append( float( (L[i].split())[9] ) )
-        traj['lon'].append( float( (L[i].split())[10] ) )
-        traj['alt'].append( float( (L[i].split())[11] ) )
-        traj['jday'].append( datetime.datetime( int( (L[i].split())[2] )+2000, int( (L[i].split())[3] ), int( (L[i].split())[4] ), int( (L[i].split())[5] ), int( (L[i].split())[6] )))
-        
-    return traj
-
-
-
-
-
-
-#-----------------------------------------------------------------------------
-# RUN HYSPLIT trajectory
-#-----------------------------------------------------------------------------
-outpath='/home/users/pete_nut/test/'
-os.system('mkdir -p '+outpath)
-path_hysplit_code = '/home/users/pete_nut/hysplit.v5.0.0_CentOS/'
-metpath = '/gws/nopw/j04/eo_shared_data_vol1/reanalysis/ARL_noaa/reanalysis_data/ERA5/'
-setupFile = '/home/users/pete_nut/IV_shiptracks/matt_traj_code/SETUP_traj_lowcloud_15min.CFG'
-ASCFile  = '/home/users/pete_nut/IV_shiptracks/matt_traj_code/ASCDATA_JASMIN.CFG'
-
-lonInit = '-120.'
-latInit = '45.'
-year=2016
-month=6
-day=12
-hour=15
-minute=30
-jday = datetime.datetime(year,month,day,hour,minute)
-
-year   = str( jday.year  ).zfill(4)
-month  = str( jday.month ).zfill(2)
-day    = str( jday.day ).zfill(2)
-hour   = str( jday.hour ).zfill(2)
-minute = str( jday.minute ).zfill(2)
-height = '0.5' #midway through the PBL (for now)
-trajHours = 'A12'
-if trajHours.find('A') >= 0: tName = 'tdump_traj_ALLWARD_v0.txt'
-if trajHours.find('A') < 0:
-    if int(trajHours) > 0: tName = 'tdump_traj_FORWARD_v0.txt'
-if trajHours.find('A') < 0:
-    if int(trajHours) < 0: tName = 'tdump_traj_BACKWARD_v0.txt'
-
-EXP = year+month+day
-rootPath = outpath+EXP+'/'
-run_trajectory(latInit,lonInit,year,month,day,hour,minute,height,trajHours,rootPath,path_hysplit_code,metpath,setupFile,ASCFile,tName)
-trajectory = read_trajectory_file( rootPath + tName )
-print('timestep, latitude, longitude, time')
-for i in range(len(trajectory['lat'])): print(i,trajectory['timestep'][i],trajectory['lat'][i],trajectory['lon'][i],trajectory['jday'][i])
diff --git a/processing_pipeline/1_track_emissions.py b/processing_pipeline/1_track_emissions.py
index 038e6dc..62c2698 100755
--- a/processing_pipeline/1_track_emissions.py
+++ b/processing_pipeline/1_track_emissions.py
@@ -1,3 +1,15 @@
+# Name: Track Emissions from Gridded Dataset
+#
+# Description: Python code using the trackpy library to convert 1-hourly gridded emission maps into ship trajectories
+#
+# Inputs:
+# emissions maps
+#
+# Output: trajectory csv
+#
+# Libraries: numpy, numba, xarray, pandas, trackpy, calendar
+#-------------------------------------------------------------------------------------------
+
 from __future__ import division, unicode_literals, print_function  # for compatibility with Python 2 and 3
 import numba
 import sys
diff --git a/processing_pipeline/2_advect_tracks_hysplit.py b/processing_pipeline/2_advect_tracks_hysplit.py
index 4db1bdd..b7c1225 100644
--- a/processing_pipeline/2_advect_tracks_hysplit.py
+++ b/processing_pipeline/2_advect_tracks_hysplit.py
@@ -238,8 +238,8 @@ def hysplit_traj(latInit, lonInit, jday):#, overpass_time):
     os.system('mkdir -p '+outpath)
     path_hysplit_code = '/home/users/pete_nut/hysplit.v5.0.0_CentOS/'
     metpath = '/gws/nopw/j04/eo_shared_data_vol1/reanalysis/ARL_noaa/reanalysis_data/ERA5/'
-    setupFile = '/home/users/pete_nut/IV_shiptracks/matt_traj_code/SETUP_traj_lowcloud_15min.CFG'
-    ASCFile  = '/home/users/pete_nut/IV_shiptracks/matt_traj_code/ASCDATA_JASMIN.CFG'
+    setupFile = '/home/users/pete_nut/IV_shiptracks/traj_code/SETUP_traj_lowcloud_15min.CFG'
+    ASCFile  = '/home/users/pete_nut/IV_shiptracks/traj_code/ASCDATA_JASMIN.CFG'
 
     year   = str( jday.year  ).zfill(4)
     month  = str( jday.month ).zfill(2)
diff --git a/processing_pipeline/3_interpolate_collocate_cfaw_hy.py b/processing_pipeline/3_interpolate_collocate_cfaw_hy.py
index 9481493..e2db2e3 100644
--- a/processing_pipeline/3_interpolate_collocate_cfaw_hy.py
+++ b/processing_pipeline/3_interpolate_collocate_cfaw_hy.py
@@ -1,3 +1,17 @@
+# Name: Collocate the advected trajectories with MODIS satellite data
+#
+# Description: Python code using the cis library to construct the trajectory at the time of the satellite overpass and collocate to the satellite imagery
+#
+# Inputs: advected emissions of the last 24h, MODIS data
+#
+# Output: collocated trajectories and cloud properties
+#
+# Modes: normal/null experiment, Aqua/Terra
+# 
+# Libraries: numpy, cis, xarray, pandas, pickle, calendar, datetime
+#-------------------------------------------------------------------------------------------
+
+
 from __future__ import division, unicode_literals, print_function  # for compatibility with Python 2 and 3
 import numba
 import numpy as np
@@ -10,7 +24,6 @@
 import pickle
 import trackpy as tp
 import xarray as xr
-# from get_reanalysis import get_profile_data, get_uv_data
 from cis.data_io.ungridded_data import UngriddedDataList
 import os
 os.environ['CIS_PLUGIN_HOME'] = '/home/users/pete_nut/plugins/'
diff --git a/processing_pipeline/4_change_file_format.py b/processing_pipeline/4_change_file_format.py
index 8984ccf..6ea57d9 100644
--- a/processing_pipeline/4_change_file_format.py
+++ b/processing_pipeline/4_change_file_format.py
@@ -1,3 +1,17 @@
+# Name: Post-processing the data
+#
+# Description: Python code adding derived data fields and summarising the outputs
+#
+# Inputs: collocated trajectories and cloud properties
+#
+# Output: monthly trajectories and cloud properties (Aqua and Terra), with added droplet number concentration, EIS, and boolean masks for the regions of interest
+#
+# Modes: normal, null experiment
+#
+# Libraries: numpy, xarray, pandas, calendar, datetime
+#-------------------------------------------------------------------------------------------
+
+
 from __future__ import division, unicode_literals, print_function  # for compatibility with Python 2 and 3
 # import numba
 import glob
@@ -53,7 +67,8 @@ def subsume_month(month):
         print(month, day)
         try:
             new = pd.read_hdf(filename.format(year,month, day), key='df')
-            new = new.dropna(how='all', subset=['CTT','CTT_1', 'CTT_3'])
+            # drop rows in which all three LWP columns (LWP, LWP_1, LWP_3) are NaN
+            new = new.dropna(how='all', subset=['LWP','LWP_1', 'LWP_3'])
             new['terra']=0
             new.overpass=pd.to_datetime(new.overpass)
             interp_EIS = EIS.interp(time=xr.DataArray(new.overpass, dims='obs'), longitude=xr.DataArray(new.longitude, dims='obs'), latitude=xr.DataArray(new.latitude, dims='obs'))
@@ -66,7 +81,7 @@ def subsume_month(month):
 
         try:
             new = pd.read_hdf(filename_terra.format(year,month, day), key='df')
-            new = new.dropna(how='all', subset=['CTT','CTT_1', 'CTT_3'])
+            new = new.dropna(how='all', subset=['LWP','LWP_1', 'LWP_3'])
             new['terra']=1
             new.overpass=pd.to_datetime(new.overpass)
             interp_EIS = EIS.interp(time=xr.DataArray(new.overpass, dims='obs'), longitude=xr.DataArray(new.longitude, dims='obs'), latitude=xr.DataArray(new.latitude, dims='obs'))
@@ -78,13 +93,16 @@ def subsume_month(month):
     h5['ocean'] = np.logical_not(globe.is_land(h5.latitude.values, h5.longitude.values)) & np.logical_not(globe.is_land(h5.latitude.values+1, h5.longitude.values)) & np.logical_not(globe.is_land(h5.latitude.values, h5.longitude.values+1)) & np.logical_not(globe.is_land(h5.latitude.values-1, h5.longitude.values)) & np.logical_not(globe.is_land(h5.latitude.values, h5.longitude.values-1)) 
     h5=h5.set_index(pd.to_datetime(h5.index.values.astype("datetime64[ns]")))
     h5.overpass=pd.to_datetime(h5.overpass)
+    # add droplet number concentration (Nd, Nd_1, Nd_3), calculated from COT and effective radius
     h5['Nd'] = (1.37e-5 * (h5.r_eff * 1e-6)**(-5/2)*h5.COT**(1/2) / 1e6)
     h5['Nd_1'] = (1.37e-5 * (h5.r_eff_1 * 1e-6)**(-5/2)* h5.COT_1 **(1/2) / 1e6)
     h5['Nd_3'] = (1.37e-5 * (h5.r_eff_3 * 1e-6)**(-5/2)* h5.COT_3 **(1/2) / 1e6)
+    # add boolean masks for the regions of interest
     h5['chil'] = condi(h5, [[-30., -17.],[ -82.5,-72.5]])
     h5['azor'] = condi(h5,[[30, 50],[ -40,-10]])
     h5['cver'] = condi(h5,[[10., 30.],[ -50,-20]])
     h5['ango'] = condi(h5,[[-30., -10.],[ -10.,15.]])
+    # add the time between emission and satellite overpass in whole hours (how long the emissions have been advected for)
     h5['hours_diff']=pd.to_timedelta((h5.overpass-h5.index).values).seconds//3600
     
     print('writing')