From ca10b52ad08d3c24472eb2546f927c9661cbcf01 Mon Sep 17 00:00:00 2001
From: aring1988 <aring1@umd.edu>
Date: Thu, 13 Aug 2020 13:19:38 -0400
Subject: [PATCH 1/5] Created reader for PAMS data

---
 monetio/__init__.py     |   1 +
 monetio/obs/__init__.py |   4 +-
 monetio/obs/pams.py     | 143 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 146 insertions(+), 2 deletions(-)
 create mode 100644 monetio/obs/pams.py

diff --git a/monetio/__init__.py b/monetio/__init__.py
index 72984672..500c0afc 100644
--- a/monetio/__init__.py
+++ b/monetio/__init__.py
@@ -11,6 +11,7 @@
 ish_lite = obs.ish_lite
 nadp = obs.nadp
 openaq = obs.openaq
+pams = obs.pams
 
 # models
 fv3chem = models.fv3chem
diff --git a/monetio/obs/__init__.py b/monetio/obs/__init__.py
index a657e839..5c09a5b5 100644
--- a/monetio/obs/__init__.py
+++ b/monetio/obs/__init__.py
@@ -1,9 +1,9 @@
 from . import (aeronet, airnow, aqs, cems_mod, crn, epa_util, icartt,
-               improve_mod, ish, ish_lite, nadp, openaq)
+               improve_mod, ish, ish_lite, nadp, openaq, pams)
 
 __all__ = [
     'aeronet', 'airnow', 'aqs', 'crn', 'epa_util', 'improve_mod', 'ish', 'ish_lite'
-    'cems_mod', 'nadp', 'openaq'
+    'cems_mod', 'nadp', 'openaq', 'pams'
 ]
 
 __name__ = 'obs'
diff --git a/monetio/obs/pams.py b/monetio/obs/pams.py
new file mode 100644
index 00000000..aed87ec4
--- /dev/null
+++ b/monetio/obs/pams.py
@@ -0,0 +1,143 @@
+# Reads json data files from
+# https://aqs.epa.gov/aqsweb/documents/data_api.html
+
+import json
+import pandas as pd
+
+
+def open_dataset(filename):
+    """ Opens a json file, returns data array
+
+    Parameters
+    -----------------
+    filename: string
+            Full file path for json file
+
+    Returns
+    -----------------
+    data: Pandas DataFrame
+            DataFrame with all pertinent information
+
+    """
+
+    jsonf = open_json(filename)
+    data = get_data(jsonf)
+
+    return data
+
+
+def open_json(filename):
+    """Opens the json file and returns its parsed content
+
+    Parameters
+    ----------------
+    filename: string
+           Full file path for json file
+
+    Returns
+    ----------------
+    jsonf: dictionary
+           Parsed json content, ready to be used by other functions in this code
+           Expected to contain two entries: 'Header' and 'Data'
+
+    """
+
+    # Read as utf-8 explicitly instead of relying on the platform default encoding
+    with open(filename, encoding='utf-8') as f:
+        jsonf = json.load(f)
+    return jsonf
+
+
+def get_header(jsonf):
+    """Finds basic header information in json file
+
+
+    Parameters
+    ----------------
+    jsonf: dictionary
+
+    Results
+    ----------------
+    header: Pandas DataFrame
+
+    """
+
+    header = json['Header']
+    header = pd.DataFrame.from_dict(header)
+    return header
+
+
+def get_data(jsonf):
+    """ Finds data in json file
+
+    Parameters
+    ----------------
+    jsonf: dictionary
+
+    Results
+    ----------------
+    data: Pandas DataFrame
+             DataFrame containing pertinent information
+             Date and Time are Datetime Objects
+
+    """
+    dataf = jsonf['Data']
+    data = pd.DataFrame.from_dict(dataf)
+
+    # Combining state code, county code, and site number into one column
+    data['siteid'] = data.state_code.astype(str).str.zfill(
+        2)+data.county_code.astype(str).str.zfill(3)+data.site_number.astype(str).str.zfill(4)
+
+    # Combining date and time into one column
+    data['datetime_local'] = pd.to_datetime(data['date_local']+' '+data['time_local'])
+    data['datetime_utc'] = pd.to_datetime(data['date_gmt']+' '+data['time_gmt'])
+
+    # Renaming columns
+    data = data.rename(columns={'sample_measurement': 'obs',
+                                'units_of_measure': 'units', 'units_of_measure_code': 'unit_code'})
+
+    # Dropping some columns, and reordering columns
+    data = data.drop(columns=['state_code', 'county_code', 'site_number', 'datum',
+                              'qualifier', 'uncertainty', 'county', 'state', 'date_of_last_change',
+                              'date_local', 'time_local', 'date_gmt', 'time_gmt', 'poc', 'unit_code',
+                              'sample_duration_code', 'method_code'])
+    cols = data.columns.tolist()
+    cols.insert(0, cols.pop(cols.index('siteid')))
+    cols.insert(1, cols.pop(cols.index('latitude')))
+    cols.insert(2, cols.pop(cols.index('longitude')))
+    cols.insert(3, cols.pop(cols.index('datetime_local')))
+    cols.insert(4, cols.pop(cols.index('datetime_utc')))
+    data = data.reindex(columns=cols)
+
+    # Adjusting parameter units
+    units = data.units.unique()
+    for i in units:
+        con = data.units == i
+        if i.upper() == 'Parts per billion Carbon'.upper():
+            data.loc[con, 'units'] = 'ppbC'
+        if i == 'Parts per billion':
+            data.loc[con, 'units'] = 'ppb'
+        if i == 'Parts per million':
+            data.loc[con, 'units'] = 'ppm'
+    return data
+
+
+def write_csv(array, filename):
+    """Writes the data array to a csv file
+
+
+    Parameters
+    ----------------
+    array: Pandas DataFrame
+            Written as utf-8 without the index column
+
+    filename: string or path-like
+            Full path and filename of csv file
+
+    Returns
+    ----------------
+    message: string
+            Confirmation naming the csv file that was generated
+    """
+    array.to_csv(filename, encoding='utf-8', index=False)
+    return 'csv file {} has been generated'.format(filename)

From 5f7918d4fe93ccb358ad11139f4e79fe6f96f516 Mon Sep 17 00:00:00 2001
From: aring1988 <aring1@umd.edu>
Date: Thu, 13 Aug 2020 14:45:46 -0400
Subject: [PATCH 2/5] Fixed minor typo

---
 monetio/obs/pams.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monetio/obs/pams.py b/monetio/obs/pams.py
index aed87ec4..05503de6 100644
--- a/monetio/obs/pams.py
+++ b/monetio/obs/pams.py
@@ -62,7 +62,7 @@ def get_header(jsonf):
 
     """
 
-    header = json['Header']
+    header = jsonf['Header']
     header = pd.DataFrame.from_dict(header)
     return header
 

From 64068954f68ac4e36c906c3f0573b2762fed892f Mon Sep 17 00:00:00 2001
From: amcz <alicec@umd.edu>
Date: Tue, 12 Jan 2021 15:32:30 -0500
Subject: [PATCH 3/5] commented out print statement in pardump.py

---
 monetio/models/pardump.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monetio/models/pardump.py b/monetio/models/pardump.py
index b7f6d51f..6e086055 100755
--- a/monetio/models/pardump.py
+++ b/monetio/models/pardump.py
@@ -210,7 +210,7 @@ def read(self, drange=None, verbose=False, century=2000, sorti=None):
 
                 # Only store data if it is in the daterange specified.
                 if testdate:
-                    print("Adding data ", hdata, pdate)
+                    #print("Adding data ", hdata, pdate)
                     # otherwise get endian error message when create dataframe.
                     ndata = data.byteswap().newbyteorder()
                     par_frame = pd.DataFrame.from_records(ndata)  # create data frame

From eff753d1d553d8816bf0fb76503c38d13c432c4e Mon Sep 17 00:00:00 2001
From: amcz <alicec@umd.edu>
Date: Thu, 4 Feb 2021 09:18:09 -0500
Subject: [PATCH 4/5] hysplit.py: changed where self.atthash["Species ID"] is
 initialized. It was reset inside the time loop, so when the last time
 periods contained no data it was left empty. This caused problems for the
 add_species function.

---
 monetio/models/hysplit.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monetio/models/hysplit.py b/monetio/models/hysplit.py
index 1985f8b5..169951f9 100644
--- a/monetio/models/hysplit.py
+++ b/monetio/models/hysplit.py
@@ -519,6 +519,7 @@ class and need to be converted to a python
         np.fromfile(fid, dtype=rec5b, count=hdata5a["pollnum"][0])
         np.fromfile(fid, dtype=rec5c, count=1)
         self.atthash["Number of Species"] = hdata5a["pollnum"][0]
+        self.atthash["Species ID"] = []
 
         # Loop to reads records 6-8. Number of loops is equal to number of
         # output times.
@@ -541,7 +542,6 @@ class and need to be converted to a python
             if verbose:
                 print("sample time", pdate1, " to ", pdate2)
             # datelist = []
-            self.atthash["Species ID"] = []
             inc_iii = False
             # LOOP to go through each level
             for lev in range(self.atthash["Number of Levels"]):

From 98ef907b07151615ffa1deace038a4b142c40b04 Mon Sep 17 00:00:00 2001
From: aring1988 <aring1@umd.edu>
Date: Mon, 8 Feb 2021 16:57:07 -0500
Subject: [PATCH 5/5] Renamed open_dataset to add_data, merged get_data into
 it, and changed get_header to take a filename

---
 monetio/obs/pams.py | 104 ++++++++++++++++++--------------------------
 1 file changed, 43 insertions(+), 61 deletions(-)

diff --git a/monetio/obs/pams.py b/monetio/obs/pams.py
index 05503de6..7aa0bf36 100644
--- a/monetio/obs/pams.py
+++ b/monetio/obs/pams.py
@@ -5,7 +5,7 @@
 import pandas as pd
 
 
-def open_dataset(filename):
+def add_data(filename):
     """ Opens a json file, returns data array
 
     Parameters
@@ -17,12 +17,48 @@ def open_dataset(filename):
     -----------------
     data: Pandas DataFrame
             DataFrame with all pertinent information
-
+            Date and Time are Datetime Objects
     """
 
     jsonf = open_json(filename)
-    data = get_data(jsonf)
+    dataf = jsonf['Data']
+    data = pd.DataFrame.from_dict(dataf)
+
+    # Combining state code, county code, and site number into one column
+    data['siteid'] = data.state_code.astype(str).str.zfill(
+        2)+data.county_code.astype(str).str.zfill(3)+data.site_number.astype(str).str.zfill(4)
+
+    # Combining date and time into one column
+    data['datetime_local'] = pd.to_datetime(data['date_local']+' '+data['time_local'])
+    data['datetime_utc'] = pd.to_datetime(data['date_gmt']+' '+data['time_gmt'])
+
+    # Renaming columns
+    data = data.rename(columns={'sample_measurement': 'obs',
+                                'units_of_measure': 'units', 'units_of_measure_code': 'unit_code'})
 
+    # Dropping some columns, and reordering columns
+    data = data.drop(columns=['state_code', 'county_code', 'site_number', 'datum',
+                              'qualifier', 'uncertainty', 'county', 'state', 'date_of_last_change',
+                              'date_local', 'time_local', 'date_gmt', 'time_gmt', 'poc', 'unit_code',
+                              'sample_duration_code', 'method_code'])
+    cols = data.columns.tolist()
+    cols.insert(0, cols.pop(cols.index('siteid')))
+    cols.insert(1, cols.pop(cols.index('latitude')))
+    cols.insert(2, cols.pop(cols.index('longitude')))
+    cols.insert(3, cols.pop(cols.index('datetime_local')))
+    cols.insert(4, cols.pop(cols.index('datetime_utc')))
+    data = data.reindex(columns=cols)
+
+    # Adjusting parameter units
+    units = data.units.unique()
+    for i in units:
+        con = data.units == i
+        if i.upper() == 'Parts per billion Carbon'.upper():
+            data.loc[con, 'units'] = 'ppbC'
+        if i == 'Parts per billion':
+            data.loc[con, 'units'] = 'ppb'
+        if i == 'Parts per million':
+            data.loc[con, 'units'] = 'ppm'
     return data
 
 
@@ -48,80 +84,26 @@ def open_json(filename):
     return jsonf
 
 
-def get_header(jsonf):
+def get_header(filename):
     """Finds basic header information in json file
 
 
     Parameters
     ----------------
-    jsonf: dictionary
+    filename: string
+           Full file path for json file
 
     Results
     ----------------
     header: Pandas DataFrame
 
     """
-
+    jsonf = open_json(filename)
     header = jsonf['Header']
     header = pd.DataFrame.from_dict(header)
     return header
 
 
-def get_data(jsonf):
-    """ Finds data in json file
-
-    Parameters
-    ----------------
-    jsonf: dictionary
-
-    Results
-    ----------------
-    data: Pandas DataFrame
-             DataFrame containing pertinent information
-             Date and Time are Datetime Objects
-
-    """
-    dataf = jsonf['Data']
-    data = pd.DataFrame.from_dict(dataf)
-
-    # Combining state code, county code, and site number into one column
-    data['siteid'] = data.state_code.astype(str).str.zfill(
-        2)+data.county_code.astype(str).str.zfill(3)+data.site_number.astype(str).str.zfill(4)
-
-    # Combining date and time into one column
-    data['datetime_local'] = pd.to_datetime(data['date_local']+' '+data['time_local'])
-    data['datetime_utc'] = pd.to_datetime(data['date_gmt']+' '+data['time_gmt'])
-
-    # Renaming columns
-    data = data.rename(columns={'sample_measurement': 'obs',
-                                'units_of_measure': 'units', 'units_of_measure_code': 'unit_code'})
-
-    # Dropping some columns, and reordering columns
-    data = data.drop(columns=['state_code', 'county_code', 'site_number', 'datum',
-                              'qualifier', 'uncertainty', 'county', 'state', 'date_of_last_change',
-                              'date_local', 'time_local', 'date_gmt', 'time_gmt', 'poc', 'unit_code',
-                              'sample_duration_code', 'method_code'])
-    cols = data.columns.tolist()
-    cols.insert(0, cols.pop(cols.index('siteid')))
-    cols.insert(1, cols.pop(cols.index('latitude')))
-    cols.insert(2, cols.pop(cols.index('longitude')))
-    cols.insert(3, cols.pop(cols.index('datetime_local')))
-    cols.insert(4, cols.pop(cols.index('datetime_utc')))
-    data = data.reindex(columns=cols)
-
-    # Adjusting parameter units
-    units = data.units.unique()
-    for i in units:
-        con = data.units == i
-        if i.upper() == 'Parts per billion Carbon'.upper():
-            data.loc[con, 'units'] = 'ppbC'
-        if i == 'Parts per billion':
-            data.loc[con, 'units'] = 'ppb'
-        if i == 'Parts per million':
-            data.loc[con, 'units'] = 'ppm'
-    return data
-
-
 def write_csv(array, filename):
     """Writes the data array to a csv file