From ca10b52ad08d3c24472eb2546f927c9661cbcf01 Mon Sep 17 00:00:00 2001 From: aring1988 Date: Thu, 13 Aug 2020 13:19:38 -0400 Subject: [PATCH 1/5] Created reader for PAMS data --- monetio/__init__.py | 1 + monetio/obs/__init__.py | 4 +- monetio/obs/pams.py | 143 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 2 deletions(-) create mode 100644 monetio/obs/pams.py diff --git a/monetio/__init__.py b/monetio/__init__.py index 72984672..500c0afc 100644 --- a/monetio/__init__.py +++ b/monetio/__init__.py @@ -11,6 +11,7 @@ ish_lite = obs.ish_lite nadp = obs.nadp openaq = obs.openaq +pams = obs.pams # models fv3chem = models.fv3chem diff --git a/monetio/obs/__init__.py b/monetio/obs/__init__.py index a657e839..5c09a5b5 100644 --- a/monetio/obs/__init__.py +++ b/monetio/obs/__init__.py @@ -1,9 +1,9 @@ from . import (aeronet, airnow, aqs, cems_mod, crn, epa_util, icartt, - improve_mod, ish, ish_lite, nadp, openaq) + improve_mod, ish, ish_lite, nadp, openaq, pams) __all__ = [ 'aeronet', 'airnow', 'aqs', 'crn', 'epa_util', 'improve_mod', 'ish', 'ish_lite' - 'cems_mod', 'nadp', 'openaq' + 'cems_mod', 'nadp', 'openaq', 'pams' ] __name__ = 'obs' diff --git a/monetio/obs/pams.py b/monetio/obs/pams.py new file mode 100644 index 00000000..aed87ec4 --- /dev/null +++ b/monetio/obs/pams.py @@ -0,0 +1,143 @@ +# Reads json data files from +# https://aqs.epa.gov/aqsweb/documents/data_api.html + +import json +import pandas as pd + + +def open_dataset(filename): + """ Opens a json file, returns data array + + Parameters + ----------------- + filename: string + Full file path for json file + + Returns + ----------------- + data: Pandas DataFrame + DataFrame with all pertinent information + + """ + + jsonf = open_json(filename) + data = get_data(jsonf) + + return data + + +def open_json(filename): + """ Opens the json file + + + Parameters + ---------------- + filename: string + Full file path for json file + + Returns + ---------------- + jsonf: dictionary + Json file is opened and ready to be used by other functions in this code + Contains two dictionaries: 'Header' and 'Data' + + """ + + with open(filename) as f: + jsonf = json.load(f) + return jsonf + + +def get_header(jsonf): + """Finds basic header information in json file + + + Parameters + ---------------- + jsonf: dictionary + + Results + ---------------- + header: Pandas DataFrame + + """ + + header = json['Header'] + header = pd.DataFrame.from_dict(header) + return header + + +def get_data(jsonf): + """ Finds data in json file + + Parameters + ---------------- + jsonf: dictionary + + Results + ---------------- + data: Pandas DataFrame + DataFrame containing pertinent information + Date and Time are Datetime Objects + + """ + dataf = jsonf['Data'] + data = pd.DataFrame.from_dict(dataf) + + # Combining state code, county code, and site number into one column + data['siteid'] = data.state_code.astype(str).str.zfill( + 2)+data.county_code.astype(str).str.zfill(3)+data.site_number.astype(str).str.zfill(4) + + # Combining date and time into one column + data['datetime_local'] = pd.to_datetime(data['date_local']+' '+data['time_local']) + data['datetime_utc'] = pd.to_datetime(data['date_gmt']+' '+data['time_gmt']) + + # Renaming columns + data = data.rename(columns={'sample_measurement': 'obs', + 'units_of_measure': 'units', 'units_of_measure_code': 'unit_code'}) + + # Dropping some columns, and reordering columns + data = data.drop(columns=['state_code', 'county_code', 'site_number', 'datum', + 'qualifier', 'uncertainty', 'county', 'state', 'date_of_last_change', + 'date_local', 'time_local', 'date_gmt', 'time_gmt', 'poc', 'unit_code', + 'sample_duration_code', 'method_code']) + cols = data.columns.tolist() + cols.insert(0, cols.pop(cols.index('siteid'))) + cols.insert(1, cols.pop(cols.index('latitude'))) + cols.insert(2, cols.pop(cols.index('longitude'))) + cols.insert(3, cols.pop(cols.index('datetime_local'))) + cols.insert(4, cols.pop(cols.index('datetime_utc'))) + data = data.reindex(columns=cols) + + # Adjusting parameter units + units = data.units.unique() + for i in units: + con = data.units == i + if i.upper() == 'Parts per billion Carbon'.upper(): + data.loc[con, 'units'] = 'ppbC' + if i == 'Parts per billion': + data.loc[con, 'units'] = 'ppb' + if i == 'Parts per million': + data.loc[con, 'units'] = 'ppm' + return data + + +def write_csv(array, filename): + """Writes the data array to a csv file + + + Parameters + ---------------- + array: Pandas DataFrame + Can be any Pandas DataFrame + + filename: string + Full path and filename of csv file + + Returns + ---------------- + Generates csv file of specified name in specified location + + """ + array.to_csv(filename, encoding='utf-8', index=False) + return 'csv file '+filename+' has been generated' From 5f7918d4fe93ccb358ad11139f4e79fe6f96f516 Mon Sep 17 00:00:00 2001 From: aring1988 Date: Thu, 13 Aug 2020 14:45:46 -0400 Subject: [PATCH 2/5] Fixed minor typo --- monetio/obs/pams.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monetio/obs/pams.py b/monetio/obs/pams.py index aed87ec4..05503de6 100644 --- a/monetio/obs/pams.py +++ b/monetio/obs/pams.py @@ -62,7 +62,7 @@ def get_header(jsonf): """ - header = json['Header'] + header = jsonf['Header'] header = pd.DataFrame.from_dict(header) return header From 64068954f68ac4e36c906c3f0573b2762fed892f Mon Sep 17 00:00:00 2001 From: amcz Date: Tue, 12 Jan 2021 15:32:30 -0500 Subject: [PATCH 3/5] removed print statement from pardump.py --- monetio/models/pardump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monetio/models/pardump.py b/monetio/models/pardump.py index b7f6d51f..6e086055 100755 --- a/monetio/models/pardump.py +++ b/monetio/models/pardump.py @@ -210,7 +210,7 @@ def read(self, drange=None, verbose=False, century=2000, sorti=None): # Only store data if it is in the daterange specified. if testdate: - print("Adding data ", hdata, pdate) + #print("Adding data ", hdata, pdate) # otherwise get endian error message when create dataframe. ndata = data.byteswap().newbyteorder() par_frame = pd.DataFrame.from_records(ndata) # create data frame From eff753d1d553d8816bf0fb76503c38d13c432c4e Mon Sep 17 00:00:00 2001 From: amcz Date: Thu, 4 Feb 2021 09:18:09 -0500 Subject: [PATCH 4/5] hysplit.py changed where self.atthash["Species ID"] is initialized. It was initialized inside the time loop and when last time periods contained no data, it was then empty. This caused problems for the add_species function. --- monetio/models/hysplit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monetio/models/hysplit.py b/monetio/models/hysplit.py index 1985f8b5..169951f9 100644 --- a/monetio/models/hysplit.py +++ b/monetio/models/hysplit.py @@ -519,6 +519,7 @@ class and need to be converted to a python np.fromfile(fid, dtype=rec5b, count=hdata5a["pollnum"][0]) np.fromfile(fid, dtype=rec5c, count=1) self.atthash["Number of Species"] = hdata5a["pollnum"][0] + self.atthash["Species ID"] = [] # Loop to reads records 6-8. Number of loops is equal to number of # output times. @@ -541,7 +542,6 @@ class and need to be converted to a python if verbose: print("sample time", pdate1, " to ", pdate2) # datelist = [] - self.atthash["Species ID"] = [] inc_iii = False # LOOP to go through each level for lev in range(self.atthash["Number of Levels"]): From 98ef907b07151615ffa1deace038a4b142c40b04 Mon Sep 17 00:00:00 2001 From: aring1988 Date: Mon, 8 Feb 2021 16:57:07 -0500 Subject: [PATCH 5/5] Made necessary adjustments and removed unnecessary function --- monetio/obs/pams.py | 104 ++++++++++++++++++-------------------------- 1 file changed, 43 insertions(+), 61 deletions(-) diff --git a/monetio/obs/pams.py b/monetio/obs/pams.py index 05503de6..7aa0bf36 100644 --- a/monetio/obs/pams.py +++ b/monetio/obs/pams.py @@ -5,7 +5,7 @@ import pandas as pd -def open_dataset(filename): +def add_data(filename): """ Opens a json file, returns data array Parameters @@ -17,12 +17,48 @@ def open_dataset(filename): ----------------- data: Pandas DataFrame DataFrame with all pertinent information - + Date and Time are Datetime Objects """ jsonf = open_json(filename) - data = get_data(jsonf) + dataf = jsonf['Data'] + data = pd.DataFrame.from_dict(dataf) + + # Combining state code, county code, and site number into one column + data['siteid'] = data.state_code.astype(str).str.zfill( + 2)+data.county_code.astype(str).str.zfill(3)+data.site_number.astype(str).str.zfill(4) + + # Combining date and time into one column + data['datetime_local'] = pd.to_datetime(data['date_local']+' '+data['time_local']) + data['datetime_utc'] = pd.to_datetime(data['date_gmt']+' '+data['time_gmt']) + + # Renaming columns + data = data.rename(columns={'sample_measurement': 'obs', + 'units_of_measure': 'units', 'units_of_measure_code': 'unit_code'}) + # Dropping some columns, and reordering columns + data = data.drop(columns=['state_code', 'county_code', 'site_number', 'datum', + 'qualifier', 'uncertainty', 'county', 'state', 'date_of_last_change', + 'date_local', 'time_local', 'date_gmt', 'time_gmt', 'poc', 'unit_code', + 'sample_duration_code', 'method_code']) + cols = data.columns.tolist() + cols.insert(0, cols.pop(cols.index('siteid'))) + cols.insert(1, cols.pop(cols.index('latitude'))) + cols.insert(2, cols.pop(cols.index('longitude'))) + cols.insert(3, cols.pop(cols.index('datetime_local'))) + cols.insert(4, cols.pop(cols.index('datetime_utc'))) + data = data.reindex(columns=cols) + + # Adjusting parameter units + units = data.units.unique() + for i in units: + con = data.units == i + if i.upper() == 'Parts per billion Carbon'.upper(): + data.loc[con, 'units'] = 'ppbC' + if i == 'Parts per billion': + data.loc[con, 'units'] = 'ppb' + if i == 'Parts per million': + data.loc[con, 'units'] = 'ppm' return data @@ -48,80 +84,26 @@ def open_json(filename): return jsonf -def get_header(jsonf): +def get_header(filename): """Finds basic header information in json file Parameters ---------------- - jsonf: dictionary + filename: string + Full file path for json file Results ---------------- header: Pandas DataFrame """ - + jsonf = open_json(filename) header = jsonf['Header'] header = pd.DataFrame.from_dict(header) return header -def get_data(jsonf): - """ Finds data in json file - - Parameters - ---------------- - jsonf: dictionary - - Results - ---------------- - data: Pandas DataFrame - DataFrame containing pertinent information - Date and Time are Datetime Objects - - """ - dataf = jsonf['Data'] - data = pd.DataFrame.from_dict(dataf) - - # Combining state code, county code, and site number into one column - data['siteid'] = data.state_code.astype(str).str.zfill( - 2)+data.county_code.astype(str).str.zfill(3)+data.site_number.astype(str).str.zfill(4) - - # Combining date and time into one column - data['datetime_local'] = pd.to_datetime(data['date_local']+' '+data['time_local']) - data['datetime_utc'] = pd.to_datetime(data['date_gmt']+' '+data['time_gmt']) - - # Renaming columns - data = data.rename(columns={'sample_measurement': 'obs', - 'units_of_measure': 'units', 'units_of_measure_code': 'unit_code'}) - - # Dropping some columns, and reordering columns - data = data.drop(columns=['state_code', 'county_code', 'site_number', 'datum', - 'qualifier', 'uncertainty', 'county', 'state', 'date_of_last_change', - 'date_local', 'time_local', 'date_gmt', 'time_gmt', 'poc', 'unit_code', - 'sample_duration_code', 'method_code']) - cols = data.columns.tolist() - cols.insert(0, cols.pop(cols.index('siteid'))) - cols.insert(1, cols.pop(cols.index('latitude'))) - cols.insert(2, cols.pop(cols.index('longitude'))) - cols.insert(3, cols.pop(cols.index('datetime_local'))) - cols.insert(4, cols.pop(cols.index('datetime_utc'))) - data = data.reindex(columns=cols) - - # Adjusting parameter units - units = data.units.unique() - for i in units: - con = data.units == i - if i.upper() == 'Parts per billion Carbon'.upper(): - data.loc[con, 'units'] = 'ppbC' - if i == 'Parts per billion': - data.loc[con, 'units'] = 'ppb' - if i == 'Parts per million': - data.loc[con, 'units'] = 'ppm' - return data - - def write_csv(array, filename): """Writes the data array to a csv file