From ca10b52ad08d3c24472eb2546f927c9661cbcf01 Mon Sep 17 00:00:00 2001
From: aring1988
Date: Thu, 13 Aug 2020 13:19:38 -0400
Subject: [PATCH 1/2] Created reader for PAMS data

---
Review notes (placed below the three-dash marker, so they are not part of
the commit message):
- Line breaks, indentation and blank context lines in this series were
  rebuilt from a whitespace-collapsed copy. The added-line count of the new
  file (143) and the position of the 2/2 hunk (lines 62-68) agree with the
  hunk headers, but blank context lines and continuation indents are
  inferred; re-check them against the tree before applying.
- The From: lines carry no e-mail address in this copy; none was invented.
- get_header() as added here reads json['Header'], i.e. it indexes the json
  module instead of the jsonf argument. Patch 2/2 corrects this.
- In monetio/obs/__init__.py the unchanged __all__ line ends with 'ish_lite'
  and has no trailing comma, so Python joins it with 'cems_mod' on the next
  line into a single string. This series does not touch that line.

 monetio/__init__.py     |   1 +
 monetio/obs/__init__.py |   4 +-
 monetio/obs/pams.py     | 143 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 146 insertions(+), 2 deletions(-)
 create mode 100644 monetio/obs/pams.py

diff --git a/monetio/__init__.py b/monetio/__init__.py
index 72984672..500c0afc 100644
--- a/monetio/__init__.py
+++ b/monetio/__init__.py
@@ -11,6 +11,7 @@ ish_lite = obs.ish_lite
 nadp = obs.nadp
 openaq = obs.openaq
 
+pams = obs.pams
 
 # models
 fv3chem = models.fv3chem
diff --git a/monetio/obs/__init__.py b/monetio/obs/__init__.py
index a657e839..5c09a5b5 100644
--- a/monetio/obs/__init__.py
+++ b/monetio/obs/__init__.py
@@ -1,9 +1,9 @@
 from . import (aeronet, airnow, aqs, cems_mod, crn, epa_util, icartt,
-               improve_mod, ish, ish_lite, nadp, openaq)
+               improve_mod, ish, ish_lite, nadp, openaq, pams)
 
 __all__ = [
     'aeronet', 'airnow', 'aqs', 'crn', 'epa_util', 'improve_mod', 'ish', 'ish_lite'
-    'cems_mod', 'nadp', 'openaq'
+    'cems_mod', 'nadp', 'openaq', 'pams'
 ]
 
 __name__ = 'obs'
diff --git a/monetio/obs/pams.py b/monetio/obs/pams.py
new file mode 100644
index 00000000..aed87ec4
--- /dev/null
+++ b/monetio/obs/pams.py
@@ -0,0 +1,143 @@
+# Reads json data files from
+# https://aqs.epa.gov/aqsweb/documents/data_api.html
+
+import json
+import pandas as pd
+
+
+def open_dataset(filename):
+    """ Opens a json file, returns data array
+
+    Parameters
+    -----------------
+    filename: string
+        Full file path for json file
+
+    Returns
+    -----------------
+    data: Pandas DataFrame
+        DataFrame with all pertinent information
+
+    """
+
+    jsonf = open_json(filename)
+    data = get_data(jsonf)
+
+    return data
+
+
+def open_json(filename):
+    """ Opens the json file
+
+
+    Parameters
+    ----------------
+    filename: string
+        Full file path for json file
+
+    Returns
+    ----------------
+    jsonf: dictionary
+        Json file is opened and ready to be used by other functions in this code
+        Contains two dictionaries: 'Header' and 'Data'
+
+    """
+
+    with open(filename) as f:
+        jsonf = json.load(f)
+    return jsonf
+
+
+def get_header(jsonf):
+    """Finds basic header information in json file
+
+
+    Parameters
+    ----------------
+    jsonf: dictionary
+
+    Results
+    ----------------
+    header: Pandas DataFrame
+
+    """
+
+    header = json['Header']
+    header = pd.DataFrame.from_dict(header)
+    return header
+
+
+def get_data(jsonf):
+    """ Finds data in json file
+
+    Parameters
+    ----------------
+    jsonf: dictionary
+
+    Results
+    ----------------
+    data: Pandas DataFrame
+        DataFrame containing pertinent information
+        Date and Time are Datetime Objects
+
+    """
+    dataf = jsonf['Data']
+    data = pd.DataFrame.from_dict(dataf)
+
+    # Combining state code, county code, and site number into one column
+    data['siteid'] = data.state_code.astype(str).str.zfill(
+        2)+data.county_code.astype(str).str.zfill(3)+data.site_number.astype(str).str.zfill(4)
+
+    # Combining date and time into one column
+    data['datetime_local'] = pd.to_datetime(data['date_local']+' '+data['time_local'])
+    data['datetime_utc'] = pd.to_datetime(data['date_gmt']+' '+data['time_gmt'])
+
+    # Renaming columns
+    data = data.rename(columns={'sample_measurement': 'obs',
+                                'units_of_measure': 'units', 'units_of_measure_code': 'unit_code'})
+
+    # Dropping some columns, and reordering columns
+    data = data.drop(columns=['state_code', 'county_code', 'site_number', 'datum',
+                              'qualifier', 'uncertainty', 'county', 'state', 'date_of_last_change',
+                              'date_local', 'time_local', 'date_gmt', 'time_gmt', 'poc', 'unit_code',
+                              'sample_duration_code', 'method_code'])
+    cols = data.columns.tolist()
+    cols.insert(0, cols.pop(cols.index('siteid')))
+    cols.insert(1, cols.pop(cols.index('latitude')))
+    cols.insert(2, cols.pop(cols.index('longitude')))
+    cols.insert(3, cols.pop(cols.index('datetime_local')))
+    cols.insert(4, cols.pop(cols.index('datetime_utc')))
+    data = data.reindex(columns=cols)
+
+    # Adjusting parameter units
+    units = data.units.unique()
+    for i in units:
+        con = data.units == i
+        if i.upper() == 'Parts per billion Carbon'.upper():
+            data.loc[con, 'units'] = 'ppbC'
+        if i == 'Parts per billion':
+            data.loc[con, 'units'] = 'ppb'
+        if i == 'Parts per million':
+            data.loc[con, 'units'] = 'ppm'
+    return data
+
+
+def write_csv(array, filename):
+    """Writes the data array to a csv file
+
+
+    Parameters
+    ----------------
+    array: Pandas DataFrame
+        Can be any Pandas DataFrame
+
+    filename: string
+        Full path and filename of csv file
+
+    Returns
+    ----------------
+    Generates csv file of specified name in specified location
+
+    """
+    array.to_csv(filename, encoding='utf-8', index=False)
+    return 'csv file '+filename+' has been generated'

From 5f7918d4fe93ccb358ad11139f4e79fe6f96f516 Mon Sep 17 00:00:00 2001
From: aring1988
Date: Thu, 13 Aug 2020 14:45:46 -0400
Subject: [PATCH 2/2] Fixed minor typo

---
Review note (not part of the commit message): this replaces the json module
lookup added in 1/2 with a lookup on the jsonf argument of get_header().

 monetio/obs/pams.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monetio/obs/pams.py b/monetio/obs/pams.py
index aed87ec4..05503de6 100644
--- a/monetio/obs/pams.py
+++ b/monetio/obs/pams.py
@@ -62,7 +62,7 @@ def get_header(jsonf):
 
     """
 
-    header = json['Header']
+    header = jsonf['Header']
     header = pd.DataFrame.from_dict(header)
     return header
 