
Commit

Merge pull request #29 from aring1988/master
Made necessary adjustments and removed unnecessary function
amcz authored Feb 9, 2021
2 parents cc7ccdf + 98ef907 commit 53a569f
Showing 1 changed file with 43 additions and 61 deletions.
104 changes: 43 additions & 61 deletions monetio/obs/pams.py
@@ -5,7 +5,7 @@
import pandas as pd


def open_dataset(filename):
def add_data(filename):
""" Opens a json file, returns data array
Parameters
@@ -17,12 +17,48 @@ def open_dataset(filename):
    -----------------
    data: Pandas DataFrame
        DataFrame with all pertinent information
        Date and Time are Datetime Objects
    """

    jsonf = open_json(filename)
    data = get_data(jsonf)
    dataf = jsonf['Data']
    data = pd.DataFrame.from_dict(dataf)

    # Combining state code, county code, and site number into one column
    data['siteid'] = data.state_code.astype(str).str.zfill(
        2)+data.county_code.astype(str).str.zfill(3)+data.site_number.astype(str).str.zfill(4)

    # Combining date and time into one column
    data['datetime_local'] = pd.to_datetime(data['date_local']+' '+data['time_local'])
    data['datetime_utc'] = pd.to_datetime(data['date_gmt']+' '+data['time_gmt'])

    # Renaming columns
    data = data.rename(columns={'sample_measurement': 'obs',
                                'units_of_measure': 'units', 'units_of_measure_code': 'unit_code'})

    # Dropping some columns, and reordering columns
    data = data.drop(columns=['state_code', 'county_code', 'site_number', 'datum',
                              'qualifier', 'uncertainty', 'county', 'state', 'date_of_last_change',
                              'date_local', 'time_local', 'date_gmt', 'time_gmt', 'poc', 'unit_code',
                              'sample_duration_code', 'method_code'])
    cols = data.columns.tolist()
    cols.insert(0, cols.pop(cols.index('siteid')))
    cols.insert(1, cols.pop(cols.index('latitude')))
    cols.insert(2, cols.pop(cols.index('longitude')))
    cols.insert(3, cols.pop(cols.index('datetime_local')))
    cols.insert(4, cols.pop(cols.index('datetime_utc')))
    data = data.reindex(columns=cols)

    # Adjusting parameter units
    units = data.units.unique()
    for i in units:
        con = data.units == i
        if i.upper() == 'Parts per billion Carbon'.upper():
            data.loc[con, 'units'] = 'ppbC'
        if i == 'Parts per billion':
            data.loc[con, 'units'] = 'ppb'
        if i == 'Parts per million':
            data.loc[con, 'units'] = 'ppm'
    return data


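Usage note (not part of the diff): with this change the public entry point is add_data rather than open_dataset, and it returns the processed DataFrame directly. Below is a minimal sketch of a call, assuming a local AQS-style JSON file with 'Header' and 'Data' keys; the file name is hypothetical.

import monetio.obs.pams as pams

# hypothetical path to a PAMS/AQS JSON file
df = pams.add_data("pams_sample.json")

# the first five columns should be siteid, latitude, longitude,
# datetime_local, datetime_utc; common unit strings are normalized
# to 'ppbC', 'ppb', and 'ppm'
print(df.columns[:5].tolist())
print(df.units.unique())
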
@@ -48,80 +48,26 @@ def open_json(filename):
    return jsonf


def get_header(jsonf):
def get_header(filename):
"""Finds basic header information in json file
Parameters
----------------
jsonf: dictionary
filename: string
Full file path for json file
Results
----------------
header: Pandas DataFrame
"""

jsonf = open_json(filename)
header = jsonf['Header']
header = pd.DataFrame.from_dict(header)
return header


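Similarly (not part of the diff), get_header now takes the file path itself rather than an already-loaded dictionary, so the header can be inspected without calling open_json first. A short sketch, again with a hypothetical file name:

import monetio.obs.pams as pams

# get_header opens the JSON itself and returns the 'Header' block as a DataFrame
header = pams.get_header("pams_sample.json")
print(header.head())
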
def get_data(jsonf):
""" Finds data in json file
Parameters
----------------
jsonf: dictionary
Results
----------------
data: Pandas DataFrame
DataFrame containing pertinent information
Date and Time are Datetime Objects
"""
dataf = jsonf['Data']
data = pd.DataFrame.from_dict(dataf)

# Combining state code, county code, and site number into one column
data['siteid'] = data.state_code.astype(str).str.zfill(
2)+data.county_code.astype(str).str.zfill(3)+data.site_number.astype(str).str.zfill(4)

# Combining date and time into one column
data['datetime_local'] = pd.to_datetime(data['date_local']+' '+data['time_local'])
data['datetime_utc'] = pd.to_datetime(data['date_gmt']+' '+data['time_gmt'])

# Renaming columns
data = data.rename(columns={'sample_measurement': 'obs',
'units_of_measure': 'units', 'units_of_measure_code': 'unit_code'})

# Dropping some columns, and reordering columns
data = data.drop(columns=['state_code', 'county_code', 'site_number', 'datum',
'qualifier', 'uncertainty', 'county', 'state', 'date_of_last_change',
'date_local', 'time_local', 'date_gmt', 'time_gmt', 'poc', 'unit_code',
'sample_duration_code', 'method_code'])
cols = data.columns.tolist()
cols.insert(0, cols.pop(cols.index('siteid')))
cols.insert(1, cols.pop(cols.index('latitude')))
cols.insert(2, cols.pop(cols.index('longitude')))
cols.insert(3, cols.pop(cols.index('datetime_local')))
cols.insert(4, cols.pop(cols.index('datetime_utc')))
data = data.reindex(columns=cols)

# Adjusting parameter units
units = data.units.unique()
for i in units:
con = data.units == i
if i.upper() == 'Parts per billion Carbon'.upper():
data.loc[con, 'units'] = 'ppbC'
if i == 'Parts per billion':
data.loc[con, 'units'] = 'ppb'
if i == 'Parts per million':
data.loc[con, 'units'] = 'ppm'
return data


def write_csv(array, filename):
"""Writes the data array to a csv file
