-
Notifications
You must be signed in to change notification settings - Fork 31
/
Copy pathpams.py
143 lines (104 loc) · 3.74 KB
/
pams.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# Reads json data files from
# https://aqs.epa.gov/aqsweb/documents/data_api.html
import json
import pandas as pd
def open_dataset(filename):
    """ Opens an AQS json file and returns its observations as a DataFrame
    Parameters
    -----------------
    filename: string
        Full file path for json file
    Returns
    -----------------
    data: Pandas DataFrame
        DataFrame with all pertinent information
    """
    # Convenience wrapper: parse the file, then flatten its 'Data' section.
    return get_data(open_json(filename))
def open_json(filename):
    """ Reads a json file from disk into a dictionary
    Parameters
    ----------------
    filename: string
        Full file path for json file
    Returns
    ----------------
    jsonf: dictionary
        Parsed json content, ready for the other functions in this code
        Contains two dictionaries: 'Header' and 'Data'
    """
    with open(filename) as fp:
        return json.load(fp)
def get_header(jsonf):
    """Extracts the basic header information from a parsed json file
    Parameters
    ----------------
    jsonf: dictionary
    Results
    ----------------
    header: Pandas DataFrame
    """
    # The 'Header' entry is a list of records; hand it straight to pandas.
    return pd.DataFrame.from_dict(jsonf['Header'])
def get_data(jsonf):
    """ Finds data in json file
    Parameters
    ----------------
    jsonf: dictionary
        Parsed AQS json; its 'Data' key holds a list of observation records
    Results
    ----------------
    data: Pandas DataFrame
        DataFrame containing pertinent information
        Date and Time are Datetime Objects
    """
    data = pd.DataFrame.from_dict(jsonf['Data'])
    # Combining state code, county code, and site number into one column,
    # zero-padded to the standard AQS 2/3/4-digit widths
    data['siteid'] = data.state_code.astype(str).str.zfill(
        2)+data.county_code.astype(str).str.zfill(3)+data.site_number.astype(str).str.zfill(4)
    # Combining date and time into one column
    data['datetime_local'] = pd.to_datetime(data['date_local']+' '+data['time_local'])
    data['datetime_utc'] = pd.to_datetime(data['date_gmt']+' '+data['time_gmt'])
    # Renaming columns
    data = data.rename(columns={'sample_measurement': 'obs',
                                'units_of_measure': 'units', 'units_of_measure_code': 'unit_code'})
    # Dropping some columns, and reordering columns
    data = data.drop(columns=['state_code', 'county_code', 'site_number', 'datum',
                              'qualifier', 'uncertainty', 'county', 'state', 'date_of_last_change',
                              'date_local', 'time_local', 'date_gmt', 'time_gmt', 'poc', 'unit_code',
                              'sample_duration_code', 'method_code'])
    cols = data.columns.tolist()
    # Move the key identifier/coordinate/time columns to the front, in order
    for pos, name in enumerate(['siteid', 'latitude', 'longitude',
                                'datetime_local', 'datetime_utc']):
        cols.insert(pos, cols.pop(cols.index(name)))
    data = data.reindex(columns=cols)
    # Adjusting parameter units: map the verbose AQS names to short forms.
    # Matching is case-insensitive for all three units (previously only the
    # ppbC comparison ignored case, which was inconsistent).
    unit_map = {'parts per billion carbon': 'ppbC',
                'parts per billion': 'ppb',
                'parts per million': 'ppm'}
    for unit in data.units.unique():
        short = unit_map.get(str(unit).lower())
        if short is not None:
            data.loc[data.units == unit, 'units'] = short
    return data
def write_csv(array, filename):
    """Saves a data array to disk as a utf-8 csv file without the index
    Parameters
    ----------------
    array: Pandas DataFrame
        Can be any Pandas DataFrame
    filename: string
        Full path and filename of csv file
    Returns
    ----------------
    Generates csv file of specified name in specified location
    """
    array.to_csv(filename, index=False, encoding='utf-8')
    return f'csv file {filename} has been generated'