forked from noaa-oar-arl/monetio
Commit
* Update raqms.py tracking problems in colab
* Update __init__.py
* Update __init__.py
* Update __init__.py
* trying to make Kaggle import work
* Update __init__.py
* Update __init__.py
* Update __init__.py
* Update __init__.py
* Update __init__.py
* Update __init__.py
* Update __init__.py
* Update __init__.py
* Update __init__.py
* Update __init__.py
* Update __init__.py
* Update __init__.py
* Update setup.cfg
* Update setup.cfg
* Update __init__.py
* Update raqms.py adjusting timestamp. Needed for pairing. Unsure if this was a temporary fix sort of thing
* Update raqms.py
* Update raqms.py
* Update raqms.py
* Update omps_nadir.py
* Update omps_nadir.py
* data handling issue fix
* Update omps_nadir.py
* Update omps_nadir.py
* add trimmed file reader for earthcube notebook
* Update raqms.py
* Update raqms.py
* Update raqms.py
* merge fixes
* trying to fix for merge
* Adding MOPITT read
* tropomi no2 reader
* rename
* adding hdf close file
* omps level 3 reader
* Update raqms.py remove double of _fix_pres

---------

Co-authored-by: mebruckner <48494069+mebruckner@users.noreply.github.com>
Co-authored-by: Maggie Bruckner <mbruckner@raqms-ops.ssec.wisc.edu>
Co-authored-by: Meng Li <mengli@MengM1.local>
1 parent 52e9873, commit 82d5565
Showing 4 changed files with 353 additions and 0 deletions.
@@ -0,0 +1,138 @@
""" MOPITT gridded data File reader | ||
updated 2022-10 rrb | ||
* DataSet instead of DataArray | ||
created 2021-12 rrb | ||
""" | ||
import pandas as pd | ||
import xarray as xr | ||
import numpy as np | ||
import h5py | ||
import glob | ||
import os | ||
|
||
|
||
|
||
def getStartTime(filename):
    """Read the start time in a MOPITT Level 3 HDF file.

    Parameters
    ----------
    filename : string
        Path to the file.

    Returns
    -------
    startTime : pandas.Timestamp
        Granule start time, or 0 if the file cannot be read.
    """
    structure = '/HDFEOS/ADDITIONAL/FILE_ATTRIBUTES'

    try:
        inFile = h5py.File(filename, 'r')
    except OSError:
        print("ERROR: CANNOT OPEN " + filename)
        return 0

    grp = inFile[structure]
    # "StartTime" is stored as seconds since the TAI-93 epoch (1993-01-01 00:00:00)
    startTimeBytes = grp.attrs.get("StartTime", default=None)
    startTime = pd.to_datetime(startTimeBytes[0], unit='s', origin='1993-01-01 00:00:00')

    try:
        inFile.close()
    except OSError:
        print("ERROR CANNOT CLOSE " + filename)
        return 0

    return startTime

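# Worked example (illustrative): with the TAI-93 origin above, a raw StartTime of
# 915148800.0 seconds converts as
#     pd.to_datetime(915148800.0, unit='s', origin='1993-01-01 00:00:00')
#     -> Timestamp('2022-01-01 00:00:00')
# i.e. 10592 days (29 years including 7 leap days) after 1993-01-01.
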
def loadAndExtractGriddedHDF(filename, varname):
    """Open a MOPITT gridded HDF file and extract one variable.

    Missing values (-9999.) in the 2-D fields are masked to np.nan.

    Parameters
    ----------
    filename : string
        Path to the file.
    varname : string
        The variable to load from the MOPITT file.

    Returns
    -------
    xarray.Dataset
    """
    # initialize the dataset
    ds = xr.Dataset()

    # load the dimensions
    he5_load = h5py.File(filename, mode='r')
    lat = he5_load["/HDFEOS/GRIDS/MOP03/Data Fields/Latitude"][:]
    lon = he5_load["/HDFEOS/GRIDS/MOP03/Data Fields/Longitude"][:]
    alt = he5_load["/HDFEOS/GRIDS/MOP03/Data Fields/Pressure2"][:]
    alt_short = he5_load["/HDFEOS/GRIDS/MOP03/Data Fields/Pressure"][:]

    # 2D or 3D variables to choose from
    variable_dict = {
        'column': "/HDFEOS/GRIDS/MOP03/Data Fields/RetrievedCOTotalColumnDay",
        'apriori_col': "/HDFEOS/GRIDS/MOP03/Data Fields/APrioriCOTotalColumnDay",
        'apriori_surf': "/HDFEOS/GRIDS/MOP03/Data Fields/APrioriCOSurfaceMixingRatioDay",
        'pressure_surf': "/HDFEOS/GRIDS/MOP03/Data Fields/SurfacePressureDay",
        'ak_col': "/HDFEOS/GRIDS/MOP03/Data Fields/TotalColumnAveragingKernelDay",
        'ak_prof': "/HDFEOS/GRIDS/MOP03/Data Fields/APrioriCOMixingRatioProfileDay",
    }
    try:
        data_loaded = he5_load[variable_dict[varname]][:]
    except KeyError:
        print("ERROR: Cannot load " + varname + " from " + filename)
        he5_load.close()
        return 0

    he5_load.close()

    # create xarray DataArray
    if varname in ('column', 'apriori_col', 'apriori_surf', 'pressure_surf'):
        ds[varname] = xr.DataArray(data_loaded, dims=["lon", "lat"], coords=[lon, lat])
        # missing value -> nan
        ds[varname] = ds[varname].where(ds[varname] != -9999.)
    elif varname == 'ak_col':
        ds[varname] = xr.DataArray(data_loaded, dims=["lon", "lat", "alt"], coords=[lon, lat, alt])
    elif varname == 'ak_prof':
        # a priori CO mixing-ratio profile, defined on the shorter "Pressure" grid
        ds[varname] = xr.DataArray(data_loaded, dims=["lon", "lat", "alt"], coords=[lon, lat, alt_short])

    return ds

def read_mopittdataset(files, varname):
    """Loop through files to open the MOPITT Level 3 data.

    Parameters
    ----------
    files : string
        The full path to the file or files. Can take a file template with
        wildcard (*) symbol.
    varname : string
        The variable to load from the MOPITT file.

    Returns
    -------
    xarray.Dataset
    """
    count = 0
    filelist = sorted(glob.glob(files, recursive=False))

    for filename in filelist:
        print(filename)
        data = loadAndExtractGriddedHDF(filename, varname)
        time = getStartTime(filename)
        # add a time dimension so the granules can be concatenated
        data = data.expand_dims(time=[time], axis=0)
        if count == 0:
            full_dataset = data
            count += 1
        else:
            full_dataset = xr.concat([full_dataset, data], 'time')

    return full_dataset
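Usage sketch (illustrative, not part of the commit; the glob pattern is a placeholder and varname must be one of the keys of variable_dict above):

    # hypothetical call: daytime retrieved CO total column from a set of L3 files
    ds = read_mopittdataset("/path/to/MOP03J-*.he5", "column")
    print(ds["column"].dims)   # ('time', 'lon', 'lat')
    print(ds["time"].values)   # one granule start time per file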
@@ -0,0 +1,68 @@
def read_OMPS_l3(files):
    """Loop to open OMPS nadir mapper Total Column Ozone L3 files.

    Parameters
    ----------
    files : string
        Path or glob pattern for the L3 file(s).

    Returns
    -------
    xarray.Dataset
    """
    from glob import glob

    import numpy as np
    import xarray as xr

    start_dataset = True
    times = []
    filelist = sorted(glob(files, recursive=False))

    for filename in filelist:
        data = extract_OMPS_l3(filename)
        times.append(data.attrs['time'])
        del data.attrs['time']
        if start_dataset:
            data_array = data
            start_dataset = False
        else:
            data_array = xr.concat([data_array, data], 'time')

    data_array['time'] = (('time'), np.array(times))
    data_array = data_array.reset_coords().set_coords(['latitude', 'longitude', 'time'])
    return data_array


def extract_OMPS_l3(fname):
    """Read a locally stored NASA Suomi NPP OMPS Level 3 Nadir Mapper TO3 file.

    Parameters
    ----------
    fname : string
        Local path to the h5 file.

    Returns
    -------
    ds : xarray.Dataset
    """
    import h5py
    import numpy as np
    import pandas as pd
    import xarray as xr

    with h5py.File(fname, 'r') as f:
        lat = f['Latitude'][:]
        lon = f['Longitude'][:]
        column = f['ColumnAmountO3'][:]
        cloud_fraction = f['RadiativeCloudFraction'][:]
        time = pd.to_datetime(f.attrs['Date'].decode('UTF-8'), format='%Y-%m-%d')

    # remove cloudy scenes and points with no data (e.g. polar dark zone)
    column[column < 0] = np.nan
    column[cloud_fraction > 0.3] = np.nan
    lon_2d, lat_2d = np.meshgrid(lon, lat)

    ds = xr.Dataset(
        {'ozone_column': (['time', 'x', 'y'], column[None, :, :])},
        coords={'longitude': (['x', 'y'], lon_2d), 'latitude': (['x', 'y'], lat_2d)},
        attrs={'time': time},
    )

    return ds
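Usage sketch (illustrative; the pattern is a placeholder). read_OMPS_l3 takes a glob pattern and returns a single Dataset with a time dimension and 2-D latitude/longitude coordinates:

    # hypothetical call over a set of daily L3 files
    omps = read_OMPS_l3("/path/to/omps_nm_to3_l3_*.h5")
    print(omps["ozone_column"].dims)   # ('time', 'x', 'y')
    print(omps.coords)                 # latitude, longitude, time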
@@ -0,0 +1,141 @@
# Reading TROPOMI L2 NO2 data

import os
import sys
import logging
from glob import glob
from collections import OrderedDict

import numpy as np
import xarray as xr

from netCDF4 import Dataset

# from monetio.hdf.hdfio import hdf_open, hdf_close, hdf_list, hdf_read


def read_dataset(fname, variable_dict):
    """Read one TROPOMI L2 NO2 granule into an xarray.Dataset.

    Parameters
    ----------
    fname : str
        Input file path.
    variable_dict : dict
        Mapping of PRODUCT-group variable names to per-variable options
        (optional keys: 'fillvalue', 'scale', 'minimum', 'maximum',
        'quality_flag_min').

    Returns
    -------
    xarray.Dataset
    """
    print('reading ' + fname)

    ds = xr.Dataset()

    dso = Dataset(fname, "r")

    longitude = dso.groups['PRODUCT']['longitude'][:]
    latitude = dso.groups['PRODUCT']['latitude'][:]
    start_time = dso.groups['PRODUCT']['time'][:]  # read but not attached to the dataset

    # squeeze out the length-1 time dimension
    longitude = np.squeeze(longitude)
    latitude = np.squeeze(latitude)
    start_time = np.squeeze(start_time)

    ds['lon'] = xr.DataArray(longitude)
    ds['lat'] = xr.DataArray(latitude)

    for varname in variable_dict:
        logging.debug(varname)
        values = dso.groups['PRODUCT'][varname][:]
        # squeeze out the length-1 time dimension
        values = np.squeeze(values)

        if 'fillvalue' in variable_dict[varname]:
            fillvalue = variable_dict[varname]['fillvalue']
            values[values == fillvalue] = np.nan

        if 'scale' in variable_dict[varname]:
            values = variable_dict[varname]['scale'] * values

        if 'minimum' in variable_dict[varname]:
            minimum = variable_dict[varname]['minimum']
            values[values < minimum] = np.nan

        if 'maximum' in variable_dict[varname]:
            maximum = variable_dict[varname]['maximum']
            values[values > maximum] = np.nan

        ds[varname] = xr.DataArray(values)

        if 'quality_flag_min' in variable_dict[varname]:
            # remember which variable acts as the quality flag and its threshold
            ds.attrs['quality_flag'] = varname
            ds.attrs['quality_thresh_min'] = variable_dict[varname]['quality_flag_min']

    dso.close()

    return ds


def apply_quality_flag(ds):
    """Mask all variables where the quality flag is at or below the threshold.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset returned by read_dataset, with 'quality_flag' and
        'quality_thresh_min' attributes set.
    """
    if 'quality_flag' in ds.attrs:
        quality_flag = ds[ds.attrs['quality_flag']]
        quality_thresh_min = ds.attrs['quality_thresh_min']

        # apply the quality threshold minimum to all variables in ds
        for varname in ds:
            if varname != ds.attrs['quality_flag']:
                logging.debug(varname)
                values = ds[varname].values
                values[quality_flag.values <= quality_thresh_min] = np.nan
                ds[varname].values = values


def read_trpdataset(fnames, variable_dict, debug=False):
    """Read a set of TROPOMI L2 NO2 granules.

    Parameters
    ----------
    fnames : str
        Glob expression for input file paths; may contain environment
        variables (e.g. a $VARIABLE path component).
    variable_dict : dict
        Per-variable options passed to read_dataset.
    debug : bool
        If True, set the logging level to DEBUG.

    Returns
    -------
    collections.OrderedDict
        Mapping of granule date strings (YYYYMMDD) to xarray.Dataset objects.
    """
    if debug:
        logging_level = logging.DEBUG
    else:
        logging_level = logging.INFO
    logging.basicConfig(stream=sys.stdout, level=logging_level)

    # expand environment variables in the path
    for subpath in fnames.split('/'):
        if '$' in subpath:
            envvar = subpath.replace('$', '')
            envval = os.getenv(envvar)
            if envval is None:
                print('environment variable not defined: ' + subpath)
                sys.exit(1)
            else:
                fnames = fnames.replace(subpath, envval)

    print(fnames)
    files = sorted(glob(fnames))
    granules = OrderedDict()
    for file in files:
        granule = read_dataset(file, variable_dict)
        apply_quality_flag(granule)
        granule_str = os.path.basename(file)
        granule_info = granule_str.split('____')

        # first 8 characters after '____' give the sensing date, e.g. '20190701'
        datetime_str = granule_info[1][0:8]
        granules[datetime_str] = granule

    return granules
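Usage sketch (illustrative). The per-variable option keys (fillvalue, scale, minimum, maximum, quality_flag_min) are the ones read_dataset understands; the variable names, threshold, and path below are placeholders chosen for the example:

    variable_dict = {
        'nitrogendioxide_tropospheric_column': {
            'fillvalue': 9.96921e36,   # assumed fill value for the example
            'minimum': 0.0,            # mask negative columns
        },
        'qa_value': {
            'quality_flag_min': 0.75,  # mask other variables where qa_value <= 0.75
        },
    }
    granules = read_trpdataset("$TROPOMI_DIR/S5P_*_L2__NO2____*.nc", variable_dict)  # placeholder path
    for date, ds in granules.items():
        print(date, list(ds.data_vars))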