Skip to content

Commit

Permalink
Develop satellite (noaa-oar-arl#29)
Browse files Browse the repository at this point in the history
* Update raqms.py

tracking problems in colab

* Update __init__.py

* Update __init__.py

* Update __init__.py

* trying to make Kaggle import work

* Update __init__.py

* Update __init__.py

* Update __init__.py

* Update __init__.py

* Update __init__.py

* Update __init__.py

* Update __init__.py

* Update __init__.py

* Update __init__.py

* Update __init__.py

* Update __init__.py

* Update __init__.py

* Update setup.cfg

* Update setup.cfg

* Update __init__.py

* Update raqms.py

adjusting timestamp. Needed for pairing. Unsure if this was a temporary fix sort of thing

* Update raqms.py

* Update raqms.py

* Update raqms.py

* Update omps_nadir.py

* Update omps_nadir.py

* data handling issue fix

* Update omps_nadir.py

* Update omps_nadir.py

* add trimmed file reader for earthcube notebook

* Update raqms.py

* Update raqms.py

* Update raqms.py

* merge fixes

* trying to fix for merge

* Adding MOPITT read

* tropomi no2 reader

* rename

* adding hdf close file

* omps level 3 reader

* Update raqms.py

remove double of _fix_pres

---------

Co-authored-by: mebruckner <48494069+mebruckner@users.noreply.github.com>
Co-authored-by: Maggie Bruckner <mbruckner@raqms-ops.ssec.wisc.edu>
Co-authored-by: Meng Li <mengli@MengM1.local>
  • Loading branch information
4 people authored May 4, 2023
1 parent 52e9873 commit 82d5565
Show file tree
Hide file tree
Showing 4 changed files with 353 additions and 0 deletions.
6 changes: 6 additions & 0 deletions monetio/sat/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from . import (
_tropomi_l2_no2_mm,
_gridded_eos_mm,
_modis_l2_mm,
_omps_nadir_mm,
_omps_l3_mm,
_mopitt_l3_mm,
goes,
modis_ornl,
nesdis_edr_viirs,
Expand All @@ -10,9 +13,12 @@
)

__all__ = [
"_tropomi_l2_no2_mm",
"_gridded_eos_mm",
"_modis_l2_mm",
"_omps_nadir_mm",
"_omps_l3_mm",
"_mopitt_l3_mm",
"nesdis_edr_viirs",
"nesdis_eps_viirs",
"nesdis_frp",
Expand Down
138 changes: 138 additions & 0 deletions monetio/sat/_mopitt_l3_mm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
""" MOPITT gridded data File reader
updated 2022-10 rrb
* DataSet instead of DataArray
created 2021-12 rrb
"""
import pandas as pd
import xarray as xr
import numpy as np
import h5py
import glob
import os



def getStartTime(filename):
    """Read the granule start time from a MOPITT level 3 hdf file.

    The time is stored in the file-level attribute ``StartTime`` as
    TAI seconds since the 1993-01-01 00:00:00 epoch.

    Parameters
    ----------
    filename : string
        filename is the path to the file

    Returns
    -------
    pandas.Timestamp or int
        The granule start time, or 0 if the file could not be opened
        or read (sentinel kept for backward compatibility).
    """
    structure = '/HDFEOS/ADDITIONAL/FILE_ATTRIBUTES'

    try:
        inFile = h5py.File(filename, 'r')
    except OSError:
        print("ERROR: CANNOT OPEN " + filename)
        return 0

    try:
        k = inFile[structure].attrs
        startTimeBytes = k.get("StartTime", default=None)
        # StartTime is TAI93: seconds since 1993-01-01 00:00:00
        startTime = pd.to_datetime(startTimeBytes[0], unit='s',
                                   origin='1993-01-01 00:00:00')
    except (KeyError, TypeError, IndexError):
        print("ERROR: CANNOT READ StartTime FROM " + filename)
        return 0
    finally:
        # Close the handle even when the attribute read fails
        # (the original leaked the handle on that path).
        inFile.close()

    return startTime


def loadAndExtractGriddedHDF(filename, varname):
    """Open a MOPITT gridded hdf file and load one variable.
    Masks data that is missing (turns into np.nan).

    Parameters
    ----------
    filename : string
        filename is the path to the file
    varname : string
        The variable to load from the MOPITT file; one of
        'column', 'apriori_col', 'apriori_surf', 'pressure_surf',
        'ak_col', 'ak_prof'.

    Returns
    -------
    xarray.Dataset or int
        Dataset containing the requested variable, or 0 if the
        variable could not be loaded (sentinel kept for backward
        compatibility).
    """
    # initialize into dataset
    ds = xr.Dataset()

    # 2D or 3D variables to choose from
    variable_dict = {
        'column': "/HDFEOS/GRIDS/MOP03/Data Fields/RetrievedCOTotalColumnDay",
        'apriori_col': "/HDFEOS/GRIDS/MOP03/Data Fields/APrioriCOTotalColumnDay",
        'apriori_surf': "/HDFEOS/GRIDS/MOP03/Data Fields/APrioriCOSurfaceMixingRatioDay",
        'pressure_surf': "/HDFEOS/GRIDS/MOP03/Data Fields/SurfacePressureDay",
        'ak_col': "/HDFEOS/GRIDS/MOP03/Data Fields/TotalColumnAveragingKernelDay",
        'ak_prof': "/HDFEOS/GRIDS/MOP03/Data Fields/APrioriCOMixingRatioProfileDay",
    }

    # load the dimensions and the requested field; close the file even on error
    he5_load = h5py.File(filename, mode='r')
    try:
        lat = he5_load["/HDFEOS/GRIDS/MOP03/Data Fields/Latitude"][:]
        lon = he5_load["/HDFEOS/GRIDS/MOP03/Data Fields/Longitude"][:]
        alt = he5_load["/HDFEOS/GRIDS/MOP03/Data Fields/Pressure2"][:]
        alt_short = he5_load["/HDFEOS/GRIDS/MOP03/Data Fields/Pressure"][:]
        try:
            # KeyError covers both an unknown varname and a missing HDF path
            data_loaded = he5_load[variable_dict[varname]][:]
        except KeyError:
            print("ERROR: Cannot load " + varname + " from " + filename)
            return 0
    finally:
        he5_load.close()

    # create xarray DataArray
    if varname in ('column', 'apriori_col', 'apriori_surf', 'pressure_surf'):
        ds[varname] = xr.DataArray(data_loaded, dims=["lon", "lat"], coords=[lon, lat])
        # missing value -> nan
        ds[varname] = ds[varname].where(ds[varname] != -9999.)
    elif varname == 'ak_col':
        ds[varname] = xr.DataArray(data_loaded, dims=["lon", "lat", "alt"], coords=[lon, lat, alt])
    elif varname == 'ak_prof':
        # BUG FIX: this branch previously tested for 'apriori_prof', which is
        # not a key in variable_dict, so requesting 'ak_prof' loaded the data
        # but returned an empty Dataset. The profile field lives on the short
        # (9-level) pressure grid.
        ds[varname] = xr.DataArray(data_loaded, dims=["lon", "lat", "alt"], coords=[lon, lat, alt_short])

    return ds


def read_mopittdataset(files, varname):
    """Loop through files to open the MOPITT level 3 data.

    Parameters
    ----------
    files : string
        The full path to the file or files. Can take a file template
        with wildcard (*) symbol.
    varname : string
        The variable to load from the MOPITT file

    Returns
    -------
    xarray.Dataset
        All granules concatenated along a new 'time' dimension.

    Raises
    ------
    ValueError
        If no files match *files*.
    """
    filelist = sorted(glob.glob(files, recursive=False))
    if not filelist:
        # Previously an empty match fell through to an UnboundLocalError
        # on full_dataset; fail fast with a clear message instead.
        raise ValueError("No files found matching " + str(files))

    full_dataset = None
    for filename in filelist:
        print(filename)
        data = loadAndExtractGriddedHDF(filename, varname)
        time = getStartTime(filename)
        # Prepend a length-1 time dimension so granules can be concatenated.
        data = data.expand_dims(axis=0, time=[time])
        if full_dataset is None:
            full_dataset = data
        else:
            full_dataset = xr.concat([full_dataset, data], 'time')

    return full_dataset
68 changes: 68 additions & 0 deletions monetio/sat/_omps_l3_mm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
def read_OMPS_l3(files):
    """Loop to open OMPS nadir mapper Total Column Ozone L3 files.

    Parameters
    ----------
    files : string
        Glob pattern (or single path) for the files to read.

    Returns
    -------
    xarray.Dataset
        All granules concatenated along a 'time' dimension, with
        latitude/longitude/time set as coordinates.

    Raises
    ------
    ValueError
        If no files match *files*.
    """
    from glob import glob

    filelist = sorted(glob(files, recursive=False))
    if not filelist:
        # Previously an empty match fell through to an UnboundLocalError.
        raise ValueError("No files found matching " + str(files))

    # Heavy imports deferred past the guard; the unused matplotlib
    # import in the original has been dropped.
    import numpy as np
    import xarray as xr

    start_dataset = True
    times = []
    for filename in filelist:
        data = extract_OMPS_l3(filename)
        # Collect each granule's date; 'time' becomes a coordinate below,
        # so drop the per-granule attribute to avoid concat conflicts.
        times.append(data.attrs['time'])
        del data.attrs['time']
        if start_dataset:
            data_array = data
            start_dataset = False
        else:
            data_array = xr.concat([data_array, data], 'time')

    data_array['time'] = (('time'), np.array(times))
    data_array = data_array.reset_coords().set_coords(['latitude', 'longitude', 'time'])
    return data_array

def extract_OMPS_l3(fname):
    '''Read locally stored NASA Suomi NPP OMPS Level 3 Nadir Mapper TO3 files.

    Cloudy scenes (radiative cloud fraction > 0.3) and missing data
    (negative column values, e.g. the polar dark zone) are set to NaN.

    Parameters
    ----------
    fname : string
        fname is local path to h5 file

    Returns
    -------
    ds : xarray dataset
        Dataset with an 'ozone_column' variable on 2-D
        latitude/longitude coordinates; the granule date is stored in
        ``ds.attrs['time']``.
    '''
    import h5py
    import numpy as np
    import pandas as pd
    import xarray as xr

    with h5py.File(fname, 'r') as f:
        lat = f['Latitude'][:]
        lon = f['Longitude'][:]
        column = f['ColumnAmountO3'][:]
        cloud_fraction = f['RadiativeCloudFraction'][:]
        # 'Date' attribute is stored as bytes, e.g. b'2020-01-01'
        time = pd.to_datetime(f.attrs['Date'].decode('UTF-8'), format='%Y-%m-%d')

    # remove cloudy scenes and points with no data (eg. polar dark zone)
    column[(column < 0)] = np.nan
    column[(cloud_fraction > 0.3)] = np.nan
    lon_2d, lat_2d = np.meshgrid(lon, lat)

    # Prepend a length-1 time dimension so granules can be concatenated.
    ds = xr.Dataset(
        {'ozone_column': (['time', 'x', 'y'], column[None, :, :])},
        coords={'longitude': (['x', 'y'], lon_2d),
                'latitude': (['x', 'y'], lat_2d)},
        attrs={'time': time},
    )

    return ds
141 changes: 141 additions & 0 deletions monetio/sat/_tropomi_l2_no2_mm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# Reading TROPOMI L2 NO2 data

import os
import sys
import logging
from glob import glob
from collections import OrderedDict

import numpy as np
import xarray as xr

from netCDF4 import Dataset

#from monetio.hdf.hdfio import hdf_open, hdf_close, hdf_list, hdf_read

def read_dataset(fname, variable_dict):
    """Read selected variables from a single TROPOMI L2 NO2 netCDF granule.

    Parameters
    __________
    fname : str
        Input file path.
    variable_dict : dict
        Maps each variable name (looked up under the file's 'PRODUCT'
        group) to a dict of optional handling keys: 'fillvalue',
        'scale', 'minimum', 'maximum', 'quality_flag_min'.

    Returns
    _______
    xarray.Dataset
    """
    print('reading ' + fname)

    ds = xr.Dataset()

    dso = Dataset(fname, "r")

    longitude = dso.groups['PRODUCT']['longitude']
    latitude = dso.groups['PRODUCT']['latitude']
    # NOTE(review): start_time is read and squeezed but never stored in
    # ds or otherwise used — confirm whether it should be kept.
    start_time = dso.groups['PRODUCT']['time']

    # squeeze 1-dimension
    longitude = np.squeeze(longitude)
    latitude = np.squeeze(latitude)
    start_time = np.squeeze(start_time)

    ds['lon'] = xr.DataArray(longitude)
    ds['lat'] = xr.DataArray(latitude)

    for varname in variable_dict:
        print(varname)
        values = dso.groups['PRODUCT'][varname]
        # squeeze out 1-dimension
        values = np.squeeze(values)

        # Replace the fill value with NaN before any scaling/clipping.
        if 'fillvalue' in variable_dict[varname]:
            fillvalue = variable_dict[varname]['fillvalue']
            values[:][values[:] == fillvalue] = np.nan

        # Apply a multiplicative scale factor (e.g. unit conversion).
        if 'scale' in variable_dict[varname]:
            values[:] = variable_dict[varname]['scale'] * values[:]

        # Clip out-of-range retrievals to NaN.
        if 'minimum' in variable_dict[varname]:
            minimum = variable_dict[varname]['minimum']
            values[:][values[:] < minimum] = np.nan

        if 'maximum' in variable_dict[varname]:
            maximum = variable_dict[varname]['maximum']
            values[:][values[:] > maximum] = np.nan

        ds[varname] = xr.DataArray(values)

        # Remember which variable carries the QA flag so that
        # apply_quality_flag() can screen the other variables later.
        if 'quality_flag_min' in variable_dict[varname]:
            ds.attrs['quality_flag'] = varname
            ds.attrs['quality_thresh_min'] = variable_dict[varname]['quality_flag_min']

    dso.close()

    return ds


def apply_quality_flag(ds):
    """Mask out low-quality retrievals in *ds*, in place.

    If ``ds.attrs['quality_flag']`` names a QA variable, every other
    variable is set to NaN wherever the QA value is at or below
    ``ds.attrs['quality_thresh_min']``. Datasets without a QA flag are
    left untouched.

    Parameters
    __________
    ds : xarray.Dataset
    """
    if 'quality_flag' not in ds.attrs:
        return

    qa_name = ds.attrs['quality_flag']
    qa_values = ds[qa_name]
    qa_min = ds.attrs['quality_thresh_min']

    # apply the quality thresh minimum to all variables in ds
    for name in ds:
        print(name)
        if name == qa_name:
            continue
        logging.debug(name)
        data = ds[name].values
        data[qa_values <= qa_min] = np.nan
        ds[name].values = data


def read_trpdataset(fnames, variable_dict, debug=False):
    """Read and quality-screen a set of TROPOMI L2 NO2 granules.

    Parameters
    __________
    fnames : str
        Glob expression for input file paths; path components may
        contain environment variables written as ``$NAME``.
    variable_dict : dict
        Per-variable read options, passed through to read_dataset().
    debug : bool
        If True, log at DEBUG level instead of INFO.

    Returns
    _______
    collections.OrderedDict
        Mapping of granule date string (YYYYMMDD, taken from the file
        name) to the granule's xarray.Dataset. (The original docstring
        claimed a single xarray.Dataset, which was incorrect.)
    """
    if debug:
        logging_level = logging.DEBUG
    else:
        logging_level = logging.INFO
    logging.basicConfig(stream=sys.stdout, level=logging_level)

    # Expand $VAR path components in the file template.
    for subpath in fnames.split('/'):
        if '$' in subpath:
            envvar = subpath.replace('$', '')
            envval = os.getenv(envvar)
            if envval is None:
                print('environment variable not defined: ' + subpath)
                # BUG FIX: was the bare builtin exit(1), which is provided by
                # the site module and not guaranteed outside the REPL;
                # sys.exit raises SystemExit reliably.
                sys.exit(1)
            else:
                fnames = fnames.replace(subpath, envval)

    print(fnames)
    files = sorted(glob(fnames))
    granules = OrderedDict()
    for file in files:
        granule = read_dataset(file, variable_dict)
        apply_quality_flag(granule)
        granule_str = file.split('/')[-1]
        # S5P file names use a quadruple underscore between the product
        # name and the sensing-time fields; the date is the first 8 chars
        # of the second piece.
        granule_info = granule_str.split('____')

        datetime_str = granule_info[1][0:8]
        granules[datetime_str] = granule

    return granules

0 comments on commit 82d5565

Please sign in to comment.