diff --git a/.github/workflows/label.yml b/.github/workflows/label.yml index e90b599b..5e328382 100644 --- a/.github/workflows/label.yml +++ b/.github/workflows/label.yml @@ -10,10 +10,9 @@ on: [pull_request] jobs: label: - runs-on: ubuntu-latest steps: - - uses: actions/labeler@v2 - with: - repo-token: "${{ secrets.GITHUB_TOKEN }}" + - uses: actions/labeler@v2 + with: + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..e5463fb8 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,34 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: "v4.1.0" + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-docstring-first + - id: check-yaml + + - repo: https://github.com/asottile/pyupgrade + rev: "v2.31.0" + hooks: + - id: pyupgrade + args: [--py36-plus] + + - repo: https://github.com/PyCQA/isort + rev: "5.10.1" + hooks: + - id: isort + + - repo: https://github.com/psf/black + rev: "21.12b0" + hooks: + - id: black + + - repo: https://github.com/PyCQA/flake8 + rev: "4.0.1" + hooks: + - id: flake8 + + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v2.5.1" + hooks: + - id: prettier diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 7af67655..8d1728fd 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,22 +1,19 @@ version: 2 - build: image: latest - # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/conf.py - #conda: # environment: docs/environment.yml # Optionally set the version of Python and requirements required to build your docs python: version: 3.6 -# setup_py_install: false + # setup_py_install: false install: - requirements: docs/requirements.txt diff --git a/.stickler.yml b/.stickler.yml index 260a3727..c5b4932d 100644 --- a/.stickler.yml +++ b/.stickler.yml @@ -7,6 +7,6 @@ fixers: review: fail_on_comments: false files: - ignore: - - 'docs/**/*' - - 'monet/data/*' + ignore: + - "docs/**/*" + - "monet/data/*" diff --git a/.travis.yaml b/.travis.yaml index 9a7ab33c..61470176 100644 --- a/.travis.yaml +++ b/.travis.yaml @@ -1,10 +1,9 @@ language: python - branches: only: - - master - - develop + - master + - develop before_install: - chmod +x miniconda.sh @@ -13,9 +12,7 @@ before_install: - conda config --set always_yes yes --set changeps1 no - conda update --yes conda - install: - conda install -c bbakernoaa monet - # after_success: # - test $TRAVIS_BRANCH = "master" && conda install conda-build && conda install anaconda-client && bash conda/conda_upload.sh diff --git a/README.md b/README.md index 91dfe838..506877da 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ ## The Model and ObservatioN Evaluation Tool I/O -This is a tool developed at NOAA ARL to handle I/O operations for atmospheric composition for python. It was originally part of the Model and ObservatioN Evaluation Tool (MONET; see Baker and Li (2017)). +This is a tool developed at NOAA ARL to handle I/O operations for atmospheric composition for python. It was originally part of the Model and ObservatioN Evaluation Tool (MONET; see Baker and Li (2017)). -Please refer to the readthedocs page at monetio.readthedocs.io for the documentation. +Please refer to the readthedocs page at monetio.readthedocs.io for the documentation. Baker, Barry; Pan, Li. 2017. “Overview of the Model and Observation Evaluation Toolkit (MONET) Version 1.0 for Evaluating Atmospheric Transport Models.” Atmosphere 8, no. 11: 210 diff --git a/docs/Makefile b/docs/Makefile index c2011a0b..cbdfc058 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -17,4 +17,4 @@ help: # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/_static/style.css b/docs/_static/style.css index 6f426e01..1de01575 100644 --- a/docs/_static/style.css +++ b/docs/_static/style.css @@ -1,18 +1,18 @@ @import url("theme.css"); -.wy-side-nav-search>a img.logo, -.wy-side-nav-search .wy-dropdown>a img.logo { - width: 12rem +.wy-side-nav-search > a img.logo, +.wy-side-nav-search .wy-dropdown > a img.logo { + width: 12rem; } .wy-side-nav-search { background-color: #eee; } -.wy-side-nav-search>div.version { +.wy-side-nav-search > div.version { display: none; } .wy-nav-top { background-color: #ffffff; -} \ No newline at end of file +} diff --git a/docs/_templates/layout.html b/docs/_templates/layout.html index 4c57ba83..31410c9b 100644 --- a/docs/_templates/layout.html +++ b/docs/_templates/layout.html @@ -1,2 +1,2 @@ -{% extends "!layout.html" %} -{% set css_files = css_files + ["_static/style.css"] %} +{% extends "!layout.html" %} {% set css_files = css_files + +["_static/style.css"] %} diff --git a/docs/conf.py b/docs/conf.py index 395e7f70..afc02bb5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Configuration file for the Sphinx documentation builder. # @@ -23,18 +22,18 @@ def __getattr__(cls, name): return MagicMock() -#sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES) -sys.path.insert(0, os.path.abspath('../')) +# sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES) +sys.path.insert(0, os.path.abspath("../")) # -- Project information ----------------------------------------------------- -project = u'monetio' -copyright = u'2018, Barry Baker' -author = u'Barry Baker' +project = "monetio" +copyright = "2018, Barry Baker" +author = "Barry Baker" # The short X.Y version -version = u'' +version = "" # The full version, including alpha/beta/rc tags -release = u'' +release = "" # -- General configuration --------------------------------------------------- @@ -46,14 +45,16 @@ def __getattr__(cls, name): # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'sphinx.ext.napoleon', - 'sphinx.ext.extlinks' + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.napoleon", + "sphinx.ext.extlinks", ] -#exclude_patterns = ['_build', '**.ipynb_checkpoints'] +# exclude_patterns = ['_build', '**.ipynb_checkpoints'] extlinks = { - 'issue': ('https://github.com/noaa-oar-arl/monetio/issues/%s', 'GH'), - 'pull': ('https://github.com/noaa-oar-arl/monetio/pull/%s', 'PR'), + "issue": ("https://github.com/noaa-oar-arl/monetio/issues/%s", "GH"), + "pull": ("https://github.com/noaa-oar-arl/monetio/pull/%s", "PR"), } autosummary_generate = True @@ -63,16 +64,16 @@ def __getattr__(cls, name): napoleon_use_ivar = False # True # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The master toctree document. -master_doc = 'index' +master_doc = "index" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -84,17 +85,17 @@ def __getattr__(cls, name): # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path . -exclude_patterns = [u'_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -105,7 +106,7 @@ def __getattr__(cls, name): # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # Custom sidebar templates, must be a dictionary that maps document names # to template names. @@ -120,10 +121,10 @@ def __getattr__(cls, name): # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. -htmlhelp_basename = 'monetiodoc' +htmlhelp_basename = "monetiodoc" html_theme_options = { - 'logo_only': True, + "logo_only": True, } # Add any paths that contain custom themes here, relative to this directory. @@ -146,15 +147,12 @@ def __getattr__(cls, name): # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -164,15 +162,14 @@ def __getattr__(cls, name): # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'monetio.tex', u'monetio Documentation', u'Barry Baker', - 'manual'), + (master_doc, "monetio.tex", "monetio Documentation", "Barry Baker", "manual"), ] # -- Options for manual page output ------------------------------------------ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [(master_doc, 'monetio', u'monetio Documentation', [author], 1)] +man_pages = [(master_doc, "monetio", "monetio Documentation", [author], 1)] # -- Options for Texinfo output ---------------------------------------------- @@ -180,8 +177,15 @@ def __getattr__(cls, name): # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'monetio', u'monetio Documentation', author, 'monetio', - 'One line description of project.', 'Miscellaneous'), + ( + master_doc, + "monetio", + "monetio Documentation", + author, + "monetio", + "One line description of project.", + "Miscellaneous", + ), ] # -- Extension configuration ------------------------------------------------- diff --git a/docs/models.rst b/docs/models.rst index 7394e3e9..8a3f878b 100644 --- a/docs/models.rst +++ b/docs/models.rst @@ -128,7 +128,7 @@ where pm25 is a :py:class:`~xarray.DataArray` as it is a single variable. Prep-Chem-Sources ----------------- -A reader and writer was built into +A reader and writer was built into HYSPLIT @@ -250,7 +250,7 @@ To calcluate mass loading All points with value below or equal to threshold will be returned as 0. mult is a multiplicative factor applied before the thresholding. -species can be a list of values from the "Species ID" attribute. +species can be a list of values from the "Species ID" attribute. If it is None then all species will be used. To find top heights @@ -261,11 +261,7 @@ To find top heights returns xarray DataArray which gives top height of each level which contains mass loading higher than the given threshold value. mult is a mutiplicative factor applied before thresholding. -height_mult is a multiplicative factor used to convert heights from meters to some other unit. +height_mult is a multiplicative factor used to convert heights from meters to some other unit. In this example heights are converted to km. mass_load is a boolean which indicates whether the height should be determined from the mass loading value (True) -or the concentration value (False). - - - - +or the concentration value (False). diff --git a/monetio/__init__.py b/monetio/__init__.py index b5e1b95e..fba21936 100644 --- a/monetio/__init__.py +++ b/monetio/__init__.py @@ -94,7 +94,7 @@ def dataset_to_monet(dset, lat_name="lat", lon_name="lon", latlon2d=False): def coards_to_netcdf(dset, lat_name="lat", lon_name="lon"): - from numpy import meshgrid, arange + from numpy import arange, meshgrid lon = dset[lon_name] lat = dset[lat_name] diff --git a/monetio/data/sn_bound_10deg.txt b/monetio/data/sn_bound_10deg.txt index 8c96d557..64b012b0 100644 --- a/monetio/data/sn_bound_10deg.txt +++ b/monetio/data/sn_bound_10deg.txt @@ -1 +1 @@ -Sinusoidal Grid Bounding Coordinates of MODLAND Tiles ntile_vert = 18 ntile_horiz = 36 iv ih lon_min lon_max lat_min lat_max 0 0 -999.0000 -999.0000 -99.0000 -99.0000 0 1 -999.0000 -999.0000 -99.0000 -99.0000 0 2 -999.0000 -999.0000 -99.0000 -99.0000 0 3 -999.0000 -999.0000 -99.0000 -99.0000 0 4 -999.0000 -999.0000 -99.0000 -99.0000 0 5 -999.0000 -999.0000 -99.0000 -99.0000 0 6 -999.0000 -999.0000 -99.0000 -99.0000 0 7 -999.0000 -999.0000 -99.0000 -99.0000 0 8 -999.0000 -999.0000 -99.0000 -99.0000 0 9 -999.0000 -999.0000 -99.0000 -99.0000 0 10 -999.0000 -999.0000 -99.0000 -99.0000 0 11 -999.0000 -999.0000 -99.0000 -99.0000 0 12 -999.0000 -999.0000 -99.0000 -99.0000 0 13 -999.0000 -999.0000 -99.0000 -99.0000 0 14 -180.0000 -172.7151 80.0000 80.4083 0 15 -180.0000 -115.1274 80.0000 83.6250 0 16 -180.0000 -57.5397 80.0000 86.8167 0 17 -180.0000 57.2957 80.0000 90.0000 0 18 -0.0040 180.0000 80.0000 90.0000 0 19 57.5877 180.0000 80.0000 86.8167 0 20 115.1754 180.0000 80.0000 83.6250 0 21 172.7631 180.0000 80.0000 80.4083 0 22 -999.0000 -999.0000 -99.0000 -99.0000 0 23 -999.0000 -999.0000 -99.0000 -99.0000 0 24 -999.0000 -999.0000 -99.0000 -99.0000 0 25 -999.0000 -999.0000 -99.0000 -99.0000 0 26 -999.0000 -999.0000 -99.0000 -99.0000 0 27 -999.0000 -999.0000 -99.0000 -99.0000 0 28 -999.0000 -999.0000 -99.0000 -99.0000 0 29 -999.0000 -999.0000 -99.0000 -99.0000 0 30 -999.0000 -999.0000 -99.0000 -99.0000 0 31 -999.0000 -999.0000 -99.0000 -99.0000 0 32 -999.0000 -999.0000 -99.0000 -99.0000 0 33 -999.0000 -999.0000 -99.0000 -99.0000 0 34 -999.0000 -999.0000 -99.0000 -99.0000 0 35 -999.0000 -999.0000 -99.0000 -99.0000 1 0 -999.0000 -999.0000 -99.0000 -99.0000 1 1 -999.0000 -999.0000 -99.0000 -99.0000 1 2 -999.0000 -999.0000 -99.0000 -99.0000 1 3 -999.0000 -999.0000 -99.0000 -99.0000 1 4 -999.0000 -999.0000 -99.0000 -99.0000 1 5 -999.0000 -999.0000 -99.0000 -99.0000 1 6 -999.0000 -999.0000 -99.0000 -99.0000 1 7 -999.0000 -999.0000 -99.0000 -99.0000 1 8 -999.0000 -999.0000 -99.0000 -99.0000 1 9 -999.0000 -999.0000 -99.0000 -99.0000 1 10 -999.0000 -999.0000 -99.0000 -99.0000 1 11 -180.0000 -175.4039 70.0000 70.5333 1 12 -180.0000 -146.1659 70.0000 73.8750 1 13 -180.0000 -116.9278 70.0000 77.1667 1 14 -180.0000 -87.6898 70.0000 80.0000 1 15 -172.7631 -58.4517 70.0000 80.0000 1 16 -115.1754 -29.2137 70.0000 80.0000 1 17 -57.5877 0.0480 70.0000 80.0000 1 18 0.0000 57.6357 70.0000 80.0000 1 19 29.2380 115.2234 70.0000 80.0000 1 20 58.4761 172.8111 70.0000 80.0000 1 21 87.7141 180.0000 70.0000 80.0000 1 22 116.9522 180.0000 70.0000 77.1583 1 23 146.1902 180.0000 70.0000 73.8750 1 24 175.4283 180.0000 70.0000 70.5333 1 25 -999.0000 -999.0000 -99.0000 -99.0000 1 26 -999.0000 -999.0000 -99.0000 -99.0000 1 27 -999.0000 -999.0000 -99.0000 -99.0000 1 28 -999.0000 -999.0000 -99.0000 -99.0000 1 29 -999.0000 -999.0000 -99.0000 -99.0000 1 30 -999.0000 -999.0000 -99.0000 -99.0000 1 31 -999.0000 -999.0000 -99.0000 -99.0000 1 32 -999.0000 -999.0000 -99.0000 -99.0000 1 33 -999.0000 -999.0000 -99.0000 -99.0000 1 34 -999.0000 -999.0000 -99.0000 -99.0000 1 35 -999.0000 -999.0000 -99.0000 -99.0000 2 0 -999.0000 -999.0000 -99.0000 -99.0000 2 1 -999.0000 -999.0000 -99.0000 -99.0000 2 2 -999.0000 -999.0000 -99.0000 -99.0000 2 3 -999.0000 -999.0000 -99.0000 -99.0000 2 4 -999.0000 -999.0000 -99.0000 -99.0000 2 5 -999.0000 -999.0000 -99.0000 -99.0000 2 6 -999.0000 -999.0000 -99.0000 -99.0000 2 7 -999.0000 -999.0000 -99.0000 -99.0000 2 8 -999.0000 -999.0000 -99.0000 -99.0000 2 9 -180.0000 -159.9833 60.0000 63.6167 2 10 -180.0000 -139.9833 60.0000 67.1167 2 11 -180.0000 -119.9833 60.0000 70.0000 2 12 -175.4283 -99.9833 60.0000 70.0000 2 13 -146.1902 -79.9833 60.0000 70.0000 2 14 -116.9522 -59.9833 60.0000 70.0000 2 15 -87.7141 -39.9833 60.0000 70.0000 2 16 -58.4761 -19.9833 60.0000 70.0000 2 17 -29.2380 0.0244 60.0000 70.0000 2 18 0.0000 29.2624 60.0000 70.0000 2 19 20.0000 58.5005 60.0000 70.0000 2 20 40.0000 87.7385 60.0000 70.0000 2 21 60.0000 116.9765 60.0000 70.0000 2 22 80.0000 146.2146 60.0000 70.0000 2 23 100.0000 175.4526 60.0000 70.0000 2 24 120.0000 180.0000 60.0000 70.0000 2 25 140.0000 180.0000 60.0000 67.1167 2 26 160.0000 180.0000 60.0000 63.6167 2 27 -999.0000 -999.0000 -99.0000 -99.0000 2 28 -999.0000 -999.0000 -99.0000 -99.0000 2 29 -999.0000 -999.0000 -99.0000 -99.0000 2 30 -999.0000 -999.0000 -99.0000 -99.0000 2 31 -999.0000 -999.0000 -99.0000 -99.0000 2 32 -999.0000 -999.0000 -99.0000 -99.0000 2 33 -999.0000 -999.0000 -99.0000 -99.0000 2 34 -999.0000 -999.0000 -99.0000 -99.0000 2 35 -999.0000 -999.0000 -99.0000 -99.0000 3 0 -999.0000 -999.0000 -99.0000 -99.0000 3 1 -999.0000 -999.0000 -99.0000 -99.0000 3 2 -999.0000 -999.0000 -99.0000 -99.0000 3 3 -999.0000 -999.0000 -99.0000 -99.0000 3 4 -999.0000 -999.0000 -99.0000 -99.0000 3 5 -999.0000 -999.0000 -99.0000 -99.0000 3 6 -180.0000 -171.1167 50.0000 52.3333 3 7 -180.0000 -155.5594 50.0000 56.2583 3 8 -180.0000 -140.0022 50.0000 60.0000 3 9 -180.0000 -124.4449 50.0000 60.0000 3 10 -160.0000 -108.8877 50.0000 60.0000 3 11 -140.0000 -93.3305 50.0000 60.0000 3 12 -120.0000 -77.7732 50.0000 60.0000 3 13 -100.0000 -62.2160 50.0000 60.0000 3 14 -80.0000 -46.6588 50.0000 60.0000 3 15 -60.0000 -31.1015 50.0000 60.0000 3 16 -40.0000 -15.5443 50.0000 60.0000 3 17 -20.0000 0.0167 50.0000 60.0000 3 18 0.0000 20.0167 50.0000 60.0000 3 19 15.5572 40.0167 50.0000 60.0000 3 20 31.1145 60.0167 50.0000 60.0000 3 21 46.6717 80.0167 50.0000 60.0000 3 22 62.2290 100.0167 50.0000 60.0000 3 23 77.7862 120.0167 50.0000 60.0000 3 24 93.3434 140.0167 50.0000 60.0000 3 25 108.9007 160.0167 50.0000 60.0000 3 26 124.4579 180.0000 50.0000 60.0000 3 27 140.0151 180.0000 50.0000 60.0000 3 28 155.5724 180.0000 50.0000 56.2500 3 29 171.1296 180.0000 50.0000 52.3333 3 30 -999.0000 -999.0000 -99.0000 -99.0000 3 31 -999.0000 -999.0000 -99.0000 -99.0000 3 32 -999.0000 -999.0000 -99.0000 -99.0000 3 33 -999.0000 -999.0000 -99.0000 -99.0000 3 34 -999.0000 -999.0000 -99.0000 -99.0000 3 35 -999.0000 -999.0000 -99.0000 -99.0000 4 0 -999.0000 -999.0000 -99.0000 -99.0000 4 1 -999.0000 -999.0000 -99.0000 -99.0000 4 2 -999.0000 -999.0000 -99.0000 -99.0000 4 3 -999.0000 -999.0000 -99.0000 -99.0000 4 4 -180.0000 -169.6921 40.0000 43.7667 4 5 -180.0000 -156.6380 40.0000 48.1917 4 6 -180.0000 -143.5839 40.0000 50.0000 4 7 -171.1296 -130.5299 40.0000 50.0000 4 8 -155.5724 -117.4758 40.0000 50.0000 4 9 -140.0151 -104.4217 40.0000 50.0000 4 10 -124.4579 -91.3676 40.0000 50.0000 4 11 -108.9007 -78.3136 40.0000 50.0000 4 12 -93.3434 -65.2595 40.0000 50.0000 4 13 -77.7862 -52.2054 40.0000 50.0000 4 14 -62.2290 -39.1513 40.0000 50.0000 4 15 -46.6717 -26.0973 40.0000 50.0000 4 16 -31.1145 -13.0432 40.0000 50.0000 4 17 -15.5572 0.0130 40.0000 50.0000 4 18 0.0000 15.5702 40.0000 50.0000 4 19 13.0541 31.1274 40.0000 50.0000 4 20 26.1081 46.6847 40.0000 50.0000 4 21 39.1622 62.2419 40.0000 50.0000 4 22 52.2163 77.7992 40.0000 50.0000 4 23 65.2704 93.3564 40.0000 50.0000 4 24 78.3244 108.9136 40.0000 50.0000 4 25 91.3785 124.4709 40.0000 50.0000 4 26 104.4326 140.0281 40.0000 50.0000 4 27 117.4867 155.5853 40.0000 50.0000 4 28 130.5407 171.1426 40.0000 50.0000 4 29 143.5948 180.0000 40.0000 50.0000 4 30 156.6489 180.0000 40.0000 48.1917 4 31 169.7029 180.0000 40.0000 43.7583 4 32 -999.0000 -999.0000 -99.0000 -99.0000 4 33 -999.0000 -999.0000 -99.0000 -99.0000 4 34 -999.0000 -999.0000 -99.0000 -99.0000 4 35 -999.0000 -999.0000 -99.0000 -99.0000 5 0 -999.0000 -999.0000 -99.0000 -99.0000 5 1 -999.0000 -999.0000 -99.0000 -99.0000 5 2 -180.0000 -173.1955 30.0000 33.5583 5 3 -180.0000 -161.6485 30.0000 38.9500 5 4 -180.0000 -150.1014 30.0000 40.0000 5 5 -169.7029 -138.5544 30.0000 40.0000 5 6 -156.6489 -127.0074 30.0000 40.0000 5 7 -143.5948 -115.4604 30.0000 40.0000 5 8 -130.5407 -103.9134 30.0000 40.0000 5 9 -117.4867 -92.3664 30.0000 40.0000 5 10 -104.4326 -80.8194 30.0000 40.0000 5 11 -91.3785 -69.2724 30.0000 40.0000 5 12 -78.3244 -57.7254 30.0000 40.0000 5 13 -65.2704 -46.1784 30.0000 40.0000 5 14 -52.2163 -34.6314 30.0000 40.0000 5 15 -39.1622 -23.0844 30.0000 40.0000 5 16 -26.1081 -11.5374 30.0000 40.0000 5 17 -13.0541 0.0109 30.0000 40.0000 5 18 0.0000 13.0650 30.0000 40.0000 5 19 11.5470 26.1190 30.0000 40.0000 5 20 23.0940 39.1731 30.0000 40.0000 5 21 34.6410 52.2272 30.0000 40.0000 5 22 46.1880 65.2812 30.0000 40.0000 5 23 57.7350 78.3353 30.0000 40.0000 5 24 69.2820 91.3894 30.0000 40.0000 5 25 80.8290 104.4435 30.0000 40.0000 5 26 92.3760 117.4975 30.0000 40.0000 5 27 103.9230 130.5516 30.0000 40.0000 5 28 115.4701 143.6057 30.0000 40.0000 5 29 127.0171 156.6598 30.0000 40.0000 5 30 138.5641 169.7138 30.0000 40.0000 5 31 150.1111 180.0000 30.0000 40.0000 5 32 161.6581 180.0000 30.0000 38.9417 5 33 173.2051 180.0000 30.0000 33.5583 5 34 -999.0000 -999.0000 -99.0000 -99.0000 5 35 -999.0000 -999.0000 -99.0000 -99.0000 6 0 -999.0000 -999.0000 -99.0000 -99.0000 6 1 -180.0000 -170.2596 20.0000 27.2667 6 2 -180.0000 -159.6178 20.0000 30.0000 6 3 -173.2051 -148.9760 20.0000 30.0000 6 4 -161.6581 -138.3342 20.0000 30.0000 6 5 -150.1111 -127.6925 20.0000 30.0000 6 6 -138.5641 -117.0507 20.0000 30.0000 6 7 -127.0171 -106.4089 20.0000 30.0000 6 8 -115.4701 -95.7671 20.0000 30.0000 6 9 -103.9230 -85.1254 20.0000 30.0000 6 10 -92.3760 -74.4836 20.0000 30.0000 6 11 -80.8290 -63.8418 20.0000 30.0000 6 12 -69.2820 -53.2000 20.0000 30.0000 6 13 -57.7350 -42.5582 20.0000 30.0000 6 14 -46.1880 -31.9165 20.0000 30.0000 6 15 -34.6410 -21.2747 20.0000 30.0000 6 16 -23.0940 -10.6329 20.0000 30.0000 6 17 -11.5470 0.0096 20.0000 30.0000 6 18 0.0000 11.5566 20.0000 30.0000 6 19 10.6418 23.1036 20.0000 30.0000 6 20 21.2836 34.6506 20.0000 30.0000 6 21 31.9253 46.1976 20.0000 30.0000 6 22 42.5671 57.7446 20.0000 30.0000 6 23 53.2089 69.2917 20.0000 30.0000 6 24 63.8507 80.8387 20.0000 30.0000 6 25 74.4924 92.3857 20.0000 30.0000 6 26 85.1342 103.9327 20.0000 30.0000 6 27 95.7760 115.4797 20.0000 30.0000 6 28 106.4178 127.0267 20.0000 30.0000 6 29 117.0596 138.5737 20.0000 30.0000 6 30 127.7013 150.1207 20.0000 30.0000 6 31 138.3431 161.6677 20.0000 30.0000 6 32 148.9849 173.2147 20.0000 30.0000 6 33 159.6267 180.0000 20.0000 30.0000 6 34 170.2684 180.0000 20.0000 27.2667 6 35 -999.0000 -999.0000 -99.0000 -99.0000 7 0 -180.0000 -172.6141 10.0000 19.1917 7 1 -180.0000 -162.4598 10.0000 20.0000 7 2 -170.2684 -152.3055 10.0000 20.0000 7 3 -159.6267 -142.1513 10.0000 20.0000 7 4 -148.9849 -131.9970 10.0000 20.0000 7 5 -138.3431 -121.8427 10.0000 20.0000 7 6 -127.7013 -111.6885 10.0000 20.0000 7 7 -117.0596 -101.5342 10.0000 20.0000 7 8 -106.4178 -91.3799 10.0000 20.0000 7 9 -95.7760 -81.2257 10.0000 20.0000 7 10 -85.1342 -71.0714 10.0000 20.0000 7 11 -74.4924 -60.9171 10.0000 20.0000 7 12 -63.8507 -50.7629 10.0000 20.0000 7 13 -53.2089 -40.6086 10.0000 20.0000 7 14 -42.5671 -30.4543 10.0000 20.0000 7 15 -31.9253 -20.3001 10.0000 20.0000 7 16 -21.2836 -10.1458 10.0000 20.0000 7 17 -10.6418 0.0089 10.0000 20.0000 7 18 0.0000 10.6506 10.0000 20.0000 7 19 10.1543 21.2924 10.0000 20.0000 7 20 20.3085 31.9342 10.0000 20.0000 7 21 30.4628 42.5760 10.0000 20.0000 7 22 40.6171 53.2178 10.0000 20.0000 7 23 50.7713 63.8595 10.0000 20.0000 7 24 60.9256 74.5013 10.0000 20.0000 7 25 71.0799 85.1431 10.0000 20.0000 7 26 81.2341 95.7849 10.0000 20.0000 7 27 91.3884 106.4266 10.0000 20.0000 7 28 101.5427 117.0684 10.0000 20.0000 7 29 111.6969 127.7102 10.0000 20.0000 7 30 121.8512 138.3520 10.0000 20.0000 7 31 132.0055 148.9938 10.0000 20.0000 7 32 142.1597 159.6355 10.0000 20.0000 7 33 152.3140 170.2773 10.0000 20.0000 7 34 162.4683 180.0000 10.0000 20.0000 7 35 172.6225 180.0000 10.0000 19.1833 8 0 -180.0000 -169.9917 -0.0000 10.0000 8 1 -172.6225 -159.9917 -0.0000 10.0000 8 2 -162.4683 -149.9917 -0.0000 10.0000 8 3 -152.3140 -139.9917 -0.0000 10.0000 8 4 -142.1597 -129.9917 -0.0000 10.0000 8 5 -132.0055 -119.9917 -0.0000 10.0000 8 6 -121.8512 -109.9917 -0.0000 10.0000 8 7 -111.6969 -99.9917 -0.0000 10.0000 8 8 -101.5427 -89.9917 -0.0000 10.0000 8 9 -91.3884 -79.9917 -0.0000 10.0000 8 10 -81.2341 -69.9917 -0.0000 10.0000 8 11 -71.0799 -59.9917 -0.0000 10.0000 8 12 -60.9256 -49.9917 -0.0000 10.0000 8 13 -50.7713 -39.9917 -0.0000 10.0000 8 14 -40.6171 -29.9917 -0.0000 10.0000 8 15 -30.4628 -19.9917 -0.0000 10.0000 8 16 -20.3085 -9.9917 -0.0000 10.0000 8 17 -10.1543 0.0085 -0.0000 10.0000 8 18 0.0000 10.1627 -0.0000 10.0000 8 19 10.0000 20.3170 -0.0000 10.0000 8 20 20.0000 30.4713 -0.0000 10.0000 8 21 30.0000 40.6255 -0.0000 10.0000 8 22 40.0000 50.7798 -0.0000 10.0000 8 23 50.0000 60.9341 -0.0000 10.0000 8 24 60.0000 71.0883 -0.0000 10.0000 8 25 70.0000 81.2426 -0.0000 10.0000 8 26 80.0000 91.3969 -0.0000 10.0000 8 27 90.0000 101.5511 -0.0000 10.0000 8 28 100.0000 111.7054 -0.0000 10.0000 8 29 110.0000 121.8597 -0.0000 10.0000 8 30 120.0000 132.0139 -0.0000 10.0000 8 31 130.0000 142.1682 -0.0000 10.0000 8 32 140.0000 152.3225 -0.0000 10.0000 8 33 150.0000 162.4767 -0.0000 10.0000 8 34 160.0000 172.6310 -0.0000 10.0000 8 35 170.0000 180.0000 -0.0000 10.0000 9 0 -180.0000 -169.9917 -10.0000 -0.0000 9 1 -172.6225 -159.9917 -10.0000 -0.0000 9 2 -162.4683 -149.9917 -10.0000 -0.0000 9 3 -152.3140 -139.9917 -10.0000 -0.0000 9 4 -142.1597 -129.9917 -10.0000 -0.0000 9 5 -132.0055 -119.9917 -10.0000 -0.0000 9 6 -121.8512 -109.9917 -10.0000 -0.0000 9 7 -111.6969 -99.9917 -10.0000 -0.0000 9 8 -101.5427 -89.9917 -10.0000 -0.0000 9 9 -91.3884 -79.9917 -10.0000 -0.0000 9 10 -81.2341 -69.9917 -10.0000 -0.0000 9 11 -71.0799 -59.9917 -10.0000 -0.0000 9 12 -60.9256 -49.9917 -10.0000 -0.0000 9 13 -50.7713 -39.9917 -10.0000 -0.0000 9 14 -40.6171 -29.9917 -10.0000 -0.0000 9 15 -30.4628 -19.9917 -10.0000 -0.0000 9 16 -20.3085 -9.9917 -10.0000 -0.0000 9 17 -10.1543 0.0085 -10.0000 -0.0000 9 18 0.0000 10.1627 -10.0000 -0.0000 9 19 10.0000 20.3170 -10.0000 -0.0000 9 20 20.0000 30.4713 -10.0000 -0.0000 9 21 30.0000 40.6255 -10.0000 -0.0000 9 22 40.0000 50.7798 -10.0000 -0.0000 9 23 50.0000 60.9341 -10.0000 -0.0000 9 24 60.0000 71.0883 -10.0000 -0.0000 9 25 70.0000 81.2426 -10.0000 -0.0000 9 26 80.0000 91.3969 -10.0000 -0.0000 9 27 90.0000 101.5511 -10.0000 -0.0000 9 28 100.0000 111.7054 -10.0000 -0.0000 9 29 110.0000 121.8597 -10.0000 -0.0000 9 30 120.0000 132.0139 -10.0000 -0.0000 9 31 130.0000 142.1682 -10.0000 -0.0000 9 32 140.0000 152.3225 -10.0000 -0.0000 9 33 150.0000 162.4767 -10.0000 -0.0000 9 34 160.0000 172.6310 -10.0000 -0.0000 9 35 170.0000 180.0000 -10.0000 -0.0000 10 0 -180.0000 -172.6141 -19.1917 -10.0000 10 1 -180.0000 -162.4598 -20.0000 -10.0000 10 2 -170.2684 -152.3055 -20.0000 -10.0000 10 3 -159.6267 -142.1513 -20.0000 -10.0000 10 4 -148.9849 -131.9970 -20.0000 -10.0000 10 5 -138.3431 -121.8427 -20.0000 -10.0000 10 6 -127.7013 -111.6885 -20.0000 -10.0000 10 7 -117.0596 -101.5342 -20.0000 -10.0000 10 8 -106.4178 -91.3799 -20.0000 -10.0000 10 9 -95.7760 -81.2257 -20.0000 -10.0000 10 10 -85.1342 -71.0714 -20.0000 -10.0000 10 11 -74.4924 -60.9171 -20.0000 -10.0000 10 12 -63.8507 -50.7629 -20.0000 -10.0000 10 13 -53.2089 -40.6086 -20.0000 -10.0000 10 14 -42.5671 -30.4543 -20.0000 -10.0000 10 15 -31.9253 -20.3001 -20.0000 -10.0000 10 16 -21.2836 -10.1458 -20.0000 -10.0000 10 17 -10.6418 0.0089 -20.0000 -10.0000 10 18 0.0000 10.6506 -20.0000 -10.0000 10 19 10.1543 21.2924 -20.0000 -10.0000 10 20 20.3085 31.9342 -20.0000 -10.0000 10 21 30.4628 42.5760 -20.0000 -10.0000 10 22 40.6171 53.2178 -20.0000 -10.0000 10 23 50.7713 63.8595 -20.0000 -10.0000 10 24 60.9256 74.5013 -20.0000 -10.0000 10 25 71.0799 85.1431 -20.0000 -10.0000 10 26 81.2341 95.7849 -20.0000 -10.0000 10 27 91.3884 106.4266 -20.0000 -10.0000 10 28 101.5427 117.0684 -20.0000 -10.0000 10 29 111.6969 127.7102 -20.0000 -10.0000 10 30 121.8512 138.3520 -20.0000 -10.0000 10 31 132.0055 148.9938 -20.0000 -10.0000 10 32 142.1597 159.6355 -20.0000 -10.0000 10 33 152.3140 170.2773 -20.0000 -10.0000 10 34 162.4683 180.0000 -20.0000 -10.0000 10 35 172.6225 180.0000 -19.1833 -10.0000 11 0 -999.0000 -999.0000 -99.0000 -99.0000 11 1 -180.0000 -170.2596 -27.2667 -20.0000 11 2 -180.0000 -159.6178 -30.0000 -20.0000 11 3 -173.2051 -148.9760 -30.0000 -20.0000 11 4 -161.6581 -138.3342 -30.0000 -20.0000 11 5 -150.1111 -127.6925 -30.0000 -20.0000 11 6 -138.5641 -117.0507 -30.0000 -20.0000 11 7 -127.0171 -106.4089 -30.0000 -20.0000 11 8 -115.4701 -95.7671 -30.0000 -20.0000 11 9 -103.9230 -85.1254 -30.0000 -20.0000 11 10 -92.3760 -74.4836 -30.0000 -20.0000 11 11 -80.8290 -63.8418 -30.0000 -20.0000 11 12 -69.2820 -53.2000 -30.0000 -20.0000 11 13 -57.7350 -42.5582 -30.0000 -20.0000 11 14 -46.1880 -31.9165 -30.0000 -20.0000 11 15 -34.6410 -21.2747 -30.0000 -20.0000 11 16 -23.0940 -10.6329 -30.0000 -20.0000 11 17 -11.5470 0.0096 -30.0000 -20.0000 11 18 0.0000 11.5566 -30.0000 -20.0000 11 19 10.6418 23.1036 -30.0000 -20.0000 11 20 21.2836 34.6506 -30.0000 -20.0000 11 21 31.9253 46.1976 -30.0000 -20.0000 11 22 42.5671 57.7446 -30.0000 -20.0000 11 23 53.2089 69.2917 -30.0000 -20.0000 11 24 63.8507 80.8387 -30.0000 -20.0000 11 25 74.4924 92.3857 -30.0000 -20.0000 11 26 85.1342 103.9327 -30.0000 -20.0000 11 27 95.7760 115.4797 -30.0000 -20.0000 11 28 106.4178 127.0267 -30.0000 -20.0000 11 29 117.0596 138.5737 -30.0000 -20.0000 11 30 127.7013 150.1207 -30.0000 -20.0000 11 31 138.3431 161.6677 -30.0000 -20.0000 11 32 148.9849 173.2147 -30.0000 -20.0000 11 33 159.6267 180.0000 -30.0000 -20.0000 11 34 170.2684 180.0000 -27.2667 -20.0000 11 35 -999.0000 -999.0000 -99.0000 -99.0000 12 0 -999.0000 -999.0000 -99.0000 -99.0000 12 1 -999.0000 -999.0000 -99.0000 -99.0000 12 2 -180.0000 -173.1955 -33.5583 -30.0000 12 3 -180.0000 -161.6485 -38.9500 -30.0000 12 4 -180.0000 -150.1014 -40.0000 -30.0000 12 5 -169.7029 -138.5544 -40.0000 -30.0000 12 6 -156.6489 -127.0074 -40.0000 -30.0000 12 7 -143.5948 -115.4604 -40.0000 -30.0000 12 8 -130.5407 -103.9134 -40.0000 -30.0000 12 9 -117.4867 -92.3664 -40.0000 -30.0000 12 10 -104.4326 -80.8194 -40.0000 -30.0000 12 11 -91.3785 -69.2724 -40.0000 -30.0000 12 12 -78.3244 -57.7254 -40.0000 -30.0000 12 13 -65.2704 -46.1784 -40.0000 -30.0000 12 14 -52.2163 -34.6314 -40.0000 -30.0000 12 15 -39.1622 -23.0844 -40.0000 -30.0000 12 16 -26.1081 -11.5374 -40.0000 -30.0000 12 17 -13.0541 0.0109 -40.0000 -30.0000 12 18 0.0000 13.0650 -40.0000 -30.0000 12 19 11.5470 26.1190 -40.0000 -30.0000 12 20 23.0940 39.1731 -40.0000 -30.0000 12 21 34.6410 52.2272 -40.0000 -30.0000 12 22 46.1880 65.2812 -40.0000 -30.0000 12 23 57.7350 78.3353 -40.0000 -30.0000 12 24 69.2820 91.3894 -40.0000 -30.0000 12 25 80.8290 104.4435 -40.0000 -30.0000 12 26 92.3760 117.4975 -40.0000 -30.0000 12 27 103.9230 130.5516 -40.0000 -30.0000 12 28 115.4701 143.6057 -40.0000 -30.0000 12 29 127.0171 156.6598 -40.0000 -30.0000 12 30 138.5641 169.7138 -40.0000 -30.0000 12 31 150.1111 180.0000 -40.0000 -30.0000 12 32 161.6581 180.0000 -38.9417 -30.0000 12 33 173.2051 180.0000 -33.5583 -30.0000 12 34 -999.0000 -999.0000 -99.0000 -99.0000 12 35 -999.0000 -999.0000 -99.0000 -99.0000 13 0 -999.0000 -999.0000 -99.0000 -99.0000 13 1 -999.0000 -999.0000 -99.0000 -99.0000 13 2 -999.0000 -999.0000 -99.0000 -99.0000 13 3 -999.0000 -999.0000 -99.0000 -99.0000 13 4 -180.0000 -169.6921 -43.7667 -40.0000 13 5 -180.0000 -156.6380 -48.1917 -40.0000 13 6 -180.0000 -143.5839 -50.0000 -40.0000 13 7 -171.1296 -130.5299 -50.0000 -40.0000 13 8 -155.5724 -117.4758 -50.0000 -40.0000 13 9 -140.0151 -104.4217 -50.0000 -40.0000 13 10 -124.4579 -91.3676 -50.0000 -40.0000 13 11 -108.9007 -78.3136 -50.0000 -40.0000 13 12 -93.3434 -65.2595 -50.0000 -40.0000 13 13 -77.7862 -52.2054 -50.0000 -40.0000 13 14 -62.2290 -39.1513 -50.0000 -40.0000 13 15 -46.6717 -26.0973 -50.0000 -40.0000 13 16 -31.1145 -13.0432 -50.0000 -40.0000 13 17 -15.5572 0.0130 -50.0000 -40.0000 13 18 0.0000 15.5702 -50.0000 -40.0000 13 19 13.0541 31.1274 -50.0000 -40.0000 13 20 26.1081 46.6847 -50.0000 -40.0000 13 21 39.1622 62.2419 -50.0000 -40.0000 13 22 52.2163 77.7992 -50.0000 -40.0000 13 23 65.2704 93.3564 -50.0000 -40.0000 13 24 78.3244 108.9136 -50.0000 -40.0000 13 25 91.3785 124.4709 -50.0000 -40.0000 13 26 104.4326 140.0281 -50.0000 -40.0000 13 27 117.4867 155.5853 -50.0000 -40.0000 13 28 130.5407 171.1426 -50.0000 -40.0000 13 29 143.5948 180.0000 -50.0000 -40.0000 13 30 156.6489 180.0000 -48.1917 -40.0000 13 31 169.7029 180.0000 -43.7583 -40.0000 13 32 -999.0000 -999.0000 -99.0000 -99.0000 13 33 -999.0000 -999.0000 -99.0000 -99.0000 13 34 -999.0000 -999.0000 -99.0000 -99.0000 13 35 -999.0000 -999.0000 -99.0000 -99.0000 14 0 -999.0000 -999.0000 -99.0000 -99.0000 14 1 -999.0000 -999.0000 -99.0000 -99.0000 14 2 -999.0000 -999.0000 -99.0000 -99.0000 14 3 -999.0000 -999.0000 -99.0000 -99.0000 14 4 -999.0000 -999.0000 -99.0000 -99.0000 14 5 -999.0000 -999.0000 -99.0000 -99.0000 14 6 -180.0000 -171.1167 -52.3333 -50.0000 14 7 -180.0000 -155.5594 -56.2583 -50.0000 14 8 -180.0000 -140.0022 -60.0000 -50.0000 14 9 -180.0000 -124.4449 -60.0000 -50.0000 14 10 -160.0000 -108.8877 -60.0000 -50.0000 14 11 -140.0000 -93.3305 -60.0000 -50.0000 14 12 -120.0000 -77.7732 -60.0000 -50.0000 14 13 -100.0000 -62.2160 -60.0000 -50.0000 14 14 -80.0000 -46.6588 -60.0000 -50.0000 14 15 -60.0000 -31.1015 -60.0000 -50.0000 14 16 -40.0000 -15.5443 -60.0000 -50.0000 14 17 -20.0000 0.0167 -60.0000 -50.0000 14 18 0.0000 20.0167 -60.0000 -50.0000 14 19 15.5572 40.0167 -60.0000 -50.0000 14 20 31.1145 60.0167 -60.0000 -50.0000 14 21 46.6717 80.0167 -60.0000 -50.0000 14 22 62.2290 100.0167 -60.0000 -50.0000 14 23 77.7862 120.0167 -60.0000 -50.0000 14 24 93.3434 140.0167 -60.0000 -50.0000 14 25 108.9007 160.0167 -60.0000 -50.0000 14 26 124.4579 180.0000 -60.0000 -50.0000 14 27 140.0151 180.0000 -60.0000 -50.0000 14 28 155.5724 180.0000 -56.2500 -50.0000 14 29 171.1296 180.0000 -52.3333 -50.0000 14 30 -999.0000 -999.0000 -99.0000 -99.0000 14 31 -999.0000 -999.0000 -99.0000 -99.0000 14 32 -999.0000 -999.0000 -99.0000 -99.0000 14 33 -999.0000 -999.0000 -99.0000 -99.0000 14 34 -999.0000 -999.0000 -99.0000 -99.0000 14 35 -999.0000 -999.0000 -99.0000 -99.0000 15 0 -999.0000 -999.0000 -99.0000 -99.0000 15 1 -999.0000 -999.0000 -99.0000 -99.0000 15 2 -999.0000 -999.0000 -99.0000 -99.0000 15 3 -999.0000 -999.0000 -99.0000 -99.0000 15 4 -999.0000 -999.0000 -99.0000 -99.0000 15 5 -999.0000 -999.0000 -99.0000 -99.0000 15 6 -999.0000 -999.0000 -99.0000 -99.0000 15 7 -999.0000 -999.0000 -99.0000 -99.0000 15 8 -999.0000 -999.0000 -99.0000 -99.0000 15 9 -180.0000 -159.9833 -63.6167 -60.0000 15 10 -180.0000 -139.9833 -67.1167 -60.0000 15 11 -180.0000 -119.9833 -70.0000 -60.0000 15 12 -175.4283 -99.9833 -70.0000 -60.0000 15 13 -146.1902 -79.9833 -70.0000 -60.0000 15 14 -116.9522 -59.9833 -70.0000 -60.0000 15 15 -87.7141 -39.9833 -70.0000 -60.0000 15 16 -58.4761 -19.9833 -70.0000 -60.0000 15 17 -29.2380 0.0244 -70.0000 -60.0000 15 18 0.0000 29.2624 -70.0000 -60.0000 15 19 20.0000 58.5005 -70.0000 -60.0000 15 20 40.0000 87.7385 -70.0000 -60.0000 15 21 60.0000 116.9765 -70.0000 -60.0000 15 22 80.0000 146.2146 -70.0000 -60.0000 15 23 100.0000 175.4526 -70.0000 -60.0000 15 24 120.0000 180.0000 -70.0000 -60.0000 15 25 140.0000 180.0000 -67.1167 -60.0000 15 26 160.0000 180.0000 -63.6167 -60.0000 15 27 -999.0000 -999.0000 -99.0000 -99.0000 15 28 -999.0000 -999.0000 -99.0000 -99.0000 15 29 -999.0000 -999.0000 -99.0000 -99.0000 15 30 -999.0000 -999.0000 -99.0000 -99.0000 15 31 -999.0000 -999.0000 -99.0000 -99.0000 15 32 -999.0000 -999.0000 -99.0000 -99.0000 15 33 -999.0000 -999.0000 -99.0000 -99.0000 15 34 -999.0000 -999.0000 -99.0000 -99.0000 15 35 -999.0000 -999.0000 -99.0000 -99.0000 16 0 -999.0000 -999.0000 -99.0000 -99.0000 16 1 -999.0000 -999.0000 -99.0000 -99.0000 16 2 -999.0000 -999.0000 -99.0000 -99.0000 16 3 -999.0000 -999.0000 -99.0000 -99.0000 16 4 -999.0000 -999.0000 -99.0000 -99.0000 16 5 -999.0000 -999.0000 -99.0000 -99.0000 16 6 -999.0000 -999.0000 -99.0000 -99.0000 16 7 -999.0000 -999.0000 -99.0000 -99.0000 16 8 -999.0000 -999.0000 -99.0000 -99.0000 16 9 -999.0000 -999.0000 -99.0000 -99.0000 16 10 -999.0000 -999.0000 -99.0000 -99.0000 16 11 -180.0000 -175.4039 -70.5333 -70.0000 16 12 -180.0000 -146.1659 -73.8750 -70.0000 16 13 -180.0000 -116.9278 -77.1667 -70.0000 16 14 -180.0000 -87.6898 -80.0000 -70.0000 16 15 -172.7631 -58.4517 -80.0000 -70.0000 16 16 -115.1754 -29.2137 -80.0000 -70.0000 16 17 -57.5877 0.0480 -80.0000 -70.0000 16 18 0.0000 57.6357 -80.0000 -70.0000 16 19 29.2380 115.2234 -80.0000 -70.0000 16 20 58.4761 172.8111 -80.0000 -70.0000 16 21 87.7141 180.0000 -80.0000 -70.0000 16 22 116.9522 180.0000 -77.1583 -70.0000 16 23 146.1902 180.0000 -73.8750 -70.0000 16 24 175.4283 180.0000 -70.5333 -70.0000 16 25 -999.0000 -999.0000 -99.0000 -99.0000 16 26 -999.0000 -999.0000 -99.0000 -99.0000 16 27 -999.0000 -999.0000 -99.0000 -99.0000 16 28 -999.0000 -999.0000 -99.0000 -99.0000 16 29 -999.0000 -999.0000 -99.0000 -99.0000 16 30 -999.0000 -999.0000 -99.0000 -99.0000 16 31 -999.0000 -999.0000 -99.0000 -99.0000 16 32 -999.0000 -999.0000 -99.0000 -99.0000 16 33 -999.0000 -999.0000 -99.0000 -99.0000 16 34 -999.0000 -999.0000 -99.0000 -99.0000 16 35 -999.0000 -999.0000 -99.0000 -99.0000 17 0 -999.0000 -999.0000 -99.0000 -99.0000 17 1 -999.0000 -999.0000 -99.0000 -99.0000 17 2 -999.0000 -999.0000 -99.0000 -99.0000 17 3 -999.0000 -999.0000 -99.0000 -99.0000 17 4 -999.0000 -999.0000 -99.0000 -99.0000 17 5 -999.0000 -999.0000 -99.0000 -99.0000 17 6 -999.0000 -999.0000 -99.0000 -99.0000 17 7 -999.0000 -999.0000 -99.0000 -99.0000 17 8 -999.0000 -999.0000 -99.0000 -99.0000 17 9 -999.0000 -999.0000 -99.0000 -99.0000 17 10 -999.0000 -999.0000 -99.0000 -99.0000 17 11 -999.0000 -999.0000 -99.0000 -99.0000 17 12 -999.0000 -999.0000 -99.0000 -99.0000 17 13 -999.0000 -999.0000 -99.0000 -99.0000 17 14 -180.0000 -172.7151 -80.4083 -80.0000 17 15 -180.0000 -115.1274 -83.6250 -80.0000 17 16 -180.0000 -57.5397 -86.8167 -80.0000 17 17 -180.0000 57.2957 -90.0000 -80.0000 17 18 -0.0040 180.0000 -90.0000 -80.0000 17 19 57.5877 180.0000 -86.8167 -80.0000 17 20 115.1754 180.0000 -83.6250 -80.0000 17 21 172.7631 180.0000 -80.4083 -80.0000 17 22 -999.0000 -999.0000 -99.0000 -99.0000 17 23 -999.0000 -999.0000 -99.0000 -99.0000 17 24 -999.0000 -999.0000 -99.0000 -99.0000 17 25 -999.0000 -999.0000 -99.0000 -99.0000 17 26 -999.0000 -999.0000 -99.0000 -99.0000 17 27 -999.0000 -999.0000 -99.0000 -99.0000 17 28 -999.0000 -999.0000 -99.0000 -99.0000 17 29 -999.0000 -999.0000 -99.0000 -99.0000 17 30 -999.0000 -999.0000 -99.0000 -99.0000 17 31 -999.0000 -999.0000 -99.0000 -99.0000 17 32 -999.0000 -999.0000 -99.0000 -99.0000 17 33 -999.0000 -999.0000 -99.0000 -99.0000 17 34 -999.0000 -999.0000 -99.0000 -99.0000 17 35 -999.0000 -999.0000 -99.0000 -99.0000 \ No newline at end of file +Sinusoidal Grid Bounding Coordinates of MODLAND Tiles ntile_vert = 18 ntile_horiz = 36 iv ih lon_min lon_max lat_min lat_max 0 0 -999.0000 -999.0000 -99.0000 -99.0000 0 1 -999.0000 -999.0000 -99.0000 -99.0000 0 2 -999.0000 -999.0000 -99.0000 -99.0000 0 3 -999.0000 -999.0000 -99.0000 -99.0000 0 4 -999.0000 -999.0000 -99.0000 -99.0000 0 5 -999.0000 -999.0000 -99.0000 -99.0000 0 6 -999.0000 -999.0000 -99.0000 -99.0000 0 7 -999.0000 -999.0000 -99.0000 -99.0000 0 8 -999.0000 -999.0000 -99.0000 -99.0000 0 9 -999.0000 -999.0000 -99.0000 -99.0000 0 10 -999.0000 -999.0000 -99.0000 -99.0000 0 11 -999.0000 -999.0000 -99.0000 -99.0000 0 12 -999.0000 -999.0000 -99.0000 -99.0000 0 13 -999.0000 -999.0000 -99.0000 -99.0000 0 14 -180.0000 -172.7151 80.0000 80.4083 0 15 -180.0000 -115.1274 80.0000 83.6250 0 16 -180.0000 -57.5397 80.0000 86.8167 0 17 -180.0000 57.2957 80.0000 90.0000 0 18 -0.0040 180.0000 80.0000 90.0000 0 19 57.5877 180.0000 80.0000 86.8167 0 20 115.1754 180.0000 80.0000 83.6250 0 21 172.7631 180.0000 80.0000 80.4083 0 22 -999.0000 -999.0000 -99.0000 -99.0000 0 23 -999.0000 -999.0000 -99.0000 -99.0000 0 24 -999.0000 -999.0000 -99.0000 -99.0000 0 25 -999.0000 -999.0000 -99.0000 -99.0000 0 26 -999.0000 -999.0000 -99.0000 -99.0000 0 27 -999.0000 -999.0000 -99.0000 -99.0000 0 28 -999.0000 -999.0000 -99.0000 -99.0000 0 29 -999.0000 -999.0000 -99.0000 -99.0000 0 30 -999.0000 -999.0000 -99.0000 -99.0000 0 31 -999.0000 -999.0000 -99.0000 -99.0000 0 32 -999.0000 -999.0000 -99.0000 -99.0000 0 33 -999.0000 -999.0000 -99.0000 -99.0000 0 34 -999.0000 -999.0000 -99.0000 -99.0000 0 35 -999.0000 -999.0000 -99.0000 -99.0000 1 0 -999.0000 -999.0000 -99.0000 -99.0000 1 1 -999.0000 -999.0000 -99.0000 -99.0000 1 2 -999.0000 -999.0000 -99.0000 -99.0000 1 3 -999.0000 -999.0000 -99.0000 -99.0000 1 4 -999.0000 -999.0000 -99.0000 -99.0000 1 5 -999.0000 -999.0000 -99.0000 -99.0000 1 6 -999.0000 -999.0000 -99.0000 -99.0000 1 7 -999.0000 -999.0000 -99.0000 -99.0000 1 8 -999.0000 -999.0000 -99.0000 -99.0000 1 9 -999.0000 -999.0000 -99.0000 -99.0000 1 10 -999.0000 -999.0000 -99.0000 -99.0000 1 11 -180.0000 -175.4039 70.0000 70.5333 1 12 -180.0000 -146.1659 70.0000 73.8750 1 13 -180.0000 -116.9278 70.0000 77.1667 1 14 -180.0000 -87.6898 70.0000 80.0000 1 15 -172.7631 -58.4517 70.0000 80.0000 1 16 -115.1754 -29.2137 70.0000 80.0000 1 17 -57.5877 0.0480 70.0000 80.0000 1 18 0.0000 57.6357 70.0000 80.0000 1 19 29.2380 115.2234 70.0000 80.0000 1 20 58.4761 172.8111 70.0000 80.0000 1 21 87.7141 180.0000 70.0000 80.0000 1 22 116.9522 180.0000 70.0000 77.1583 1 23 146.1902 180.0000 70.0000 73.8750 1 24 175.4283 180.0000 70.0000 70.5333 1 25 -999.0000 -999.0000 -99.0000 -99.0000 1 26 -999.0000 -999.0000 -99.0000 -99.0000 1 27 -999.0000 -999.0000 -99.0000 -99.0000 1 28 -999.0000 -999.0000 -99.0000 -99.0000 1 29 -999.0000 -999.0000 -99.0000 -99.0000 1 30 -999.0000 -999.0000 -99.0000 -99.0000 1 31 -999.0000 -999.0000 -99.0000 -99.0000 1 32 -999.0000 -999.0000 -99.0000 -99.0000 1 33 -999.0000 -999.0000 -99.0000 -99.0000 1 34 -999.0000 -999.0000 -99.0000 -99.0000 1 35 -999.0000 -999.0000 -99.0000 -99.0000 2 0 -999.0000 -999.0000 -99.0000 -99.0000 2 1 -999.0000 -999.0000 -99.0000 -99.0000 2 2 -999.0000 -999.0000 -99.0000 -99.0000 2 3 -999.0000 -999.0000 -99.0000 -99.0000 2 4 -999.0000 -999.0000 -99.0000 -99.0000 2 5 -999.0000 -999.0000 -99.0000 -99.0000 2 6 -999.0000 -999.0000 -99.0000 -99.0000 2 7 -999.0000 -999.0000 -99.0000 -99.0000 2 8 -999.0000 -999.0000 -99.0000 -99.0000 2 9 -180.0000 -159.9833 60.0000 63.6167 2 10 -180.0000 -139.9833 60.0000 67.1167 2 11 -180.0000 -119.9833 60.0000 70.0000 2 12 -175.4283 -99.9833 60.0000 70.0000 2 13 -146.1902 -79.9833 60.0000 70.0000 2 14 -116.9522 -59.9833 60.0000 70.0000 2 15 -87.7141 -39.9833 60.0000 70.0000 2 16 -58.4761 -19.9833 60.0000 70.0000 2 17 -29.2380 0.0244 60.0000 70.0000 2 18 0.0000 29.2624 60.0000 70.0000 2 19 20.0000 58.5005 60.0000 70.0000 2 20 40.0000 87.7385 60.0000 70.0000 2 21 60.0000 116.9765 60.0000 70.0000 2 22 80.0000 146.2146 60.0000 70.0000 2 23 100.0000 175.4526 60.0000 70.0000 2 24 120.0000 180.0000 60.0000 70.0000 2 25 140.0000 180.0000 60.0000 67.1167 2 26 160.0000 180.0000 60.0000 63.6167 2 27 -999.0000 -999.0000 -99.0000 -99.0000 2 28 -999.0000 -999.0000 -99.0000 -99.0000 2 29 -999.0000 -999.0000 -99.0000 -99.0000 2 30 -999.0000 -999.0000 -99.0000 -99.0000 2 31 -999.0000 -999.0000 -99.0000 -99.0000 2 32 -999.0000 -999.0000 -99.0000 -99.0000 2 33 -999.0000 -999.0000 -99.0000 -99.0000 2 34 -999.0000 -999.0000 -99.0000 -99.0000 2 35 -999.0000 -999.0000 -99.0000 -99.0000 3 0 -999.0000 -999.0000 -99.0000 -99.0000 3 1 -999.0000 -999.0000 -99.0000 -99.0000 3 2 -999.0000 -999.0000 -99.0000 -99.0000 3 3 -999.0000 -999.0000 -99.0000 -99.0000 3 4 -999.0000 -999.0000 -99.0000 -99.0000 3 5 -999.0000 -999.0000 -99.0000 -99.0000 3 6 -180.0000 -171.1167 50.0000 52.3333 3 7 -180.0000 -155.5594 50.0000 56.2583 3 8 -180.0000 -140.0022 50.0000 60.0000 3 9 -180.0000 -124.4449 50.0000 60.0000 3 10 -160.0000 -108.8877 50.0000 60.0000 3 11 -140.0000 -93.3305 50.0000 60.0000 3 12 -120.0000 -77.7732 50.0000 60.0000 3 13 -100.0000 -62.2160 50.0000 60.0000 3 14 -80.0000 -46.6588 50.0000 60.0000 3 15 -60.0000 -31.1015 50.0000 60.0000 3 16 -40.0000 -15.5443 50.0000 60.0000 3 17 -20.0000 0.0167 50.0000 60.0000 3 18 0.0000 20.0167 50.0000 60.0000 3 19 15.5572 40.0167 50.0000 60.0000 3 20 31.1145 60.0167 50.0000 60.0000 3 21 46.6717 80.0167 50.0000 60.0000 3 22 62.2290 100.0167 50.0000 60.0000 3 23 77.7862 120.0167 50.0000 60.0000 3 24 93.3434 140.0167 50.0000 60.0000 3 25 108.9007 160.0167 50.0000 60.0000 3 26 124.4579 180.0000 50.0000 60.0000 3 27 140.0151 180.0000 50.0000 60.0000 3 28 155.5724 180.0000 50.0000 56.2500 3 29 171.1296 180.0000 50.0000 52.3333 3 30 -999.0000 -999.0000 -99.0000 -99.0000 3 31 -999.0000 -999.0000 -99.0000 -99.0000 3 32 -999.0000 -999.0000 -99.0000 -99.0000 3 33 -999.0000 -999.0000 -99.0000 -99.0000 3 34 -999.0000 -999.0000 -99.0000 -99.0000 3 35 -999.0000 -999.0000 -99.0000 -99.0000 4 0 -999.0000 -999.0000 -99.0000 -99.0000 4 1 -999.0000 -999.0000 -99.0000 -99.0000 4 2 -999.0000 -999.0000 -99.0000 -99.0000 4 3 -999.0000 -999.0000 -99.0000 -99.0000 4 4 -180.0000 -169.6921 40.0000 43.7667 4 5 -180.0000 -156.6380 40.0000 48.1917 4 6 -180.0000 -143.5839 40.0000 50.0000 4 7 -171.1296 -130.5299 40.0000 50.0000 4 8 -155.5724 -117.4758 40.0000 50.0000 4 9 -140.0151 -104.4217 40.0000 50.0000 4 10 -124.4579 -91.3676 40.0000 50.0000 4 11 -108.9007 -78.3136 40.0000 50.0000 4 12 -93.3434 -65.2595 40.0000 50.0000 4 13 -77.7862 -52.2054 40.0000 50.0000 4 14 -62.2290 -39.1513 40.0000 50.0000 4 15 -46.6717 -26.0973 40.0000 50.0000 4 16 -31.1145 -13.0432 40.0000 50.0000 4 17 -15.5572 0.0130 40.0000 50.0000 4 18 0.0000 15.5702 40.0000 50.0000 4 19 13.0541 31.1274 40.0000 50.0000 4 20 26.1081 46.6847 40.0000 50.0000 4 21 39.1622 62.2419 40.0000 50.0000 4 22 52.2163 77.7992 40.0000 50.0000 4 23 65.2704 93.3564 40.0000 50.0000 4 24 78.3244 108.9136 40.0000 50.0000 4 25 91.3785 124.4709 40.0000 50.0000 4 26 104.4326 140.0281 40.0000 50.0000 4 27 117.4867 155.5853 40.0000 50.0000 4 28 130.5407 171.1426 40.0000 50.0000 4 29 143.5948 180.0000 40.0000 50.0000 4 30 156.6489 180.0000 40.0000 48.1917 4 31 169.7029 180.0000 40.0000 43.7583 4 32 -999.0000 -999.0000 -99.0000 -99.0000 4 33 -999.0000 -999.0000 -99.0000 -99.0000 4 34 -999.0000 -999.0000 -99.0000 -99.0000 4 35 -999.0000 -999.0000 -99.0000 -99.0000 5 0 -999.0000 -999.0000 -99.0000 -99.0000 5 1 -999.0000 -999.0000 -99.0000 -99.0000 5 2 -180.0000 -173.1955 30.0000 33.5583 5 3 -180.0000 -161.6485 30.0000 38.9500 5 4 -180.0000 -150.1014 30.0000 40.0000 5 5 -169.7029 -138.5544 30.0000 40.0000 5 6 -156.6489 -127.0074 30.0000 40.0000 5 7 -143.5948 -115.4604 30.0000 40.0000 5 8 -130.5407 -103.9134 30.0000 40.0000 5 9 -117.4867 -92.3664 30.0000 40.0000 5 10 -104.4326 -80.8194 30.0000 40.0000 5 11 -91.3785 -69.2724 30.0000 40.0000 5 12 -78.3244 -57.7254 30.0000 40.0000 5 13 -65.2704 -46.1784 30.0000 40.0000 5 14 -52.2163 -34.6314 30.0000 40.0000 5 15 -39.1622 -23.0844 30.0000 40.0000 5 16 -26.1081 -11.5374 30.0000 40.0000 5 17 -13.0541 0.0109 30.0000 40.0000 5 18 0.0000 13.0650 30.0000 40.0000 5 19 11.5470 26.1190 30.0000 40.0000 5 20 23.0940 39.1731 30.0000 40.0000 5 21 34.6410 52.2272 30.0000 40.0000 5 22 46.1880 65.2812 30.0000 40.0000 5 23 57.7350 78.3353 30.0000 40.0000 5 24 69.2820 91.3894 30.0000 40.0000 5 25 80.8290 104.4435 30.0000 40.0000 5 26 92.3760 117.4975 30.0000 40.0000 5 27 103.9230 130.5516 30.0000 40.0000 5 28 115.4701 143.6057 30.0000 40.0000 5 29 127.0171 156.6598 30.0000 40.0000 5 30 138.5641 169.7138 30.0000 40.0000 5 31 150.1111 180.0000 30.0000 40.0000 5 32 161.6581 180.0000 30.0000 38.9417 5 33 173.2051 180.0000 30.0000 33.5583 5 34 -999.0000 -999.0000 -99.0000 -99.0000 5 35 -999.0000 -999.0000 -99.0000 -99.0000 6 0 -999.0000 -999.0000 -99.0000 -99.0000 6 1 -180.0000 -170.2596 20.0000 27.2667 6 2 -180.0000 -159.6178 20.0000 30.0000 6 3 -173.2051 -148.9760 20.0000 30.0000 6 4 -161.6581 -138.3342 20.0000 30.0000 6 5 -150.1111 -127.6925 20.0000 30.0000 6 6 -138.5641 -117.0507 20.0000 30.0000 6 7 -127.0171 -106.4089 20.0000 30.0000 6 8 -115.4701 -95.7671 20.0000 30.0000 6 9 -103.9230 -85.1254 20.0000 30.0000 6 10 -92.3760 -74.4836 20.0000 30.0000 6 11 -80.8290 -63.8418 20.0000 30.0000 6 12 -69.2820 -53.2000 20.0000 30.0000 6 13 -57.7350 -42.5582 20.0000 30.0000 6 14 -46.1880 -31.9165 20.0000 30.0000 6 15 -34.6410 -21.2747 20.0000 30.0000 6 16 -23.0940 -10.6329 20.0000 30.0000 6 17 -11.5470 0.0096 20.0000 30.0000 6 18 0.0000 11.5566 20.0000 30.0000 6 19 10.6418 23.1036 20.0000 30.0000 6 20 21.2836 34.6506 20.0000 30.0000 6 21 31.9253 46.1976 20.0000 30.0000 6 22 42.5671 57.7446 20.0000 30.0000 6 23 53.2089 69.2917 20.0000 30.0000 6 24 63.8507 80.8387 20.0000 30.0000 6 25 74.4924 92.3857 20.0000 30.0000 6 26 85.1342 103.9327 20.0000 30.0000 6 27 95.7760 115.4797 20.0000 30.0000 6 28 106.4178 127.0267 20.0000 30.0000 6 29 117.0596 138.5737 20.0000 30.0000 6 30 127.7013 150.1207 20.0000 30.0000 6 31 138.3431 161.6677 20.0000 30.0000 6 32 148.9849 173.2147 20.0000 30.0000 6 33 159.6267 180.0000 20.0000 30.0000 6 34 170.2684 180.0000 20.0000 27.2667 6 35 -999.0000 -999.0000 -99.0000 -99.0000 7 0 -180.0000 -172.6141 10.0000 19.1917 7 1 -180.0000 -162.4598 10.0000 20.0000 7 2 -170.2684 -152.3055 10.0000 20.0000 7 3 -159.6267 -142.1513 10.0000 20.0000 7 4 -148.9849 -131.9970 10.0000 20.0000 7 5 -138.3431 -121.8427 10.0000 20.0000 7 6 -127.7013 -111.6885 10.0000 20.0000 7 7 -117.0596 -101.5342 10.0000 20.0000 7 8 -106.4178 -91.3799 10.0000 20.0000 7 9 -95.7760 -81.2257 10.0000 20.0000 7 10 -85.1342 -71.0714 10.0000 20.0000 7 11 -74.4924 -60.9171 10.0000 20.0000 7 12 -63.8507 -50.7629 10.0000 20.0000 7 13 -53.2089 -40.6086 10.0000 20.0000 7 14 -42.5671 -30.4543 10.0000 20.0000 7 15 -31.9253 -20.3001 10.0000 20.0000 7 16 -21.2836 -10.1458 10.0000 20.0000 7 17 -10.6418 0.0089 10.0000 20.0000 7 18 0.0000 10.6506 10.0000 20.0000 7 19 10.1543 21.2924 10.0000 20.0000 7 20 20.3085 31.9342 10.0000 20.0000 7 21 30.4628 42.5760 10.0000 20.0000 7 22 40.6171 53.2178 10.0000 20.0000 7 23 50.7713 63.8595 10.0000 20.0000 7 24 60.9256 74.5013 10.0000 20.0000 7 25 71.0799 85.1431 10.0000 20.0000 7 26 81.2341 95.7849 10.0000 20.0000 7 27 91.3884 106.4266 10.0000 20.0000 7 28 101.5427 117.0684 10.0000 20.0000 7 29 111.6969 127.7102 10.0000 20.0000 7 30 121.8512 138.3520 10.0000 20.0000 7 31 132.0055 148.9938 10.0000 20.0000 7 32 142.1597 159.6355 10.0000 20.0000 7 33 152.3140 170.2773 10.0000 20.0000 7 34 162.4683 180.0000 10.0000 20.0000 7 35 172.6225 180.0000 10.0000 19.1833 8 0 -180.0000 -169.9917 -0.0000 10.0000 8 1 -172.6225 -159.9917 -0.0000 10.0000 8 2 -162.4683 -149.9917 -0.0000 10.0000 8 3 -152.3140 -139.9917 -0.0000 10.0000 8 4 -142.1597 -129.9917 -0.0000 10.0000 8 5 -132.0055 -119.9917 -0.0000 10.0000 8 6 -121.8512 -109.9917 -0.0000 10.0000 8 7 -111.6969 -99.9917 -0.0000 10.0000 8 8 -101.5427 -89.9917 -0.0000 10.0000 8 9 -91.3884 -79.9917 -0.0000 10.0000 8 10 -81.2341 -69.9917 -0.0000 10.0000 8 11 -71.0799 -59.9917 -0.0000 10.0000 8 12 -60.9256 -49.9917 -0.0000 10.0000 8 13 -50.7713 -39.9917 -0.0000 10.0000 8 14 -40.6171 -29.9917 -0.0000 10.0000 8 15 -30.4628 -19.9917 -0.0000 10.0000 8 16 -20.3085 -9.9917 -0.0000 10.0000 8 17 -10.1543 0.0085 -0.0000 10.0000 8 18 0.0000 10.1627 -0.0000 10.0000 8 19 10.0000 20.3170 -0.0000 10.0000 8 20 20.0000 30.4713 -0.0000 10.0000 8 21 30.0000 40.6255 -0.0000 10.0000 8 22 40.0000 50.7798 -0.0000 10.0000 8 23 50.0000 60.9341 -0.0000 10.0000 8 24 60.0000 71.0883 -0.0000 10.0000 8 25 70.0000 81.2426 -0.0000 10.0000 8 26 80.0000 91.3969 -0.0000 10.0000 8 27 90.0000 101.5511 -0.0000 10.0000 8 28 100.0000 111.7054 -0.0000 10.0000 8 29 110.0000 121.8597 -0.0000 10.0000 8 30 120.0000 132.0139 -0.0000 10.0000 8 31 130.0000 142.1682 -0.0000 10.0000 8 32 140.0000 152.3225 -0.0000 10.0000 8 33 150.0000 162.4767 -0.0000 10.0000 8 34 160.0000 172.6310 -0.0000 10.0000 8 35 170.0000 180.0000 -0.0000 10.0000 9 0 -180.0000 -169.9917 -10.0000 -0.0000 9 1 -172.6225 -159.9917 -10.0000 -0.0000 9 2 -162.4683 -149.9917 -10.0000 -0.0000 9 3 -152.3140 -139.9917 -10.0000 -0.0000 9 4 -142.1597 -129.9917 -10.0000 -0.0000 9 5 -132.0055 -119.9917 -10.0000 -0.0000 9 6 -121.8512 -109.9917 -10.0000 -0.0000 9 7 -111.6969 -99.9917 -10.0000 -0.0000 9 8 -101.5427 -89.9917 -10.0000 -0.0000 9 9 -91.3884 -79.9917 -10.0000 -0.0000 9 10 -81.2341 -69.9917 -10.0000 -0.0000 9 11 -71.0799 -59.9917 -10.0000 -0.0000 9 12 -60.9256 -49.9917 -10.0000 -0.0000 9 13 -50.7713 -39.9917 -10.0000 -0.0000 9 14 -40.6171 -29.9917 -10.0000 -0.0000 9 15 -30.4628 -19.9917 -10.0000 -0.0000 9 16 -20.3085 -9.9917 -10.0000 -0.0000 9 17 -10.1543 0.0085 -10.0000 -0.0000 9 18 0.0000 10.1627 -10.0000 -0.0000 9 19 10.0000 20.3170 -10.0000 -0.0000 9 20 20.0000 30.4713 -10.0000 -0.0000 9 21 30.0000 40.6255 -10.0000 -0.0000 9 22 40.0000 50.7798 -10.0000 -0.0000 9 23 50.0000 60.9341 -10.0000 -0.0000 9 24 60.0000 71.0883 -10.0000 -0.0000 9 25 70.0000 81.2426 -10.0000 -0.0000 9 26 80.0000 91.3969 -10.0000 -0.0000 9 27 90.0000 101.5511 -10.0000 -0.0000 9 28 100.0000 111.7054 -10.0000 -0.0000 9 29 110.0000 121.8597 -10.0000 -0.0000 9 30 120.0000 132.0139 -10.0000 -0.0000 9 31 130.0000 142.1682 -10.0000 -0.0000 9 32 140.0000 152.3225 -10.0000 -0.0000 9 33 150.0000 162.4767 -10.0000 -0.0000 9 34 160.0000 172.6310 -10.0000 -0.0000 9 35 170.0000 180.0000 -10.0000 -0.0000 10 0 -180.0000 -172.6141 -19.1917 -10.0000 10 1 -180.0000 -162.4598 -20.0000 -10.0000 10 2 -170.2684 -152.3055 -20.0000 -10.0000 10 3 -159.6267 -142.1513 -20.0000 -10.0000 10 4 -148.9849 -131.9970 -20.0000 -10.0000 10 5 -138.3431 -121.8427 -20.0000 -10.0000 10 6 -127.7013 -111.6885 -20.0000 -10.0000 10 7 -117.0596 -101.5342 -20.0000 -10.0000 10 8 -106.4178 -91.3799 -20.0000 -10.0000 10 9 -95.7760 -81.2257 -20.0000 -10.0000 10 10 -85.1342 -71.0714 -20.0000 -10.0000 10 11 -74.4924 -60.9171 -20.0000 -10.0000 10 12 -63.8507 -50.7629 -20.0000 -10.0000 10 13 -53.2089 -40.6086 -20.0000 -10.0000 10 14 -42.5671 -30.4543 -20.0000 -10.0000 10 15 -31.9253 -20.3001 -20.0000 -10.0000 10 16 -21.2836 -10.1458 -20.0000 -10.0000 10 17 -10.6418 0.0089 -20.0000 -10.0000 10 18 0.0000 10.6506 -20.0000 -10.0000 10 19 10.1543 21.2924 -20.0000 -10.0000 10 20 20.3085 31.9342 -20.0000 -10.0000 10 21 30.4628 42.5760 -20.0000 -10.0000 10 22 40.6171 53.2178 -20.0000 -10.0000 10 23 50.7713 63.8595 -20.0000 -10.0000 10 24 60.9256 74.5013 -20.0000 -10.0000 10 25 71.0799 85.1431 -20.0000 -10.0000 10 26 81.2341 95.7849 -20.0000 -10.0000 10 27 91.3884 106.4266 -20.0000 -10.0000 10 28 101.5427 117.0684 -20.0000 -10.0000 10 29 111.6969 127.7102 -20.0000 -10.0000 10 30 121.8512 138.3520 -20.0000 -10.0000 10 31 132.0055 148.9938 -20.0000 -10.0000 10 32 142.1597 159.6355 -20.0000 -10.0000 10 33 152.3140 170.2773 -20.0000 -10.0000 10 34 162.4683 180.0000 -20.0000 -10.0000 10 35 172.6225 180.0000 -19.1833 -10.0000 11 0 -999.0000 -999.0000 -99.0000 -99.0000 11 1 -180.0000 -170.2596 -27.2667 -20.0000 11 2 -180.0000 -159.6178 -30.0000 -20.0000 11 3 -173.2051 -148.9760 -30.0000 -20.0000 11 4 -161.6581 -138.3342 -30.0000 -20.0000 11 5 -150.1111 -127.6925 -30.0000 -20.0000 11 6 -138.5641 -117.0507 -30.0000 -20.0000 11 7 -127.0171 -106.4089 -30.0000 -20.0000 11 8 -115.4701 -95.7671 -30.0000 -20.0000 11 9 -103.9230 -85.1254 -30.0000 -20.0000 11 10 -92.3760 -74.4836 -30.0000 -20.0000 11 11 -80.8290 -63.8418 -30.0000 -20.0000 11 12 -69.2820 -53.2000 -30.0000 -20.0000 11 13 -57.7350 -42.5582 -30.0000 -20.0000 11 14 -46.1880 -31.9165 -30.0000 -20.0000 11 15 -34.6410 -21.2747 -30.0000 -20.0000 11 16 -23.0940 -10.6329 -30.0000 -20.0000 11 17 -11.5470 0.0096 -30.0000 -20.0000 11 18 0.0000 11.5566 -30.0000 -20.0000 11 19 10.6418 23.1036 -30.0000 -20.0000 11 20 21.2836 34.6506 -30.0000 -20.0000 11 21 31.9253 46.1976 -30.0000 -20.0000 11 22 42.5671 57.7446 -30.0000 -20.0000 11 23 53.2089 69.2917 -30.0000 -20.0000 11 24 63.8507 80.8387 -30.0000 -20.0000 11 25 74.4924 92.3857 -30.0000 -20.0000 11 26 85.1342 103.9327 -30.0000 -20.0000 11 27 95.7760 115.4797 -30.0000 -20.0000 11 28 106.4178 127.0267 -30.0000 -20.0000 11 29 117.0596 138.5737 -30.0000 -20.0000 11 30 127.7013 150.1207 -30.0000 -20.0000 11 31 138.3431 161.6677 -30.0000 -20.0000 11 32 148.9849 173.2147 -30.0000 -20.0000 11 33 159.6267 180.0000 -30.0000 -20.0000 11 34 170.2684 180.0000 -27.2667 -20.0000 11 35 -999.0000 -999.0000 -99.0000 -99.0000 12 0 -999.0000 -999.0000 -99.0000 -99.0000 12 1 -999.0000 -999.0000 -99.0000 -99.0000 12 2 -180.0000 -173.1955 -33.5583 -30.0000 12 3 -180.0000 -161.6485 -38.9500 -30.0000 12 4 -180.0000 -150.1014 -40.0000 -30.0000 12 5 -169.7029 -138.5544 -40.0000 -30.0000 12 6 -156.6489 -127.0074 -40.0000 -30.0000 12 7 -143.5948 -115.4604 -40.0000 -30.0000 12 8 -130.5407 -103.9134 -40.0000 -30.0000 12 9 -117.4867 -92.3664 -40.0000 -30.0000 12 10 -104.4326 -80.8194 -40.0000 -30.0000 12 11 -91.3785 -69.2724 -40.0000 -30.0000 12 12 -78.3244 -57.7254 -40.0000 -30.0000 12 13 -65.2704 -46.1784 -40.0000 -30.0000 12 14 -52.2163 -34.6314 -40.0000 -30.0000 12 15 -39.1622 -23.0844 -40.0000 -30.0000 12 16 -26.1081 -11.5374 -40.0000 -30.0000 12 17 -13.0541 0.0109 -40.0000 -30.0000 12 18 0.0000 13.0650 -40.0000 -30.0000 12 19 11.5470 26.1190 -40.0000 -30.0000 12 20 23.0940 39.1731 -40.0000 -30.0000 12 21 34.6410 52.2272 -40.0000 -30.0000 12 22 46.1880 65.2812 -40.0000 -30.0000 12 23 57.7350 78.3353 -40.0000 -30.0000 12 24 69.2820 91.3894 -40.0000 -30.0000 12 25 80.8290 104.4435 -40.0000 -30.0000 12 26 92.3760 117.4975 -40.0000 -30.0000 12 27 103.9230 130.5516 -40.0000 -30.0000 12 28 115.4701 143.6057 -40.0000 -30.0000 12 29 127.0171 156.6598 -40.0000 -30.0000 12 30 138.5641 169.7138 -40.0000 -30.0000 12 31 150.1111 180.0000 -40.0000 -30.0000 12 32 161.6581 180.0000 -38.9417 -30.0000 12 33 173.2051 180.0000 -33.5583 -30.0000 12 34 -999.0000 -999.0000 -99.0000 -99.0000 12 35 -999.0000 -999.0000 -99.0000 -99.0000 13 0 -999.0000 -999.0000 -99.0000 -99.0000 13 1 -999.0000 -999.0000 -99.0000 -99.0000 13 2 -999.0000 -999.0000 -99.0000 -99.0000 13 3 -999.0000 -999.0000 -99.0000 -99.0000 13 4 -180.0000 -169.6921 -43.7667 -40.0000 13 5 -180.0000 -156.6380 -48.1917 -40.0000 13 6 -180.0000 -143.5839 -50.0000 -40.0000 13 7 -171.1296 -130.5299 -50.0000 -40.0000 13 8 -155.5724 -117.4758 -50.0000 -40.0000 13 9 -140.0151 -104.4217 -50.0000 -40.0000 13 10 -124.4579 -91.3676 -50.0000 -40.0000 13 11 -108.9007 -78.3136 -50.0000 -40.0000 13 12 -93.3434 -65.2595 -50.0000 -40.0000 13 13 -77.7862 -52.2054 -50.0000 -40.0000 13 14 -62.2290 -39.1513 -50.0000 -40.0000 13 15 -46.6717 -26.0973 -50.0000 -40.0000 13 16 -31.1145 -13.0432 -50.0000 -40.0000 13 17 -15.5572 0.0130 -50.0000 -40.0000 13 18 0.0000 15.5702 -50.0000 -40.0000 13 19 13.0541 31.1274 -50.0000 -40.0000 13 20 26.1081 46.6847 -50.0000 -40.0000 13 21 39.1622 62.2419 -50.0000 -40.0000 13 22 52.2163 77.7992 -50.0000 -40.0000 13 23 65.2704 93.3564 -50.0000 -40.0000 13 24 78.3244 108.9136 -50.0000 -40.0000 13 25 91.3785 124.4709 -50.0000 -40.0000 13 26 104.4326 140.0281 -50.0000 -40.0000 13 27 117.4867 155.5853 -50.0000 -40.0000 13 28 130.5407 171.1426 -50.0000 -40.0000 13 29 143.5948 180.0000 -50.0000 -40.0000 13 30 156.6489 180.0000 -48.1917 -40.0000 13 31 169.7029 180.0000 -43.7583 -40.0000 13 32 -999.0000 -999.0000 -99.0000 -99.0000 13 33 -999.0000 -999.0000 -99.0000 -99.0000 13 34 -999.0000 -999.0000 -99.0000 -99.0000 13 35 -999.0000 -999.0000 -99.0000 -99.0000 14 0 -999.0000 -999.0000 -99.0000 -99.0000 14 1 -999.0000 -999.0000 -99.0000 -99.0000 14 2 -999.0000 -999.0000 -99.0000 -99.0000 14 3 -999.0000 -999.0000 -99.0000 -99.0000 14 4 -999.0000 -999.0000 -99.0000 -99.0000 14 5 -999.0000 -999.0000 -99.0000 -99.0000 14 6 -180.0000 -171.1167 -52.3333 -50.0000 14 7 -180.0000 -155.5594 -56.2583 -50.0000 14 8 -180.0000 -140.0022 -60.0000 -50.0000 14 9 -180.0000 -124.4449 -60.0000 -50.0000 14 10 -160.0000 -108.8877 -60.0000 -50.0000 14 11 -140.0000 -93.3305 -60.0000 -50.0000 14 12 -120.0000 -77.7732 -60.0000 -50.0000 14 13 -100.0000 -62.2160 -60.0000 -50.0000 14 14 -80.0000 -46.6588 -60.0000 -50.0000 14 15 -60.0000 -31.1015 -60.0000 -50.0000 14 16 -40.0000 -15.5443 -60.0000 -50.0000 14 17 -20.0000 0.0167 -60.0000 -50.0000 14 18 0.0000 20.0167 -60.0000 -50.0000 14 19 15.5572 40.0167 -60.0000 -50.0000 14 20 31.1145 60.0167 -60.0000 -50.0000 14 21 46.6717 80.0167 -60.0000 -50.0000 14 22 62.2290 100.0167 -60.0000 -50.0000 14 23 77.7862 120.0167 -60.0000 -50.0000 14 24 93.3434 140.0167 -60.0000 -50.0000 14 25 108.9007 160.0167 -60.0000 -50.0000 14 26 124.4579 180.0000 -60.0000 -50.0000 14 27 140.0151 180.0000 -60.0000 -50.0000 14 28 155.5724 180.0000 -56.2500 -50.0000 14 29 171.1296 180.0000 -52.3333 -50.0000 14 30 -999.0000 -999.0000 -99.0000 -99.0000 14 31 -999.0000 -999.0000 -99.0000 -99.0000 14 32 -999.0000 -999.0000 -99.0000 -99.0000 14 33 -999.0000 -999.0000 -99.0000 -99.0000 14 34 -999.0000 -999.0000 -99.0000 -99.0000 14 35 -999.0000 -999.0000 -99.0000 -99.0000 15 0 -999.0000 -999.0000 -99.0000 -99.0000 15 1 -999.0000 -999.0000 -99.0000 -99.0000 15 2 -999.0000 -999.0000 -99.0000 -99.0000 15 3 -999.0000 -999.0000 -99.0000 -99.0000 15 4 -999.0000 -999.0000 -99.0000 -99.0000 15 5 -999.0000 -999.0000 -99.0000 -99.0000 15 6 -999.0000 -999.0000 -99.0000 -99.0000 15 7 -999.0000 -999.0000 -99.0000 -99.0000 15 8 -999.0000 -999.0000 -99.0000 -99.0000 15 9 -180.0000 -159.9833 -63.6167 -60.0000 15 10 -180.0000 -139.9833 -67.1167 -60.0000 15 11 -180.0000 -119.9833 -70.0000 -60.0000 15 12 -175.4283 -99.9833 -70.0000 -60.0000 15 13 -146.1902 -79.9833 -70.0000 -60.0000 15 14 -116.9522 -59.9833 -70.0000 -60.0000 15 15 -87.7141 -39.9833 -70.0000 -60.0000 15 16 -58.4761 -19.9833 -70.0000 -60.0000 15 17 -29.2380 0.0244 -70.0000 -60.0000 15 18 0.0000 29.2624 -70.0000 -60.0000 15 19 20.0000 58.5005 -70.0000 -60.0000 15 20 40.0000 87.7385 -70.0000 -60.0000 15 21 60.0000 116.9765 -70.0000 -60.0000 15 22 80.0000 146.2146 -70.0000 -60.0000 15 23 100.0000 175.4526 -70.0000 -60.0000 15 24 120.0000 180.0000 -70.0000 -60.0000 15 25 140.0000 180.0000 -67.1167 -60.0000 15 26 160.0000 180.0000 -63.6167 -60.0000 15 27 -999.0000 -999.0000 -99.0000 -99.0000 15 28 -999.0000 -999.0000 -99.0000 -99.0000 15 29 -999.0000 -999.0000 -99.0000 -99.0000 15 30 -999.0000 -999.0000 -99.0000 -99.0000 15 31 -999.0000 -999.0000 -99.0000 -99.0000 15 32 -999.0000 -999.0000 -99.0000 -99.0000 15 33 -999.0000 -999.0000 -99.0000 -99.0000 15 34 -999.0000 -999.0000 -99.0000 -99.0000 15 35 -999.0000 -999.0000 -99.0000 -99.0000 16 0 -999.0000 -999.0000 -99.0000 -99.0000 16 1 -999.0000 -999.0000 -99.0000 -99.0000 16 2 -999.0000 -999.0000 -99.0000 -99.0000 16 3 -999.0000 -999.0000 -99.0000 -99.0000 16 4 -999.0000 -999.0000 -99.0000 -99.0000 16 5 -999.0000 -999.0000 -99.0000 -99.0000 16 6 -999.0000 -999.0000 -99.0000 -99.0000 16 7 -999.0000 -999.0000 -99.0000 -99.0000 16 8 -999.0000 -999.0000 -99.0000 -99.0000 16 9 -999.0000 -999.0000 -99.0000 -99.0000 16 10 -999.0000 -999.0000 -99.0000 -99.0000 16 11 -180.0000 -175.4039 -70.5333 -70.0000 16 12 -180.0000 -146.1659 -73.8750 -70.0000 16 13 -180.0000 -116.9278 -77.1667 -70.0000 16 14 -180.0000 -87.6898 -80.0000 -70.0000 16 15 -172.7631 -58.4517 -80.0000 -70.0000 16 16 -115.1754 -29.2137 -80.0000 -70.0000 16 17 -57.5877 0.0480 -80.0000 -70.0000 16 18 0.0000 57.6357 -80.0000 -70.0000 16 19 29.2380 115.2234 -80.0000 -70.0000 16 20 58.4761 172.8111 -80.0000 -70.0000 16 21 87.7141 180.0000 -80.0000 -70.0000 16 22 116.9522 180.0000 -77.1583 -70.0000 16 23 146.1902 180.0000 -73.8750 -70.0000 16 24 175.4283 180.0000 -70.5333 -70.0000 16 25 -999.0000 -999.0000 -99.0000 -99.0000 16 26 -999.0000 -999.0000 -99.0000 -99.0000 16 27 -999.0000 -999.0000 -99.0000 -99.0000 16 28 -999.0000 -999.0000 -99.0000 -99.0000 16 29 -999.0000 -999.0000 -99.0000 -99.0000 16 30 -999.0000 -999.0000 -99.0000 -99.0000 16 31 -999.0000 -999.0000 -99.0000 -99.0000 16 32 -999.0000 -999.0000 -99.0000 -99.0000 16 33 -999.0000 -999.0000 -99.0000 -99.0000 16 34 -999.0000 -999.0000 -99.0000 -99.0000 16 35 -999.0000 -999.0000 -99.0000 -99.0000 17 0 -999.0000 -999.0000 -99.0000 -99.0000 17 1 -999.0000 -999.0000 -99.0000 -99.0000 17 2 -999.0000 -999.0000 -99.0000 -99.0000 17 3 -999.0000 -999.0000 -99.0000 -99.0000 17 4 -999.0000 -999.0000 -99.0000 -99.0000 17 5 -999.0000 -999.0000 -99.0000 -99.0000 17 6 -999.0000 -999.0000 -99.0000 -99.0000 17 7 -999.0000 -999.0000 -99.0000 -99.0000 17 8 -999.0000 -999.0000 -99.0000 -99.0000 17 9 -999.0000 -999.0000 -99.0000 -99.0000 17 10 -999.0000 -999.0000 -99.0000 -99.0000 17 11 -999.0000 -999.0000 -99.0000 -99.0000 17 12 -999.0000 -999.0000 -99.0000 -99.0000 17 13 -999.0000 -999.0000 -99.0000 -99.0000 17 14 -180.0000 -172.7151 -80.4083 -80.0000 17 15 -180.0000 -115.1274 -83.6250 -80.0000 17 16 -180.0000 -57.5397 -86.8167 -80.0000 17 17 -180.0000 57.2957 -90.0000 -80.0000 17 18 -0.0040 180.0000 -90.0000 -80.0000 17 19 57.5877 180.0000 -86.8167 -80.0000 17 20 115.1754 180.0000 -83.6250 -80.0000 17 21 172.7631 180.0000 -80.4083 -80.0000 17 22 -999.0000 -999.0000 -99.0000 -99.0000 17 23 -999.0000 -999.0000 -99.0000 -99.0000 17 24 -999.0000 -999.0000 -99.0000 -99.0000 17 25 -999.0000 -999.0000 -99.0000 -99.0000 17 26 -999.0000 -999.0000 -99.0000 -99.0000 17 27 -999.0000 -999.0000 -99.0000 -99.0000 17 28 -999.0000 -999.0000 -99.0000 -99.0000 17 29 -999.0000 -999.0000 -99.0000 -99.0000 17 30 -999.0000 -999.0000 -99.0000 -99.0000 17 31 -999.0000 -999.0000 -99.0000 -99.0000 17 32 -999.0000 -999.0000 -99.0000 -99.0000 17 33 -999.0000 -999.0000 -99.0000 -99.0000 17 34 -999.0000 -999.0000 -99.0000 -99.0000 17 35 -999.0000 -999.0000 -99.0000 -99.0000 diff --git a/monetio/grids.py b/monetio/grids.py index 6f772637..af639a9b 100644 --- a/monetio/grids.py +++ b/monetio/grids.py @@ -17,8 +17,8 @@ def _geos_16_grid(dset): Description of returned object. """ - from pyresample import geometry from numpy import asarray + from pyresample import geometry projection = dset.goes_imager_projection h = projection.perspective_point_height @@ -36,9 +36,19 @@ def _geos_16_grid(dset): y_h = (y_ur - y_ll) / (len(y) - 1.0) / 2.0 # 1/2 grid size area_extent = (x_ll - x_h, y_ll - y_h, x_ur + x_h, y_ur + y_h) - proj_dict = {'a': float(a), 'b': float(b), 'lon_0': float(lon_0), 'h': float(h), 'proj': 'geos', 'units': 'm', 'sweep': sweep} - - area = geometry.AreaDefinition('GEOS_ABI', 'ABI', 'GOES_ABI', proj_dict, len(x), len(y), asarray(area_extent)) + proj_dict = { + "a": float(a), + "b": float(b), + "lon_0": float(lon_0), + "h": float(h), + "proj": "geos", + "units": "m", + "sweep": sweep, + } + + area = geometry.AreaDefinition( + "GEOS_ABI", "ABI", "GOES_ABI", proj_dict, len(x), len(y), asarray(area_extent) + ) return area @@ -57,9 +67,9 @@ def _get_sinu_grid_df(): """ from pandas import read_csv - f = path[:-8] + 'data/sn_bound_10deg.txt' + f = path[:-8] + "data/sn_bound_10deg.txt" td = read_csv(f, skiprows=4, delim_whitespace=True) - td = td.assign(ihiv='h' + td.ih.astype(str).str.zfill(2) + 'v' + td.iv.astype(str).str.zfill(2)) + td = td.assign(ihiv="h" + td.ih.astype(str).str.zfill(2) + "v" + td.iv.astype(str).str.zfill(2)) return td @@ -76,7 +86,7 @@ def _sinu_grid_latlon_boundary(h, v): def _get_sinu_xy(lon, lat): from pyproj import Proj - sinu = Proj('+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m') + sinu = Proj("+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m") return sinu(lon, lat) @@ -85,7 +95,9 @@ def _get_sinu_latlon(x, y): from pyproj import Proj xv, yv = meshgrid(x, y) - sinu = Proj('+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m, +R=6371007.181') + sinu = Proj( + "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m, +R=6371007.181" + ) return sinu(xv, yv, inverse=True) @@ -104,23 +116,25 @@ def get_modis_latlon_from_swath_hv(h, v, dset): x = linspace(xmin, xmax, len(dset.x)) y = linspace(ymin, ymax, len(dset.y)) lon, lat = _get_sinu_latlon(x, y) - dset.coords['longitude'] = (('x', 'y'), lon) - dset.coords['latitude'] = (('x', 'y'), lat) - dset.attrs['area_extent'] = (x.min(), y.min(), x.max(), y.max()) - dset.attrs['proj4_srs'] = '+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 ' '+b=6371007.181 +units=m' + dset.coords["longitude"] = (("x", "y"), lon) + dset.coords["latitude"] = (("x", "y"), lat) + dset.attrs["area_extent"] = (x.min(), y.min(), x.max(), y.max()) + dset.attrs["proj4_srs"] = ( + "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 " "+b=6371007.181 +units=m" + ) return dset def get_sinu_area_def(dset): - from pyresample import utils from pyproj import Proj + from pyresample import utils - p = Proj(dset.attrs['proj4_srs']) + p = Proj(dset.attrs["proj4_srs"]) proj4_args = p.srs - area_name = 'MODIS Grid Def' - area_id = 'modis' + area_name = "MODIS Grid Def" + area_id = "modis" proj_id = area_id - area_extent = dset.attrs['area_extent'] + area_extent = dset.attrs["area_extent"] nx, ny = dset.longitude.shape return utils.get_area_def(area_id, area_name, proj_id, proj4_args, nx, ny, area_extent) @@ -131,13 +145,18 @@ def get_ioapi_pyresample_area_def(ds, proj4_srs): y_size = ds.NROWS x_size = ds.NCOLS projection = utils.proj4_str_to_dict(proj4_srs) - proj_id = 'IOAPI_Dataset' - description = 'IOAPI area_def for pyresample' - area_id = 'MONET_Object_Grid' + proj_id = "IOAPI_Dataset" + description = "IOAPI area_def for pyresample" + area_id = "MONET_Object_Grid" x_ll, y_ll = ds.XORIG + ds.XCELL * 0.5, ds.YORIG + ds.YCELL * 0.5 - x_ur, y_ur = ds.XORIG + (ds.NCOLS * ds.XCELL) + 0.5 * ds.XCELL, ds.YORIG + (ds.YCELL * ds.NROWS) + 0.5 * ds.YCELL + x_ur, y_ur = ( + ds.XORIG + (ds.NCOLS * ds.XCELL) + 0.5 * ds.XCELL, + ds.YORIG + (ds.YCELL * ds.NROWS) + 0.5 * ds.YCELL, + ) area_extent = (x_ll, y_ll, x_ur, y_ur) - area_def = geometry.AreaDefinition(area_id, description, proj_id, projection, x_size, y_size, area_extent) + area_def = geometry.AreaDefinition( + area_id, description, proj_id, projection, x_size, y_size, area_extent + ) return area_def @@ -160,10 +179,10 @@ def get_generic_projection_from_proj4(lat, lon, proj4_srs): """ try: - from pyresample.utils import proj4_str_to_dict from pyresample.geometry import SwathDefinition + from pyresample.utils import proj4_str_to_dict except ImportError: - print('please install pyresample to use this functionality') + print("please install pyresample to use this functionality") swath = SwathDefinition(lats=lat, lons=lon) area = swath.compute_optimal_bb_area(proj4_str_to_dict(proj4_srs)) return area @@ -209,29 +228,35 @@ def _ioapi_grid_from_dataset(ds, earth_radius=6370000): """ pargs = dict() - pargs['lat_1'] = ds.P_ALP - pargs['lat_2'] = ds.P_BET - pargs['lat_0'] = ds.YCENT - pargs['lon_0'] = ds.P_GAM - pargs['center_lon'] = ds.XCENT - pargs['x0'] = ds.XORIG - pargs['y0'] = ds.YORIG - pargs['r'] = earth_radius + pargs["lat_1"] = ds.P_ALP + pargs["lat_2"] = ds.P_BET + pargs["lat_0"] = ds.YCENT + pargs["lon_0"] = ds.P_GAM + pargs["center_lon"] = ds.XCENT + pargs["x0"] = ds.XORIG + pargs["y0"] = ds.YORIG + pargs["r"] = earth_radius proj_id = ds.GDTYP if proj_id == 2: # Lambert - p4 = '+proj=lcc +lat_1={lat_1} +lat_2={lat_2} ' '+lat_0={lat_0} +lon_0={lon_0} ' '+x_0=0 +y_0=0 +datum=WGS84 +units=m +a={r} +b={r}' + p4 = ( + "+proj=lcc +lat_1={lat_1} +lat_2={lat_2} " + "+lat_0={lat_0} +lon_0={lon_0} " + "+x_0=0 +y_0=0 +datum=WGS84 +units=m +a={r} +b={r}" + ) p4 = p4.format(**pargs) elif proj_id == 4: # Polar stereo - p4 = '+proj=stere +lat_ts={lat_1} +lon_0={lon_0} +lat_0=90.0' '+x_0=0 +y_0=0 +a={r} +b={r}' + p4 = "+proj=stere +lat_ts={lat_1} +lon_0={lon_0} +lat_0=90.0" "+x_0=0 +y_0=0 +a={r} +b={r}" p4 = p4.format(**pargs) elif proj_id == 3: # Mercator - p4 = '+proj=merc +lat_ts={lat_1} ' '+lon_0={center_lon} ' '+x_0={x0} +y_0={y0} +a={r} +b={r}' + p4 = ( + "+proj=merc +lat_ts={lat_1} " "+lon_0={center_lon} " "+x_0={x0} +y_0={y0} +a={r} +b={r}" + ) p4 = p4.format(**pargs) else: - raise NotImplementedError('IOAPI proj not implemented yet: ' '{}'.format(proj_id)) + raise NotImplementedError("IOAPI proj not implemented yet: " "{}".format(proj_id)) # area_def = _get_ioapi_pyresample_area_def(ds) return p4 # , area_def @@ -253,7 +278,7 @@ def grid_from_dataset(ds, earth_radius=6370000): """ # maybe its an IOAPI file - if hasattr(ds, 'IOAPI_VERSION') or hasattr(ds, 'P_ALP'): + if hasattr(ds, "IOAPI_VERSION") or hasattr(ds, "P_ALP"): # IOAPI_VERSION return _ioapi_grid_from_dataset(ds, earth_radius=earth_radius) diff --git a/monetio/models/__init__.py b/monetio/models/__init__.py index 76d1225d..ef216023 100644 --- a/monetio/models/__init__.py +++ b/monetio/models/__init__.py @@ -1,4 +1,4 @@ -from . import camx, cmaq, fv3chem, hysplit, hytraj, ncep_grib, prepchem, pardump +from . import camx, cmaq, fv3chem, hysplit, hytraj, ncep_grib, pardump, prepchem __all__ = ["cmaq", "hysplit", "camx", "fv3chem", "ncep_grib", "prepchem", "hytraj", "pardump"] diff --git a/monetio/models/camx.py b/monetio/models/camx.py index 2bebf4de..b2821bad 100644 --- a/monetio/models/camx.py +++ b/monetio/models/camx.py @@ -1,8 +1,9 @@ """ CAMx File Reader """ +import xarray as xr from numpy import array, concatenate from pandas import Series, to_datetime -import xarray as xr -from ..grids import grid_from_dataset, get_ioapi_pyresample_area_def + +from ..grids import get_ioapi_pyresample_area_def, grid_from_dataset def can_do(index): @@ -33,18 +34,18 @@ def open_dataset(fname, earth_radius=6370000, convert_to_ppb=True, drop_duplicat """ # open the dataset using xarray - dset = xr.open_dataset(fname, engine='pseudonetcdf', backend_kwargs={'format': 'uamiv'}) + dset = xr.open_dataset(fname, engine="pseudonetcdf", backend_kwargs={"format": "uamiv"}) # get the grid information grid = grid_from_dataset(dset, earth_radius=earth_radius) area_def = get_ioapi_pyresample_area_def(dset, grid) # assign attributes for dataset and all DataArrays - dset = dset.assign_attrs({'proj4_srs': grid}) + dset = dset.assign_attrs({"proj4_srs": grid}) for i in dset.variables: - dset[i] = dset[i].assign_attrs({'proj4_srs': grid}) + dset[i] = dset[i].assign_attrs({"proj4_srs": grid}) for j in dset[i].attrs: dset[i].attrs[j] = dset[i].attrs[j].strip() - dset[i] = dset[i].assign_attrs({'area': area_def}) + dset[i] = dset[i].assign_attrs({"area": area_def}) dset = dset.assign_attrs(area=area_def) # add lazy diagnostic variables @@ -64,7 +65,7 @@ def open_dataset(fname, earth_radius=6370000, convert_to_ppb=True, drop_duplicat dset = _predefined_mapping_tables(dset) # rename dimensions - dset = dset.rename({'COL': 'x', 'ROW': 'y', 'LAY': 'z'}) + dset = dset.rename({"COL": "x", "ROW": "y", "LAY": "z"}) return dset @@ -90,18 +91,18 @@ def open_mfdataset(fname, earth_radius=6370000, convert_to_ppb=True, drop_duplic """ # open the dataset using xarray - dset = xr.open_mfdataset(fname, engine='pseudonetcdf', backend_kwargs={'format': 'uamiv'}) + dset = xr.open_mfdataset(fname, engine="pseudonetcdf", backend_kwargs={"format": "uamiv"}) # get the grid information grid = grid_from_dataset(dset, earth_radius=earth_radius) area_def = get_ioapi_pyresample_area_def(dset, grid) # assign attributes for dataset and all DataArrays - dset = dset.assign_attrs({'proj4_srs': grid}) + dset = dset.assign_attrs({"proj4_srs": grid}) for i in dset.variables: - dset[i] = dset[i].assign_attrs({'proj4_srs': grid}) + dset[i] = dset[i].assign_attrs({"proj4_srs": grid}) for j in dset[i].attrs: dset[i].attrs[j] = dset[i].attrs[j].strip() - dset[i] = dset[i].assign_attrs({'area': area_def}) + dset[i] = dset[i].assign_attrs({"area": area_def}) dset = dset.assign_attrs(area=area_def) # add lazy diagnostic variables @@ -121,7 +122,7 @@ def open_mfdataset(fname, earth_radius=6370000, convert_to_ppb=True, drop_duplic dset = _predefined_mapping_tables(dset) # rename dimensions - dset = dset.rename({'COL': 'x', 'ROW': 'y', 'LAY': 'z'}) + dset = dset.rename({"COL": "x", "ROW": "y", "LAY": "z"}) return dset @@ -143,18 +144,18 @@ def open_files(fname, earth_radius=6370000): """ # open the dataset using xarray - dset = xr.open_mfdataset(fname, engine='pseudonetcdf', backend_kwargs={'format': 'uamiv'}) + dset = xr.open_mfdataset(fname, engine="pseudonetcdf", backend_kwargs={"format": "uamiv"}) # get the grid information grid = grid_from_dataset(dset, earth_radius=earth_radius) area_def = get_ioapi_pyresample_area_def(dset, grid) # assign attributes for dataset and all DataArrays - dset = dset.assign_attrs({'proj4_srs': grid}) + dset = dset.assign_attrs({"proj4_srs": grid}) for i in dset.variables: - dset[i] = dset[i].assign_attrs({'proj4_srs': grid}) + dset[i] = dset[i].assign_attrs({"proj4_srs": grid}) for j in dset[i].attrs: dset[i].attrs[j] = dset[i].attrs[j].strip() - dset[i] = dset[i].assign_attrs({'area': area_def}) + dset[i] = dset[i].assign_attrs({"area": area_def}) dset = dset.assign_attrs(area=area_def) # add lazy diagnostic variables @@ -174,7 +175,7 @@ def open_files(fname, earth_radius=6370000): dset = _predefined_mapping_tables(dset) # rename dimensions - dset = dset.rename({'COL': 'x', 'ROW': 'y', 'LAY': 'z'}) + dset = dset.rename({"COL": "x", "ROW": "y", "LAY": "z"}) return dset @@ -182,16 +183,16 @@ def open_files(fname, earth_radius=6370000): def _get_times(d): idims = len(d.TFLAG.dims) if idims == 2: - tflag1 = Series(d['TFLAG'][:, 0]).astype(str).str.zfill(7) - tflag2 = Series(d['TFLAG'][:, 1]).astype(str).str.zfill(6) + tflag1 = Series(d["TFLAG"][:, 0]).astype(str).str.zfill(7) + tflag2 = Series(d["TFLAG"][:, 1]).astype(str).str.zfill(6) else: - tflag1 = Series(d['TFLAG'][:, 0, 0]).astype(str).str.zfill(7) - tflag2 = Series(d['TFLAG'][:, 0, 1]).astype(str).str.zfill(6) - date = to_datetime([i + j for i, j in zip(tflag1, tflag2)], format='%Y%j%H%M%S') - indexdates = Series(date).drop_duplicates(keep='last').index.values + tflag1 = Series(d["TFLAG"][:, 0, 0]).astype(str).str.zfill(7) + tflag2 = Series(d["TFLAG"][:, 0, 1]).astype(str).str.zfill(6) + date = to_datetime([i + j for i, j in zip(tflag1, tflag2)], format="%Y%j%H%M%S") + indexdates = Series(date).drop_duplicates(keep="last").index.values d = d.isel(TSTEP=indexdates) - d['TSTEP'] = date[indexdates] - return d.rename({'TSTEP': 'time'}) + d["TSTEP"] = date[indexdates] + return d.rename({"TSTEP": "time"}) def _get_latlon(dset): @@ -209,8 +210,8 @@ def _get_latlon(dset): """ lon, lat = dset.area.get_lonlats() - dset['longitude'] = xr.DataArray(lon[::-1, :], dims=['ROW', 'COL']) - dset['latitude'] = xr.DataArray(lat[::-1, :], dims=['ROW', 'COL']) + dset["longitude"] = xr.DataArray(lon[::-1, :], dims=["ROW", "COL"]) + dset["latitude"] = xr.DataArray(lat[::-1, :], dims=["ROW", "COL"]) dset = dset.assign_coords(longitude=dset.longitude, latitude=dset.latitude) return dset @@ -231,27 +232,29 @@ def add_lazy_pm25(d): """ keys = Series([i for i in d.variables]) allvars = Series(fine) - if 'PM25_TOT' in keys: - d['PM25'] = d['PM25_TOT'].chunk() + if "PM25_TOT" in keys: + d["PM25"] = d["PM25_TOT"].chunk() else: index = allvars.isin(keys) newkeys = allvars.loc[index] - d['PM25'] = add_multiple_lazy(d, newkeys) - d['PM25'].assign_attrs({'name': 'PM2.5', 'long_name': 'PM2.5'}) + d["PM25"] = add_multiple_lazy(d, newkeys) + d["PM25"].assign_attrs({"name": "PM2.5", "long_name": "PM2.5"}) return d def add_lazy_pm10(d): keys = Series([i for i in d.variables]) allvars = Series(concatenate([fine, coarse])) - if 'PM_TOT' in keys: - d['PM10'] = d['PM_TOT'].chunk() + if "PM_TOT" in keys: + d["PM10"] = d["PM_TOT"].chunk() else: index = allvars.isin(keys) if can_do(index): newkeys = allvars.loc[index] - d['PM10'] = add_multiple_lazy(d, newkeys) - d['PM10'] = d['PM10'].assign_attrs({'name': 'PM10', 'long_name': 'Particulate Matter < 10 microns'}) + d["PM10"] = add_multiple_lazy(d, newkeys) + d["PM10"] = d["PM10"].assign_attrs( + {"name": "PM10", "long_name": "Particulate Matter < 10 microns"} + ) return d @@ -261,21 +264,23 @@ def add_lazy_pm_course(d): index = allvars.isin(keys) if can_do(index): newkeys = allvars.loc[index] - d['PM_COURSE'] = add_multiple_lazy(d, newkeys) - d['PM_COURSE'] = d['PM_COURSE'].assign_attrs({'name': 'PM_COURSE', 'long_name': 'Course Mode Particulate Matter'}) + d["PM_COURSE"] = add_multiple_lazy(d, newkeys) + d["PM_COURSE"] = d["PM_COURSE"].assign_attrs( + {"name": "PM_COURSE", "long_name": "Course Mode Particulate Matter"} + ) return d def add_lazy_clf(d): keys = Series([i for i in d.variables]) - allvars = Series(['ACLI', 'ACLJ', 'ACLK']) + allvars = Series(["ACLI", "ACLJ", "ACLK"]) weights = Series([1, 1, 0.2]) index = allvars.isin(keys) if can_do(index): newkeys = allvars.loc[index] neww = weights.loc[index] - d['CLf'] = add_multiple_lazy(d, newkeys, weights=neww) - d['CLf'] = d['CLf'].assign_attrs({'name': 'CLf', 'long_name': 'Fine Mode particulate Cl'}) + d["CLf"] = add_multiple_lazy(d, newkeys, weights=neww) + d["CLf"] = d["CLf"].assign_attrs({"name": "CLf", "long_name": "Fine Mode particulate Cl"}) return d @@ -285,19 +290,19 @@ def add_lazy_noy(d): index = allvars.isin(keys) if can_do(index): newkeys = allvars.loc[index] - d['NOy'] = add_multiple_lazy(d, newkeys) - d['NOy'] = d['NOy'].assign_attrs({'name': 'NOy', 'long_name': 'NOy'}) + d["NOy"] = add_multiple_lazy(d, newkeys) + d["NOy"] = d["NOy"].assign_attrs({"name": "NOy", "long_name": "NOy"}) return d def add_lazy_nox(d): keys = Series([i for i in d.variables]) - allvars = Series(['NO', 'NOX']) + allvars = Series(["NO", "NOX"]) index = allvars.isin(keys) if can_do(index): newkeys = allvars.loc[index] - d['NOx'] = add_multiple_lazy(d, newkeys) - d['NOx'] = d['NOx'].assign_attrs({'name': 'NOx', 'long_name': 'NOx'}) + d["NOx"] = add_multiple_lazy(d, newkeys) + d["NOx"] = d["NOx"].assign_attrs({"name": "NOx", "long_name": "NOx"}) return d @@ -325,73 +330,139 @@ def _predefined_mapping_tables(dset): to_improve = {} to_nadp = {} to_aqs = { - 'OZONE': ['O3'], - 'PM2.5': ['PM25'], - 'CO': ['CO'], - 'NOY': ['NO', 'NO2', 'NO3', 'N2O5', 'HONO', 'HNO3', 'PAN', 'PANX', 'PNA', 'NTR', 'CRON', 'CRN2', 'CRNO', 'CRPX', 'OPAN'], - 'NOX': ['NO', 'NO2'], - 'SO2': ['SO2'], - 'NO': ['NO'], - 'NO2': ['NO2'], - 'SO4f': ['PSO4'], - 'PM10': ['PM10'], - 'NO3f': ['PNO3'], - 'ECf': ['PEC'], - 'OCf': ['OC'], - 'ETHANE': ['ETHA'], - 'BENZENE': ['BENZENE'], - 'TOLUENE': ['TOL'], - 'ISOPRENE': ['ISOP'], - 'O-XYLENE': ['XYL'], - 'WS': ['WSPD10'], - 'TEMP': ['TEMP2'], - 'WD': ['WDIR10'], - 'NAf': ['NA'], - 'NH4f': ['PNH4'], + "OZONE": ["O3"], + "PM2.5": ["PM25"], + "CO": ["CO"], + "NOY": [ + "NO", + "NO2", + "NO3", + "N2O5", + "HONO", + "HNO3", + "PAN", + "PANX", + "PNA", + "NTR", + "CRON", + "CRN2", + "CRNO", + "CRPX", + "OPAN", + ], + "NOX": ["NO", "NO2"], + "SO2": ["SO2"], + "NO": ["NO"], + "NO2": ["NO2"], + "SO4f": ["PSO4"], + "PM10": ["PM10"], + "NO3f": ["PNO3"], + "ECf": ["PEC"], + "OCf": ["OC"], + "ETHANE": ["ETHA"], + "BENZENE": ["BENZENE"], + "TOLUENE": ["TOL"], + "ISOPRENE": ["ISOP"], + "O-XYLENE": ["XYL"], + "WS": ["WSPD10"], + "TEMP": ["TEMP2"], + "WD": ["WDIR10"], + "NAf": ["NA"], + "NH4f": ["PNH4"], } to_airnow = { - 'OZONE': ['O3'], - 'PM2.5': ['PM25'], - 'CO': ['CO'], - 'NOY': ['NO', 'NO2', 'NO3', 'N2O5', 'HONO', 'HNO3', 'PAN', 'PANX', 'PNA', 'NTR', 'CRON', 'CRN2', 'CRNO', 'CRPX', 'OPAN'], - 'NOX': ['NO', 'NO2'], - 'SO2': ['SO2'], - 'NO': ['NO'], - 'NO2': ['NO2'], - 'SO4f': ['PSO4'], - 'PM10': ['PM10'], - 'NO3f': ['PNO3'], - 'ECf': ['PEC'], - 'OCf': ['OC'], - 'ETHANE': ['ETHA'], - 'BENZENE': ['BENZENE'], - 'TOLUENE': ['TOL'], - 'ISOPRENE': ['ISOP'], - 'O-XYLENE': ['XYL'], - 'WS': ['WSPD10'], - 'TEMP': ['TEMP2'], - 'WD': ['WDIR10'], - 'NAf': ['NA'], - 'NH4f': ['PNH4'], + "OZONE": ["O3"], + "PM2.5": ["PM25"], + "CO": ["CO"], + "NOY": [ + "NO", + "NO2", + "NO3", + "N2O5", + "HONO", + "HNO3", + "PAN", + "PANX", + "PNA", + "NTR", + "CRON", + "CRN2", + "CRNO", + "CRPX", + "OPAN", + ], + "NOX": ["NO", "NO2"], + "SO2": ["SO2"], + "NO": ["NO"], + "NO2": ["NO2"], + "SO4f": ["PSO4"], + "PM10": ["PM10"], + "NO3f": ["PNO3"], + "ECf": ["PEC"], + "OCf": ["OC"], + "ETHANE": ["ETHA"], + "BENZENE": ["BENZENE"], + "TOLUENE": ["TOL"], + "ISOPRENE": ["ISOP"], + "O-XYLENE": ["XYL"], + "WS": ["WSPD10"], + "TEMP": ["TEMP2"], + "WD": ["WDIR10"], + "NAf": ["NA"], + "NH4f": ["PNH4"], } to_crn = {} to_aeronet = {} to_cems = {} mapping_tables = { - 'improve': to_improve, - 'aqs': to_aqs, - 'airnow': to_airnow, - 'crn': to_crn, - 'cems': to_cems, - 'nadp': to_nadp, - 'aeronet': to_aeronet, + "improve": to_improve, + "aqs": to_aqs, + "airnow": to_airnow, + "crn": to_crn, + "cems": to_cems, + "nadp": to_nadp, + "aeronet": to_aeronet, } - dset = dset.assign_attrs({'mapping_tables': mapping_tables}) + dset = dset.assign_attrs({"mapping_tables": mapping_tables}) return dset # Arrays for different gasses and pm groupings -coarse = array(['CPRM', 'CCRS']) -fine = array(['NA', 'PSO4', 'PNO3', 'PNH4', 'PH2O', 'PCL', 'PEC', 'FPRM', 'FCRS', 'SOA1', 'SOA2', 'SOA3', 'SOA4']) -noy_gas = array(['NO', 'NO2', 'NO3', 'N2O5', 'HONO', 'HNO3', 'PAN', 'PANX', 'PNA', 'NTR', 'CRON', 'CRN2', 'CRNO', 'CRPX', 'OPAN']) -poc = array(['SOA1', 'SOA2', 'SOA3', 'SOA4']) +coarse = array(["CPRM", "CCRS"]) +fine = array( + [ + "NA", + "PSO4", + "PNO3", + "PNH4", + "PH2O", + "PCL", + "PEC", + "FPRM", + "FCRS", + "SOA1", + "SOA2", + "SOA3", + "SOA4", + ] +) +noy_gas = array( + [ + "NO", + "NO2", + "NO3", + "N2O5", + "HONO", + "HNO3", + "PAN", + "PANX", + "PNA", + "NTR", + "CRON", + "CRN2", + "CRNO", + "CRPX", + "OPAN", + ] +) +poc = array(["SOA1", "SOA2", "SOA3", "SOA4"]) diff --git a/monetio/models/cdump2netcdf.py b/monetio/models/cdump2netcdf.py index 989c2f97..6c572397 100644 --- a/monetio/models/cdump2netcdf.py +++ b/monetio/models/cdump2netcdf.py @@ -1,10 +1,8 @@ import datetime import os -import sys import numpy as np -import xarray as xr -from monetio.models import hysplit + # import monet.utilhysplit.hysp_func as hf from netCDF4 import Dataset @@ -96,55 +94,47 @@ def meters2FL(meters): def get_topbottom(lev): - top = 'FL' + str(meters2FL(lev[-1])) - bottom = 'FL' + str(meters2FL(lev[0])) - print('level', lev[0], bottom) - print('level', lev[-1], top) + top = "FL" + str(meters2FL(lev[-1])) + bottom = "FL" + str(meters2FL(lev[0])) + print("level", lev[0], bottom) + print("level", lev[-1], top) return top, bottom def handle_levels(levlist): nlev = len(levlist) - # divide into three pieces + # Divide into three pieces piece = int(np.floor(nlev / 3.0)) - jjj = 0 lev1 = levlist[0:piece] - lev2 = levlist[piece:2 * piece] - lev3 = levlist[2 * piece:] + lev2 = levlist[piece : 2 * piece] + lev3 = levlist[2 * piece :] print(piece, lev1, lev2, lev3) return lev1, lev2, lev3 # def cdump2awips(flist, outname, format='NETCDF4', d1=None, d2=None): -def cdump2awips(xrash1, - dt, - outname, - mscale=1, - munit='unit', - format='NETCDF4', - d1=None, - d2=None): +def cdump2awips(xrash1, dt, outname, mscale=1, munit="unit", format="NETCDF4", d1=None, d2=None): # mass loading should be in g/m2 to compare to satellite. # concentration should be in mg/m3 to compare to threshold levels. - sample_time = np.timedelta64(int(dt), 'h') + sample_time = np.timedelta64(int(dt), "h") # stack the ensemble and source dimensions so it is one dimension - xrash = xrash1.stack(ensemble=('ens', 'source')) + xrash = xrash1.stack(ensemble=("ens", "source")) # put dimensionsin correct order. - xrash.transpose('time', 'ensemble', 'x', 'y', 'z') + xrash.transpose("time", "ensemble", "x", "y", "z") # array with mass loading rather than concentration. mass = mass_loading(xrash) - levelra = xrash.z.values - nra = xrash.values + # levelra = xrash.z.values + # nra = xrash.values iii = 0 for tm in xrash.time.values: - fid = Dataset(outname + str(iii) + '.nc', 'w', format='NETCDF4') + fid = Dataset(outname + str(iii) + ".nc", "w", format="NETCDF4") # GLOBAL ATTRIBUTES - fid.SourceFiles = 'Kasatochi' + fid.SourceFiles = "Kasatochi" fid.time_origin = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") fid.mass_per_hour = mscale fid.mass_unit = munit @@ -154,25 +144,26 @@ def cdump2awips(xrash1, # DEFINE DIMENSIONS lat_shape = xrash.shape[2] lon_shape = xrash.shape[3] - lat = fid.createDimension('latitude', lat_shape) - lon = fid.createDimension('longitude', lon_shape) + # lat = fid.createDimension("latitude", lat_shape) + # lon = fid.createDimension("longitude", lon_shape) clevs = [0.2, 2, 4, 10, 100] - clevels = fid.createDimension('contour_levels', len(clevs)) - ens_shape = xrash.coords['ensemble'].shape[0] - ensemble = fid.createDimension('ensid', ens_shape) + # clevels = fid.createDimension("contour_levels", len(clevs)) + ens_shape = xrash.coords["ensemble"].shape[0] + # ensemble = fid.createDimension("ensid", ens_shape) + + # time = fid.createDimension("time", 1) # one time per file + # bnds = fid.createDimension("bnds", 2) # two bounds per time. + # origin = fid.createDimension("origins", 1) - time = fid.createDimension('time', 1) # one time per file - bnds = fid.createDimension('bnds', 2) # two bounds per time. - origin = fid.createDimension('origins', 1) # Scalar variables - #latra, lonra = hf.getlatlon(hxr) + # latra, lonra = hf.getlatlon(hxr) latra = xrash.latitude[:, 0] lonra = xrash.longitude[0] - print('ens shape', ens_shape) - print('lat shape', lat_shape, latra.shape, xrash.shape) - print('lon shape', lon_shape, lonra.shape, xrash.shape) + print("ens shape", ens_shape) + print("lat shape", lat_shape, latra.shape, xrash.shape) + print("lon shape", lon_shape, lonra.shape, xrash.shape) # Define variables with attributes # concid = fid.createVariable('conc', 'f4', # ('source', 'ensemble','levels','latitude','longitude')) @@ -183,76 +174,74 @@ def cdump2awips(xrash1, levs = xrash.z.values lev1, lev2, lev3 = handle_levels(levs) - coordlist = ('time', 'ensid', 'latitude', 'longitude') - concidl1 = fid.createVariable('Flight_levelA', 'f4', coordlist) - concidl1.units = munit + '/m3' - concidl1.long_name = 'Concentration Array' - concidl1.bottomlevel = 'FL0' + coordlist = ("time", "ensid", "latitude", "longitude") + concidl1 = fid.createVariable("Flight_levelA", "f4", coordlist) + concidl1.units = munit + "/m3" + concidl1.long_name = "Concentration Array" + concidl1.bottomlevel = "FL0" top, bottom = get_topbottom(lev1) concidl1.toplevel = top - concidl2 = fid.createVariable('Flight_levelB', 'f4', coordlist) - concidl2.units = munit + '/m3' - concidl2.long_name = 'Concentration Array' + concidl2 = fid.createVariable("Flight_levelB", "f4", coordlist) + concidl2.units = munit + "/m3" + concidl2.long_name = "Concentration Array" concidl2.bottomlevel = top top, bottom = get_topbottom(lev2) concidl2.toplevel = top - concidl3 = fid.createVariable('Flight_levelC', 'f4', coordlist) - concidl3.units = munit + '/m3' - concidl3.long_name = 'Concentration Array' + concidl3 = fid.createVariable("Flight_levelC", "f4", coordlist) + concidl3.units = munit + "/m3" + concidl3.long_name = "Concentration Array" concidl3.bottomlevel = top top, bottom = get_topbottom(lev3) concidl3.toplevel = top - massid = fid.createVariable('MassLoading', 'f4', coordlist) - massid.units = munit + '/m2' - massid.long_name = 'Mass Loading from surface to ' + top + massid = fid.createVariable("MassLoading", "f4", coordlist) + massid.units = munit + "/m2" + massid.long_name = "Mass Loading from surface to " + top # Standard Contour levels for concentration in mg/m3 - clevelid = fid.createVariable('Contour_levels', 'f4', - ('contour_levels')) + clevelid = fid.createVariable("Contour_levels", "f4", ("contour_levels")) clevelid[:] = clevs # Dimension with different ensemble members. - ensembleid = fid.createVariable('ensemble', 'str', ('ensid')) - ensid = fid.createVariable('ensid', 'i4', ('ensid')) - sourceid = fid.createVariable('source', 'str', ('ensid')) - - latid = fid.createVariable('latitude', 'f4', ('latitude')) - latid.long_name = 'latitude degrees north from the equator' - latid.units = 'degrees_north' - latid.point_spacing = 'even' - lonid = fid.createVariable('longitude', 'f4', ('longitude')) - lonid.long_name = 'longitude degrees east from the greenwhich meridian' - lonid.units = 'degrees_east' - lonid.point_spacing = 'even' - - timeid = fid.createVariable('time', 'f4', ('time')) + ensembleid = fid.createVariable("ensemble", "str", ("ensid")) + ensid = fid.createVariable("ensid", "i4", ("ensid")) + sourceid = fid.createVariable("source", "str", ("ensid")) + + latid = fid.createVariable("latitude", "f4", ("latitude")) + latid.long_name = "latitude degrees north from the equator" + latid.units = "degrees_north" + latid.point_spacing = "even" + lonid = fid.createVariable("longitude", "f4", ("longitude")) + lonid.long_name = "longitude degrees east from the greenwhich meridian" + lonid.units = "degrees_east" + lonid.point_spacing = "even" + + timeid = fid.createVariable("time", "f4", ("time")) # attributes for time grid. - timeid.units = 'days since 1970-01-01 00:00:00' - timeid.standard_name = 'time' - timeid.bounds = 'time_bnds' - timeid.calendar = 'gregorian' + timeid.units = "days since 1970-01-01 00:00:00" + timeid.standard_name = "time" + timeid.bounds = "time_bnds" + timeid.calendar = "gregorian" - time_bnds = fid.createVariable('time_bnds', 'f4', ('time', 'bnds')) + time_bnds = fid.createVariable("time_bnds", "f4", ("time", "bnds")) # Put data into variables # only one time per file. - epoch = np.datetime64('1970-01-01T00:00:00Z') + epoch = np.datetime64("1970-01-01T00:00:00Z") date1 = xrash.time[iii].values - t1 = (xrash.time[iii].values - epoch) / np.timedelta64(1, 's') + t1 = (xrash.time[iii].values - epoch) / np.timedelta64(1, "s") # change seconds to days t1 = t1 / (24.0 * 60 * 60) - t2 = ((xrash.time[0].values + sample_time) - epoch) / np.timedelta64( - 1, 's') + t2 = ((xrash.time[0].values + sample_time) - epoch) / np.timedelta64(1, "s") t2 = t2 / (24.0 * 60 * 60) temp = xrash.loc[dict(time=date1)] print(temp.values.shape) - print('date', date1, type(lev1)) + print("date", date1, type(lev1)) mult = 1 concidl1[:] = makeconc(xrash.copy(), date1, list(lev1), mult=mult) @@ -268,12 +257,11 @@ def cdump2awips(xrash1, time_bnds[:] = [[t1, t2]] # these may be duplicated since ensemble and source # dimensions are stacked. - ensembleid[:] = xrash.coords['ens'].values - sourceid[:] = xrash.coords['source'].values + ensembleid[:] = xrash.coords["ens"].values + sourceid[:] = xrash.coords["source"].values ensid[:] = np.arange(1, ens_shape + 1) fid.close() - import sys - # sys.exit() + iii += 1 @@ -297,56 +285,56 @@ def makeconc(xrash, date1, level, mult=1, tr=True, verbose=False): total_thickness += dhash[lev] c1 = mult * xrash.sel(time=date1, z=level) if verbose: - print('MAX BEFORE ', np.max(c1)) - print('length', len(level), tlist, dhash) + print("MAX BEFORE ", np.max(c1)) + print("length", len(level), tlist, dhash) c1 = mass_loading(c1, tlist) c1 = c1 / total_thickness if verbose: - print('Max AFTER', np.max(c1)) - c1 = c1.expand_dims('time') + print("Max AFTER", np.max(c1)) + c1 = c1.expand_dims("time") # this line is for netcdf awips output if tr: - c1 = c1.transpose('time', 'ensemble', 'y', 'x') + c1 = c1.transpose("time", "ensemble", "y", "x") if verbose: - print('C1', c1) + print("C1", c1) if verbose: print(c1.shape) return c1 def maketestblist(): - d1 = datetime.datetime(2008, 8, 8, 12) - d2 = datetime.datetime(2008, 8, 8, 13) + # d1 = datetime.datetime(2008, 8, 8, 12) + # d2 = datetime.datetime(2008, 8, 8, 13) blist = {} flist = [] - dname = '/pub/Scratch/alicec/KASATOCHI/cylindrical/e3/' - fname = 'wrf.e3.bin' - flist.append((os.path.join(dname, fname), 'WRF')) - fname = 'gdas.e3.bin' - flist.append((os.path.join(dname, fname), 'GDAS')) - blist['S3'] = flist + dname = "/pub/Scratch/alicec/KASATOCHI/cylindrical/e3/" + fname = "wrf.e3.bin" + flist.append((os.path.join(dname, fname), "WRF")) + fname = "gdas.e3.bin" + flist.append((os.path.join(dname, fname), "GDAS")) + blist["S3"] = flist flist1 = [] - dname = '/pub/Scratch/alicec/KASATOCHI/cylindrical/e2/' - fname = 'wrf.e2.bin' - flist1.append((os.path.join(dname, fname), 'WRF')) - fname = 'gdas.e2.bin' - flist1.append((os.path.join(dname, fname), 'GDAS')) - blist['S2'] = flist1 + dname = "/pub/Scratch/alicec/KASATOCHI/cylindrical/e2/" + fname = "wrf.e2.bin" + flist1.append((os.path.join(dname, fname), "WRF")) + fname = "gdas.e2.bin" + flist1.append((os.path.join(dname, fname), "GDAS")) + blist["S2"] = flist1 return blist def maketestncfile(): blist = maketestblist() - oname = 'out.nc' + oname = "out.nc" d1 = datetime.datetime(2008, 8, 8, 12) d2 = datetime.datetime(2008, 8, 8, 13) cdump2awips(blist, oname, d1=d1, d2=d2) -def maketestra(): - d1 = None - d2 = None - blist = maketestblist() - xrash, dt = combine_cdump(blist, d1=d1, d2=d2) - return xrash, dt +# def maketestra(): +# d1 = None +# d2 = None +# blist = maketestblist() +# xrash, dt = combine_cdump(blist, d1=d1, d2=d2) +# return xrash, dt diff --git a/monetio/models/cmaq.py b/monetio/models/cmaq.py index 18e495f3..691dc3fc 100644 --- a/monetio/models/cmaq.py +++ b/monetio/models/cmaq.py @@ -54,9 +54,9 @@ def open_dataset(fname, earth_radius=6370000, convert_to_ppb=True, drop_duplicat grid = grid_from_dataset(dset, earth_radius=earth_radius) area_def = get_ioapi_pyresample_area_def(dset, grid) # assign attributes for dataset and all DataArrays - dset = dset.assign_attrs({'proj4_srs': grid}) + dset = dset.assign_attrs({"proj4_srs": grid}) for i in dset.variables: - dset[i] = dset[i].assign_attrs({'proj4_srs': grid}) + dset[i] = dset[i].assign_attrs({"proj4_srs": grid}) for j in dset[i].attrs: dset[i].attrs[j] = dset[i].attrs[j].strip() # dset[i] = dset[i].assign_attrs({'area': area_def}) @@ -73,26 +73,28 @@ def open_dataset(fname, earth_radius=6370000, convert_to_ppb=True, drop_duplicat # dset = _predefined_mapping_tables(dset) # rename dimensions - dset = dset.rename({'COL': 'x', 'ROW': 'y', 'LAY': 'z'}) + dset = dset.rename({"COL": "x", "ROW": "y", "LAY": "z"}) # convert all gas species to ppbv if convert_to_ppb: for i in dset.variables: - if 'units' in dset[i].attrs: - if 'ppmV' in dset[i].attrs['units']: + if "units" in dset[i].attrs: + if "ppmV" in dset[i].attrs["units"]: dset[i] *= 1000.0 - dset[i].attrs['units'] = 'ppbV' + dset[i].attrs["units"] = "ppbV" # convert 'micrograms to \mu g' for i in dset.variables: - if 'units' in dset[i].attrs: - if 'micrograms' in dset[i].attrs['units']: - dset[i].attrs['units'] = '$\mu g m^{-3}$' + if "units" in dset[i].attrs: + if "micrograms" in dset[i].attrs["units"]: + dset[i].attrs["units"] = r"$\mu g m^{-3}$" return dset -def open_mfdataset(fname, earth_radius=6370000, convert_to_ppb=True, drop_duplicates=False, **kwargs): +def open_mfdataset( + fname, earth_radius=6370000, convert_to_ppb=True, drop_duplicates=False, **kwargs +): """Method to open CMAQ IOAPI netcdf files. Parameters @@ -113,7 +115,7 @@ def open_mfdataset(fname, earth_radius=6370000, convert_to_ppb=True, drop_duplic """ # open the dataset using xarray - dset = xr.open_mfdataset(fname, concat_dim='TSTEP', **kwargs) + dset = xr.open_mfdataset(fname, concat_dim="TSTEP", **kwargs) # add lazy diagnostic variables dset = add_lazy_pm25(dset) @@ -133,9 +135,9 @@ def open_mfdataset(fname, earth_radius=6370000, convert_to_ppb=True, drop_duplic grid = grid_from_dataset(dset, earth_radius=earth_radius) area_def = get_ioapi_pyresample_area_def(dset, grid) # assign attributes for dataset and all DataArrays - dset = dset.assign_attrs({'proj4_srs': grid}) + dset = dset.assign_attrs({"proj4_srs": grid}) for i in dset.variables: - dset[i] = dset[i].assign_attrs({'proj4_srs': grid}) + dset[i] = dset[i].assign_attrs({"proj4_srs": grid}) for j in dset[i].attrs: dset[i].attrs[j] = dset[i].attrs[j].strip() # dset[i] = dset[i].assign_attrs({'area': area_def}) @@ -150,21 +152,21 @@ def open_mfdataset(fname, earth_radius=6370000, convert_to_ppb=True, drop_duplic # get Predefined mapping tables for observations # d set = _predefined_mapping_tables(dset) # rename dimensions - dset = dset.rename({'COL': 'x', 'ROW': 'y', 'LAY': 'z'}) + dset = dset.rename({"COL": "x", "ROW": "y", "LAY": "z"}) # convert all gas species to ppbv if convert_to_ppb: for i in dset.variables: - if 'units' in dset[i].attrs: - if 'ppmV' in dset[i].attrs['units']: + if "units" in dset[i].attrs: + if "ppmV" in dset[i].attrs["units"]: dset[i] *= 1000.0 - dset[i].attrs['units'] = 'ppbV' + dset[i].attrs["units"] = "ppbV" # convert 'micrograms to \mu g' for i in dset.variables: - if 'units' in dset[i].attrs: - if 'micrograms' in dset[i].attrs['units']: - dset[i].attrs['units'] = '$\mu g m^{-3}$' + if "units" in dset[i].attrs: + if "micrograms" in dset[i].attrs["units"]: + dset[i].attrs["units"] = r"$\mu g m^{-3}$" return dset @@ -172,19 +174,19 @@ def open_mfdataset(fname, earth_radius=6370000, convert_to_ppb=True, drop_duplic def _get_times(d, drop_duplicates): idims = len(d.TFLAG.dims) if idims == 2: - tflag1 = Series(d['TFLAG'][:, 0]).astype(str).str.zfill(7) - tflag2 = Series(d['TFLAG'][:, 1]).astype(str).str.zfill(6) + tflag1 = Series(d["TFLAG"][:, 0]).astype(str).str.zfill(7) + tflag2 = Series(d["TFLAG"][:, 1]).astype(str).str.zfill(6) else: - tflag1 = Series(d['TFLAG'][:, 0, 0]).astype(str).str.zfill(7) - tflag2 = Series(d['TFLAG'][:, 0, 1]).astype(str).str.zfill(6) - date = to_datetime([i + j for i, j in zip(tflag1, tflag2)], format='%Y%j%H%M%S') + tflag1 = Series(d["TFLAG"][:, 0, 0]).astype(str).str.zfill(7) + tflag2 = Series(d["TFLAG"][:, 0, 1]).astype(str).str.zfill(6) + date = to_datetime([i + j for i, j in zip(tflag1, tflag2)], format="%Y%j%H%M%S") if drop_duplicates: - indexdates = Series(date).drop_duplicates(keep='last').index.values + indexdates = Series(date).drop_duplicates(keep="last").index.values d = d.isel(TSTEP=indexdates) - d['TSTEP'] = date[indexdates] + d["TSTEP"] = date[indexdates] else: - d['TSTEP'] = date - return d.rename({'TSTEP': 'time'}) + d["TSTEP"] = date + return d.rename({"TSTEP": "time"}) def _get_latlon(dset, area): @@ -202,8 +204,8 @@ def _get_latlon(dset, area): """ lon, lat = area.get_lonlats() - dset['longitude'] = xr.DataArray(lon[::-1, :], dims=['ROW', 'COL']) - dset['latitude'] = xr.DataArray(lat[::-1, :], dims=['ROW', 'COL']) + dset["longitude"] = xr.DataArray(lon[::-1, :], dims=["ROW", "COL"]) + dset["latitude"] = xr.DataArray(lat[::-1, :], dims=["ROW", "COL"]) dset = dset.assign_coords(longitude=dset.longitude, latitude=dset.latitude) return dset @@ -295,29 +297,37 @@ def add_lazy_pm25(d): 0.2, ] ) - if 'PM25_TOT' in keys.to_list(): - d['PM25'] = d['PM25_TOT'] + if "PM25_TOT" in keys.to_list(): + d["PM25"] = d["PM25_TOT"] else: index = allvars.isin(keys) if can_do(index): newkeys = allvars.loc[index] newweights = weights.loc[index] - d['PM25'] = add_multiple_lazy(d, newkeys, weights=newweights) - d['PM25'] = d['PM25'].assign_attrs({'units': '$\mu g m^{-3}$', 'name': 'PM2.5', 'long_name': 'PM2.5'}) + d["PM25"] = add_multiple_lazy(d, newkeys, weights=newweights) + d["PM25"] = d["PM25"].assign_attrs( + {"units": r"$\mu g m^{-3}$", "name": "PM2.5", "long_name": "PM2.5"} + ) return d def add_lazy_pm10(d): keys = _get_keys(d) allvars = Series(concatenate([aitken, accumulation, coarse])) - if 'PMC_TOT' in keys.to_list(): - d['PM10'] = d['PMC_TOT'] + if "PMC_TOT" in keys.to_list(): + d["PM10"] = d["PMC_TOT"] else: index = allvars.isin(keys) if can_do(index): newkeys = allvars.loc[index] - d['PM10'] = add_multiple_lazy(d, newkeys) - d['PM10'] = d['PM10'].assign_attrs({'units': '$\mu g m^{-3}$', 'name': 'PM10', 'long_name': 'Particulate Matter < 10 microns'}) + d["PM10"] = add_multiple_lazy(d, newkeys) + d["PM10"] = d["PM10"].assign_attrs( + { + "units": r"$\mu g m^{-3}$", + "name": "PM10", + "long_name": "Particulate Matter < 10 microns", + } + ) return d @@ -327,86 +337,104 @@ def add_lazy_pm_course(d): index = allvars.isin(keys) if can_do(index): newkeys = allvars.loc[index] - d['PM_COURSE'] = add_multiple_lazy(d, newkeys) - d['PM_COURSE'] = d['PM_COURSE'].assign_attrs({'units': '$\mu g m^{-3}$', 'name': 'PM_COURSE', 'long_name': 'Course Mode Particulate Matter'}) + d["PM_COURSE"] = add_multiple_lazy(d, newkeys) + d["PM_COURSE"] = d["PM_COURSE"].assign_attrs( + { + "units": r"$\mu g m^{-3}$", + "name": "PM_COURSE", + "long_name": "Course Mode Particulate Matter", + } + ) return d def add_lazy_clf(d): keys = _get_keys(d) - allvars = Series(['ACLI', 'ACLJ', 'ACLK']) + allvars = Series(["ACLI", "ACLJ", "ACLK"]) weights = Series([1, 1, 0.2]) index = allvars.isin(keys) if can_do(index): newkeys = allvars.loc[index] neww = weights.loc[index] - d['CLf'] = add_multiple_lazy(d, newkeys, weights=neww) - d['CLf'] = d['CLf'].assign_attrs({'units': '$\mu g m^{-3}$', 'name': 'CLf', 'long_name': 'Fine Mode particulate Cl'}) + d["CLf"] = add_multiple_lazy(d, newkeys, weights=neww) + d["CLf"] = d["CLf"].assign_attrs( + {"units": r"$\mu g m^{-3}$", "name": "CLf", "long_name": "Fine Mode particulate Cl"} + ) return d def add_lazy_caf(d): keys = _get_keys(d) - allvars = Series(['ACAI', 'ACAJ', 'ASEACAT', 'ASOIL', 'ACORS']) + allvars = Series(["ACAI", "ACAJ", "ASEACAT", "ASOIL", "ACORS"]) weights = Series([1, 1, 0.2 * 32.0 / 1000.0, 0.2 * 83.8 / 1000.0, 0.2 * 56.2 / 1000.0]) index = allvars.isin(keys) if can_do(index): newkeys = allvars.loc[index] neww = weights.loc[index] - d['CAf'] = add_multiple_lazy(d, newkeys, weights=neww) - d['CAf'] = d['CAf'].assign_attrs({'units': '$\mu g m^{-3}$', 'name': 'CAf', 'long_name': 'Fine Mode particulate CA'}) + d["CAf"] = add_multiple_lazy(d, newkeys, weights=neww) + d["CAf"] = d["CAf"].assign_attrs( + {"units": r"$\mu g m^{-3}$", "name": "CAf", "long_name": "Fine Mode particulate CA"} + ) return d def add_lazy_naf(d): keys = _get_keys(d) - allvars = Series(['ANAI', 'ANAJ', 'ASEACAT', 'ASOIL', 'ACORS']) + allvars = Series(["ANAI", "ANAJ", "ASEACAT", "ASOIL", "ACORS"]) weights = Series([1, 1, 0.2 * 837.3 / 1000.0, 0.2 * 62.6 / 1000.0, 0.2 * 2.3 / 1000.0]) index = allvars.isin(keys) if can_do(index): newkeys = allvars.loc[index] neww = weights.loc[index] - d['NAf'] = add_multiple_lazy(d, newkeys, weights=neww) - d['NAf'] = d['NAf'].assign_attrs({'units': '$\mu g m^{-3}$', 'name': 'NAf', 'long_name': 'NAf'}) + d["NAf"] = add_multiple_lazy(d, newkeys, weights=neww) + d["NAf"] = d["NAf"].assign_attrs( + {"units": r"$\mu g m^{-3}$", "name": "NAf", "long_name": "NAf"} + ) return d def add_lazy_so4f(d): keys = _get_keys(d) - allvars = Series(['ASO4I', 'ASO4J', 'ASO4K']) + allvars = Series(["ASO4I", "ASO4J", "ASO4K"]) weights = Series([1.0, 1.0, 0.2]) index = allvars.isin(keys) if can_do(index): newkeys = allvars.loc[index] neww = weights.loc[index] - d['SO4f'] = add_multiple_lazy(d, newkeys, weights=neww) - d['SO4f'] = d['SO4f'].assign_attrs({'units': '$\mu g m^{-3}$', 'name': 'SO4f', 'long_name': 'SO4f'}) + d["SO4f"] = add_multiple_lazy(d, newkeys, weights=neww) + d["SO4f"] = d["SO4f"].assign_attrs( + {"units": r"$\mu g m^{-3}$", "name": "SO4f", "long_name": "SO4f"} + ) return d def add_lazy_nh4f(d): keys = _get_keys(d) - allvars = Series(['ANH4I', 'ANH4J', 'ANH4K']) + allvars = Series(["ANH4I", "ANH4J", "ANH4K"]) weights = Series([1.0, 1.0, 0.2]) index = allvars.isin(keys) if can_do(index): newkeys = allvars.loc[index] neww = weights.loc[index] - d['NH4f'] = add_multiple_lazy(d, newkeys, weights=neww) - d['NH4f'] = d['NH4f'].assign_attrs({'units': '$\mu g m^{-3}$', 'name': 'NH4f', 'long_name': 'NH4f'}) + d["NH4f"] = add_multiple_lazy(d, newkeys, weights=neww) + d["NH4f"] = d["NH4f"].assign_attrs( + {"units": r"$\mu g m^{-3}$", "name": "NH4f", "long_name": "NH4f"} + ) return d def add_lazy_no3f(d): keys = _get_keys(d) - allvars = Series(['ANO3I', 'ANO3J', 'ANO3K']) + allvars = Series(["ANO3I", "ANO3J", "ANO3K"]) weights = Series([1.0, 1.0, 0.2]) index = allvars.isin(keys) if can_do(index): newkeys = allvars.loc[index] neww = weights.loc[index] - d['NO3f'] = add_multiple_lazy(d, newkeys, weights=neww) - d['NO3f'] = d['NO3f'].assign_attrs({'units': '$\mu g m^{-3}$', 'name': 'NO3f', 'long_name': 'NO3f'}) + d["NO3f"] = add_multiple_lazy(d, newkeys, weights=neww) + d["NO3f"] = d["NO3f"].assign_attrs( + {"units": r"$\mu g m^{-3}$", "name": "NO3f", "long_name": "NO3f"} + ) return d @@ -416,8 +444,8 @@ def add_lazy_noy(d): index = allvars.isin(keys) if can_do(index): newkeys = allvars.loc[index] - d['NOy'] = add_multiple_lazy(d, newkeys) - d['NOy'] = d['NOy'].assign_attrs({'name': 'NOy', 'long_name': 'NOy'}) + d["NOy"] = add_multiple_lazy(d, newkeys) + d["NOy"] = d["NOy"].assign_attrs({"name": "NOy", "long_name": "NOy"}) return d @@ -439,12 +467,12 @@ def add_lazy_rh(d): def add_lazy_nox(d): keys = _get_keys(d) - allvars = Series(['NO', 'NOX']) + allvars = Series(["NO", "NOX"]) index = allvars.isin(keys) if can_do(index): newkeys = allvars.loc[index] - d['NOx'] = add_multiple_lazy(d, newkeys) - d['NOx'] = d['NOx'].assign_attrs({'name': 'NOx', 'long_name': 'NOx'}) + d["NOx"] = add_multiple_lazy(d, newkeys) + d["NOx"] = d["NOx"].assign_attrs({"name": "NOx", "long_name": "NOx"}) return d @@ -475,188 +503,225 @@ def _predefined_mapping_tables(dset): to_improve = {} to_nadp = {} to_aqs = { - 'OZONE': ['O3'], - 'PM2.5': ['PM25'], - 'CO': ['CO'], - 'NOY': ['NOy'], - 'NOX': ['NOx'], - 'SO2': ['SO2'], - 'NOX': ['NOx'], - 'NO': ['NO'], - 'NO2': ['NO2'], - 'SO4f': ['SO4f'], - 'PM10': ['PM10'], - 'NO3f': ['NO3f'], - 'ECf': ['ECf'], - 'OCf': ['OCf'], - 'ETHANE': ['ETHA'], - 'BENZENE': ['BENZENE'], - 'TOLUENE': ['TOL'], - 'ISOPRENE': ['ISOP'], - 'O-XYLENE': ['XYL'], - 'WS': ['WSPD10'], - 'TEMP': ['TEMP2'], - 'WD': ['WDIR10'], - 'NAf': ['NAf'], - 'MGf': ['AMGJ'], - 'TIf': ['ATIJ'], - 'SIf': ['ASIJ'], - 'Kf': ['Kf'], - 'CAf': ['CAf'], - 'NH4f': ['NH4f'], - 'FEf': ['AFEJ'], - 'ALf': ['AALJ'], - 'MNf': ['AMNJ'], + "OZONE": ["O3"], + "PM2.5": ["PM25"], + "CO": ["CO"], + "NOY": ["NOy"], + "NOX": ["NOx"], + "SO2": ["SO2"], + "NO": ["NO"], + "NO2": ["NO2"], + "SO4f": ["SO4f"], + "PM10": ["PM10"], + "NO3f": ["NO3f"], + "ECf": ["ECf"], + "OCf": ["OCf"], + "ETHANE": ["ETHA"], + "BENZENE": ["BENZENE"], + "TOLUENE": ["TOL"], + "ISOPRENE": ["ISOP"], + "O-XYLENE": ["XYL"], + "WS": ["WSPD10"], + "TEMP": ["TEMP2"], + "WD": ["WDIR10"], + "NAf": ["NAf"], + "MGf": ["AMGJ"], + "TIf": ["ATIJ"], + "SIf": ["ASIJ"], + "Kf": ["Kf"], + "CAf": ["CAf"], + "NH4f": ["NH4f"], + "FEf": ["AFEJ"], + "ALf": ["AALJ"], + "MNf": ["AMNJ"], } to_airnow = { - 'OZONE': ['O3'], - 'PM2.5': ['PM25'], - 'CO': ['CO'], - 'NOY': ['NOy'], - 'NOX': ['NOx'], - 'SO2': ['SO2'], - 'NOX': ['NOx'], - 'NO': ['NO'], - 'NO2': ['NO2'], - 'SO4f': ['SO4f'], - 'PM10': ['PM10'], - 'NO3f': ['NO3f'], - 'ECf': ['ECf'], - 'OCf': ['OCf'], - 'ETHANE': ['ETHA'], - 'BENZENE': ['BENZENE'], - 'TOLUENE': ['TOL'], - 'ISOPRENE': ['ISOP'], - 'O-XYLENE': ['XYL'], - 'WS': ['WSPD10'], - 'TEMP': ['TEMP2'], - 'WD': ['WDIR10'], - 'NAf': ['NAf'], - 'MGf': ['AMGJ'], - 'TIf': ['ATIJ'], - 'SIf': ['ASIJ'], - 'Kf': ['Kf'], - 'CAf': ['CAf'], - 'NH4f': ['NH4f'], - 'FEf': ['AFEJ'], - 'ALf': ['AALJ'], - 'MNf': ['AMNJ'], + "OZONE": ["O3"], + "PM2.5": ["PM25"], + "CO": ["CO"], + "NOY": ["NOy"], + "NOX": ["NOx"], + "SO2": ["SO2"], + "NO": ["NO"], + "NO2": ["NO2"], + "SO4f": ["SO4f"], + "PM10": ["PM10"], + "NO3f": ["NO3f"], + "ECf": ["ECf"], + "OCf": ["OCf"], + "ETHANE": ["ETHA"], + "BENZENE": ["BENZENE"], + "TOLUENE": ["TOL"], + "ISOPRENE": ["ISOP"], + "O-XYLENE": ["XYL"], + "WS": ["WSPD10"], + "TEMP": ["TEMP2"], + "WD": ["WDIR10"], + "NAf": ["NAf"], + "MGf": ["AMGJ"], + "TIf": ["ATIJ"], + "SIf": ["ASIJ"], + "Kf": ["Kf"], + "CAf": ["CAf"], + "NH4f": ["NH4f"], + "FEf": ["AFEJ"], + "ALf": ["AALJ"], + "MNf": ["AMNJ"], + } + to_crn = { + "SUR_TEMP": ["TEMPG"], + "T_HR_AVG": ["TEMP2"], + "SOLARAD": ["RGRND"], + "SOIL_MOISTURE_5": ["SOIM1"], + "SOIL_MOISTURE_10": ["SOIM2"], } - to_crn = {'SUR_TEMP': ['TEMPG'], 'T_HR_AVG': ['TEMP2'], 'SOLARAD': ['RGRND'], 'SOIL_MOISTURE_5': ['SOIM1'], 'SOIL_MOISTURE_10': ['SOIM2']} to_aeronet = {} to_cems = {} mapping_tables = { - 'improve': to_improve, - 'aqs': to_aqs, - 'airnow': to_airnow, - 'crn': to_crn, - 'cems': to_cems, - 'nadp': to_nadp, - 'aeronet': to_aeronet, + "improve": to_improve, + "aqs": to_aqs, + "airnow": to_airnow, + "crn": to_crn, + "cems": to_cems, + "nadp": to_nadp, + "aeronet": to_aeronet, } - dset = dset.assign_attrs({'mapping_tables': mapping_tables}) + dset = dset.assign_attrs({"mapping_tables": mapping_tables}) return dset # Arrays for different gasses and pm groupings accumulation = array( [ - 'AALJ', - 'AALK1J', - 'AALK2J', - 'ABNZ1J', - 'ABNZ2J', - 'ABNZ3J', - 'ACAJ', - 'ACLJ', - 'AECJ', - 'AFEJ', - 'AISO1J', - 'AISO2J', - 'AISO3J', - 'AKJ', - 'AMGJ', - 'AMNJ', - 'ANAJ', - 'ANH4J', - 'ANO3J', - 'AOLGAJ', - 'AOLGBJ', - 'AORGCJ', - 'AOTHRJ', - 'APAH1J', - 'APAH2J', - 'APAH3J', - 'APNCOMJ', - 'APOCJ', - 'ASIJ', - 'ASO4J', - 'ASQTJ', - 'ATIJ', - 'ATOL1J', - 'ATOL2J', - 'ATOL3J', - 'ATRP1J', - 'ATRP2J', - 'AXYL1J', - 'AXYL2J', - 'AXYL3J', - 'AORGAJ', - 'AORGPAJ', - 'AORGBJ', + "AALJ", + "AALK1J", + "AALK2J", + "ABNZ1J", + "ABNZ2J", + "ABNZ3J", + "ACAJ", + "ACLJ", + "AECJ", + "AFEJ", + "AISO1J", + "AISO2J", + "AISO3J", + "AKJ", + "AMGJ", + "AMNJ", + "ANAJ", + "ANH4J", + "ANO3J", + "AOLGAJ", + "AOLGBJ", + "AORGCJ", + "AOTHRJ", + "APAH1J", + "APAH2J", + "APAH3J", + "APNCOMJ", + "APOCJ", + "ASIJ", + "ASO4J", + "ASQTJ", + "ATIJ", + "ATOL1J", + "ATOL2J", + "ATOL3J", + "ATRP1J", + "ATRP2J", + "AXYL1J", + "AXYL2J", + "AXYL3J", + "AORGAJ", + "AORGPAJ", + "AORGBJ", + ] +) +aitken = array( + [ + "ACLI", + "AECI", + "ANAI", + "ANH4I", + "ANO3I", + "AOTHRI", + "APNCOMI", + "APOCI", + "ASO4I", + "AORGAI", + "AORGPAI", + "AORGBI", + ] +) +coarse = array(["ACLK", "ACORS", "ANH4K", "ANO3K", "ASEACAT", "ASO4K", "ASOIL"]) +noy_gas = array( + [ + "NO", + "NO2", + "NO3", + "N2O5", + "HONO", + "HNO3", + "PAN", + "PANX", + "PNA", + "NTR", + "CRON", + "CRN2", + "CRNO", + "CRPX", + "OPAN", ] ) -aitken = array(['ACLI', 'AECI', 'ANAI', 'ANH4I', 'ANO3I', 'AOTHRI', 'APNCOMI', 'APOCI', 'ASO4I', 'AORGAI', 'AORGPAI', 'AORGBI']) -coarse = array(['ACLK', 'ACORS', 'ANH4K', 'ANO3K', 'ASEACAT', 'ASO4K', 'ASOIL']) -noy_gas = array(['NO', 'NO2', 'NO3', 'N2O5', 'HONO', 'HNO3', 'PAN', 'PANX', 'PNA', 'NTR', 'CRON', 'CRN2', 'CRNO', 'CRPX', 'OPAN']) -pec = array(['AECI', 'AECJ']) -pso4 = array(['ASO4I', 'ASO4J']) -pno3 = array(['ANO3I', 'ANO3J']) -pnh4 = array(['ANH4I', 'ANH4J']) -pcl = array(['ACLI', 'ACLJ']) +pec = array(["AECI", "AECJ"]) +pso4 = array(["ASO4I", "ASO4J"]) +pno3 = array(["ANO3I", "ANO3J"]) +pnh4 = array(["ANH4I", "ANH4J"]) +pcl = array(["ACLI", "ACLJ"]) poc = array( [ - 'AOTHRI', - 'APNCOMI', - 'APOCI', - 'AORGAI', - 'AORGPAI', - 'AORGBI', - 'ATOL1J', - 'ATOL2J', - 'ATOL3J', - 'ATRP1J', - 'ATRP2J', - 'AXYL1J', - 'AXYL2J', - 'AXYL3J', - 'AORGAJ', - 'AORGPAJ', - 'AORGBJ', - 'AOLGAJ', - 'AOLGBJ', - 'AORGCJ', - 'AOTHRJ', - 'APAH1J', - 'APAH2J', - 'APAH3J', - 'APNCOMJ', - 'APOCJ', - 'ASQTJ', - 'AISO1J', - 'AISO2J', - 'AISO3J', - 'AALK1J', - 'AALK2J', - 'ABNZ1J', - 'ABNZ2J', - 'ABNZ3J', - 'AORGAI', - 'AORGAJ', - 'AORGPAI', - 'AORGPAJ', - 'AORGBI', - 'AORGBJ', + "AOTHRI", + "APNCOMI", + "APOCI", + "AORGAI", + "AORGPAI", + "AORGBI", + "ATOL1J", + "ATOL2J", + "ATOL3J", + "ATRP1J", + "ATRP2J", + "AXYL1J", + "AXYL2J", + "AXYL3J", + "AORGAJ", + "AORGPAJ", + "AORGBJ", + "AOLGAJ", + "AOLGBJ", + "AORGCJ", + "AOTHRJ", + "APAH1J", + "APAH2J", + "APAH3J", + "APNCOMJ", + "APOCJ", + "ASQTJ", + "AISO1J", + "AISO2J", + "AISO3J", + "AALK1J", + "AALK2J", + "ABNZ1J", + "ABNZ2J", + "ABNZ3J", + "AORGAI", + "AORGAJ", + "AORGPAI", + "AORGPAJ", + "AORGBI", + "AORGBJ", ] ) -minerals = array(['AALJ', 'ACAJ', 'AFEJ', 'AKJ', 'AMGJ', 'AMNJ', 'ANAJ', 'ATIJ', 'ASIJ']) +minerals = array(["AALJ", "ACAJ", "AFEJ", "AKJ", "AMGJ", "AMNJ", "ANAJ", "ATIJ", "ASIJ"]) diff --git a/monetio/models/fv3chem.py b/monetio/models/fv3chem.py index 8bbe773f..1082dc5f 100644 --- a/monetio/models/fv3chem.py +++ b/monetio/models/fv3chem.py @@ -29,8 +29,10 @@ def open_dataset(fname): else: raise ValueError except ValueError: - print('''File format not recognized. Note that you must preprocess the - files with nemsio2nc4 or fv3grib2nc4 available on github.''') + print( + """File format not recognized. Note that you must preprocess the + files with nemsio2nc4 or fv3grib2nc4 available on github.""" + ) return f @@ -51,20 +53,21 @@ def open_mfdataset(fname): names, nemsio, grib = _ensure_mfdataset_filenames(fname) try: if nemsio: - f = xr.open_mfdataset(names, concat_dim='time') + f = xr.open_mfdataset(names, concat_dim="time") f = _fix_nemsio(f) f = _fix_time_nemsio(f, names) # f['geoht'] = _calc_nemsio_hgt(f) elif grib: - f = xr.open_mfdataset(names, concat_dim='time') + f = xr.open_mfdataset(names, concat_dim="time") f = _fix_grib2(f) else: raise ValueError except ValueError: - print('''File format not recognized. Note that you must preprocess the + print( + """File format not recognized. Note that you must preprocess the files with nemsio2nc4 or fv3grib2nc4 available on github. Do not - mix and match file types. Ensure all are the same file format.''' - ) + mix and match file types. Ensure all are the same file format.""" + ) return f @@ -83,14 +86,15 @@ def _ensure_mfdataset_filenames(fname): """ from glob import glob + from numpy import sort - import six - if isinstance(fname, six.string_types): + + if isinstance(fname, str): names = sort(glob(fname)) else: names = sort(fname) - nemsios = [True for i in names if 'nemsio' in i] - gribs = [True for i in names if 'grb2' in i or 'grib2' in i or 'grb' in i] + nemsios = [True for i in names if "nemsio" in i] + gribs = [True for i in names if "grb2" in i or "grib2" in i or "grb" in i] grib = False nemsio = False if len(nemsios) >= 1: @@ -117,20 +121,21 @@ def _fix_time_nemsio(f, fname): """ from pandas import Timedelta, to_datetime + time = None print(fname) if len(f.time) > 1: tarray = [] for t, fn in zip(f.time.to_index(), fname): - hour = int([i for i in fn.split('.') if 'atmf' in i][0][-3:]) - tdelta = Timedelta(hour, unit='h') + hour = int([i for i in fn.split(".") if "atmf" in i][0][-3:]) + tdelta = Timedelta(hour, unit="h") tarray.append(t + tdelta) time = to_datetime(tarray) else: - hour = int([i for i in fname.split('.') if 'atmf' in i][0][-3:]) - tdelta = Timedelta(hour, unit='h') + hour = int([i for i in fname.split(".") if "atmf" in i][0][-3:]) + tdelta = Timedelta(hour, unit="h") time = f.time.to_index() + tdelta - f['time'] = time + f["time"] = time return f @@ -148,7 +153,8 @@ def _fix_nemsio(f): Description of returned object. """ - from numpy import meshgrid + # from numpy import meshgrid + # # f = _rename_func(f, rename_dict) # lat = f.lat.values # lon = f.lon.values @@ -159,9 +165,9 @@ def _fix_nemsio(f): # f = f.set_coords(['latitude', 'longitude']) f = _rename_func(f, {}) try: - f['geohgt'] = _calc_nemsio_hgt(f) - except: - print('geoht calculation not completed') + f["geohgt"] = _calc_nemsio_hgt(f) + except Exception: + print("geoht calculation not completed") # try: # from pyresample import utils # f['longitude'] = utils.wrap_longitudes(f.longitude) @@ -190,16 +196,16 @@ def _rename_func(f, rename_dict): """ final_dict = {} for i in f.data_vars.keys(): - if 'midlayer' in i: - rename_dict[i] = i.split('midlayer')[0] + if "midlayer" in i: + rename_dict[i] = i.split("midlayer")[0] for i in rename_dict.keys(): if i in f.data_vars.keys(): final_dict[i] = rename_dict[i] f = f.rename(final_dict) try: - f = f.rename({'pp25': 'pm25', 'pp10': 'pm10'}) + f = f.rename({"pp25": "pm25", "pp10": "pm10"}) except ValueError: - print('PM25 and PM10 are not available') + print("PM25 and PM10 are not available") return f @@ -217,213 +223,117 @@ def _fix_grib2(f): Description of returned object. """ - from numpy import meshgrid + # from numpy import meshgrid + rename_dict = { - 'AOTK_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2eM05_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere': - 'pm25aod550', - 'AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'pmaod550', - 'AOTK_aerosol_EQ_Dust_Dry_aerosol_size_LT_2eM05_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere': - 'dust25aod550', - 'AOTK_chemical_Dust_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'dust25aod550', - 'AOTK_aerosol_EQ_Sea_Salt_Dry_aerosol_size_LT_2eM05_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere': - 'salt25aod550', - 'AOTK_chemical_Sea_Salt_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'salt25aod550', - 'AOTK_aerosol_EQ_Sulphate_Dry_aerosol_size_LT_2eM05_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere': - 'sulf25aod550', - 'AOTK_chemical_Sulphate_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'sulf25aod550', - 'AOTK_aerosol_EQ_Particulate_Organic_Matter_Dry_aerosol_size_LT_2eM05_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere': - 'oc25aod550', - 'AOTK_chemical_Particulate_Organic_Matter_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'oc25aod550', - 'AOTK_aerosol_EQ_Black_Carbon_Dry_aerosol_size_LT_2eM05_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere': - 'bc25aod550', - 'AOTK_chemical_Black_Carbon_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'bc25aod550', - 'COLMD_aerosol_EQ_Total_Aerosol_aerosol_size_LT_1eM05_entireatmosphere': - 'tc_aero10', - 'COLMD_chemical_Total_Aerosol_aerosol_size__1e_05_aerosol_wavelength_____code_table_4_91_255_entireatmosphere': - 'tc_aero10', - 'COLMD_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2D5eM06_entireatmosphere': - 'tc_aero25', - 'COLMD_chemical_Total_Aerosol_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_entireatmosphere': - 'tc_aero25', - 'COLMD_aerosol_EQ_Dust_Dry_aerosol_size_LT_2D5eM06_entireatmosphere': - 'tc_dust25', - 'COLMD_chemical_Dust_Dry_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_entireatmosphere': - 'tc_dust25', - 'COLMD_aerosol_EQ_Sea_Salt_Dry_aerosol_size_LT_2D5eM06_entireatmosphere': - 'tc_salt25', - 'COLMD_chemical_Sea_Salt_Dry_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_entireatmosphere': - 'tc_salt25', - 'COLMD_aerosol_EQ_Black_Carbon_Dry_aerosol_size_LT_2D36eM08_entireatmosphere': - 'tc_bc236', - 'COLMD_chemical_Black_Carbon_Dry_aerosol_size__2_36e_08_aerosol_wavelength_____code_table_4_91_255_entireatmosphere': - 'tc_bc236', - 'COLMD_aerosol_EQ_Particulate_Organic_Matter_Dry_aerosol_size_LT_4D24eM08_entireatmosphere': - 'tc_oc424', - 'COLMD_chemical_Particulate_Organic_Matter_Dry_aerosol_size__4_24e_08_aerosol_wavelength_____code_table_4_91_255_entireatmosphere': - 'tc_oc424', - 'COLMD_aerosol_EQ_Sulphate_Dry_aerosol_size_LT_2D5eM06_entireatmosphere': - 'tc_sulf25', - 'COLMD_chemical_Sulphate_Dry_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_entireatmosphere': - 'tc_sulf25', - 'PMTF_chemical_Dust_Dry_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_surface': - 'sfc_dust25', - 'PMTF_chemical_Sea_Salt_Dry_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_surface': - 'sfc_salt25', - 'PMTF_chemical_Total_Aerosol_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_surface': - 'sfc_pm25', - 'PMTF_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2D5eM06_surface': - 'sfc_pm25', - 'PMTC_aerosol_EQ_Total_Aerosol_aerosol_size_LT_1eM05_surface': - 'sfc_pm10', - 'PMTF_aerosol_EQ_Sea_Salt_Dry_aerosol_size_LT_2D5eM06_surface': - 'sfc_salt25', - 'PMTF_aerosol_EQ_Dust_Dry_aerosol_size_LT_2D5eM06_surface': - 'sfc_dust25', - 'PMTF_chemical_Dust_Dry_aerosol_size___2e_07__2e_06_aerosol_wavelength_____code_table_4_91_255_1hybridlevel': - 'dustmr1p1', - 'PMTF_chemical_Dust_Dry_aerosol_size___2e_06__3_6e_06_aerosol_wavelength_____code_table_4_91_255_1hybridlevel': - 'dustmr2p5', - 'PMTC_chemical_Dust_Dry_aerosol_size___3_6e_06__6e_06_aerosol_wavelength_____code_table_4_91_255_1hybridlevel': - 'dustmr4p8', - 'PMTC_chemical_Dust_Dry_aerosol_size___6e_06__1_2e_05_aerosol_wavelength_____code_table_4_91_255_1hybridlevel': - 'dustmr9p0', - 'PMTC_chemical_Dust_Dry_aerosol_size___1_2e_05__2e_05_aerosol_wavelength_____code_table_4_91_255_1hybridlevel': - 'dustmr16p0', - 'PMTF_chemical_Sea_Salt_Dry_aerosol_size___2e_07__1e_06_aerosol_wavelength_____code_table_4_91_255_1hybridlevel': - 'saltmr0p6', - 'PMTC_chemical_Sea_Salt_Dry_aerosol_size___1e_06__3e_06_aerosol_wavelength_____code_table_4_91_255_1hybridlevel': - 'saltmr2p0', - 'PMTC_chemical_Sea_Salt_Dry_aerosol_size___3e_06__1e_05_aerosol_wavelength_____code_table_4_91_255_1hybridlevel': - 'saltmr6p5', - 'PMTC_chemical_Sea_Salt_Dry_aerosol_size___1e_05__2e_05_aerosol_wavelength_____code_table_4_91_255_1hybridlevel': - 'saltmr10p5', - 'PMTF_chemical_Sulphate_Dry_aerosol_size__1_39e_07_aerosol_wavelength_____code_table_4_91_255_1hybridlevel': - 'sulfmr1p36', - 'PMTF_chemical_chemical_62016_aerosol_size__4_24e_08_aerosol_wavelength_____code_table_4_91_255_1hybridlevel': - 'aero1_mr0p0424', - 'PMTF_chemical_chemical_62015_aerosol_size__4_24e_08_aerosol_wavelength_____code_table_4_91_255_1hybridlevel': - 'aero2_mr0p0424', - 'PMTF_chemical_chemical_62014_aerosol_size__2_36e_08_aerosol_wavelength_____code_table_4_91_255_1hybridlevel': - 'aero1_mr0p0236', - 'PMTF_chemical_chemical_62013_aerosol_size__2_36e_08_aerosol_wavelength_____code_table_4_91_255_1hybridlevel': - 'aero2_mr0p0236', - 'level': - 'z', - 'AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_1_1e_05_1_12e_05_entireatmosphere': - 'pm25aod11100', - 'AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_1_628e_06_1_652e_06_entireatmosphere': - 'pm25aod1640', - 'AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_8_41e_07_8_76e_07_entireatmosphere': - 'pm25aod860', - 'AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_6_2e_07_6_7e_07_entireatmosphere': - 'pm25aod640', - 'var0_20_112_chemical_Black_Carbon_Dry_aerosol_size__7e_07_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'bc07aod550', - 'var0_20_112_chemical_Particulate_Organic_Matter_Dry_aerosol_size__7e_07_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'oc07aod550', - 'var0_20_112_chemical_Sulphate_Dry_aerosol_size__7e_07_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'sulf07aod550', - 'var0_20_112_chemical_Sea_Salt_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'salt25aod550', - 'AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_3_38e_07_3_42e_07_entireatmosphere': - 'pm25aod340', - 'ASYSFK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_3_38e_07_3_42e_07_entireatmosphere': - 'AF_pm25aod340', - 'SSALBK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_3_38e_07_3_42e_07_entireatmosphere': - 'ssa_pm25aod340', - 'AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_4_3e_07_4_5e_07_entireatmosphere': - 'pm25aod440', - 'AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'pm25aod550', - 'var0_20_112_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'tc_pm25aod550', - 'AOTK_chemical_Dust_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'dust25aod550', - 'var0_20_112_chemical_Dust_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'tc_dust25aod550', - 'AOTK_chemical_Sea_Salt_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'salt25aod550', - 'var0_20_112_chemical_Sea_Salt_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'tc_salt25aod550', - 'AOTK_chemical_Sulphate_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'sulf25aod550', - 'var0_20_112_chemical_Sulphate_Dry_aerosol_size__7e_07_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'tc_sulf25aod550', - 'AOTK_chemical_Particulate_Organic_Matter_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'oc25aod550', - 'var0_20_112_chemical_Particulate_Organic_Matter_Dry_aerosol_size__7e_07_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'tc_sulfaod550', - 'AOTK_chemical_Black_Carbon_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'bc25aod550', - 'var0_20_112_chemical_Black_Carbon_Dry_aerosol_size__7e_07_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere': - 'tc_ocaod550', - 'AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_6_2e_07_6_7e_07_entireatmosphere': - 'pm25aod640', - 'AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_8_41e_07_8_76e_07_entireatmosphere': - 'pm25aod860', - 'AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_1_628e_06_1_652e_06_entireatmosphere': - 'pm25aod1645', - 'AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_1_1e_05_1_12e_05_entireatmosphere': - 'pm25aod11500', - 'COLMD_chemical_Total_Aerosol_aerosol_size__1e_05_aerosol_wavelength_____code_table_4_91_255_entireatmosphere': - 'tc_pm10', - 'COLMD_chemical_Total_Aerosol_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_entireatmosphere': - 'tc_pm25', - 'COLMD_chemical_Dust_Dry_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_entireatmosphere': - 'tc_dust25', - 'COLMD_chemical_Sea_Salt_Dry_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_entireatmosphere': - 'tc_salt25', - 'COLMD_chemical_Black_Carbon_Dry_aerosol_size__2_36e_08_aerosol_wavelength_____code_table_4_91_255_entireatmosphere': - 'tc_bc036', - 'COLMD_chemical_Particulate_Organic_Matter_Dry_aerosol_size__4_24e_08_aerosol_wavelength_____code_table_4_91_255_entireatmosphere': - 'tc_oc0428', - 'COLMD_chemical_Sulphate_Dry_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_entireatmosphere': - 'tc_sulf25', - 'AOTK_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2eM05_aerosol_wavelength_GE_3D38eM07_LE_3D42eM07_entireatmosphere': - 'pm25aod340_eq', - 'ASYSFK_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2eM05_aerosol_wavelength_GE_3D38eM07_LE_3D42eM07_entireatmosphere': - 'AF_pm25aod340', - 'SSALBK_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2eM05_aerosol_wavelength_GE_3D38eM07_LE_3D42eM07_entireatmosphere': - 'SSA_pm25aod340', - 'AOTK_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2eM05_aerosol_wavelength_GE_4D3eM07_LE_4D5eM07_entireatmosphere': - 'pm25aod440', - 'SCTAOTK_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2eM05_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere': - 'SA_pm25aod550', - 'SCTAOTK_aerosol_EQ_Dust_Dry_aerosol_size_LT_2eM05_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere': - 'SA_dust25aod550', - 'SCTAOTK_aerosol_EQ_Sea_Salt_Dry_aerosol_size_LT_2eM05_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere': - 'SA_salt25aod550', - 'SCTAOTK_aerosol_EQ_Sulphate_Dry_aerosol_size_LT_7eM07_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere': - 'SA_sulf07aod550', - 'SCTAOTK_aerosol_EQ_Particulate_Organic_Matter_Dry_aerosol_size_LT_7eM07_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere': - 'SA_oc07aod550', - 'SCTAOTK_aerosol_EQ_Black_Carbon_Dry_aerosol_size_LT_7eM07_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere': - 'SC_bc07aod550', - 'AOTK_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2eM05_aerosol_wavelength_GE_6D2eM07_LE_6D7eM07_entireatmosphere': - 'pm25aod645', - 'AOTK_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2eM05_aerosol_wavelength_GE_8D41eM07_LE_8D76eM07_entireatmosphere': - 'pm25aod841', - 'AOTK_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2eM05_aerosol_wavelength_GE_1D628eM06_LE_1D652eM06_entireatmosphere': - 'pm25aod1628', - 'AOTK_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2eM05_aerosol_wavelength_GE_1D1eM05_LE_1D12eM05_entireatmosphere': - 'pm25aod11000' + "AOTK_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2eM05_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere": "pm25aod550", + # "AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "pmaod550", + "AOTK_aerosol_EQ_Dust_Dry_aerosol_size_LT_2eM05_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere": "dust25aod550", + "AOTK_chemical_Dust_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "dust25aod550", + "AOTK_aerosol_EQ_Sea_Salt_Dry_aerosol_size_LT_2eM05_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere": "salt25aod550", + "AOTK_chemical_Sea_Salt_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "salt25aod550", + "AOTK_aerosol_EQ_Sulphate_Dry_aerosol_size_LT_2eM05_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere": "sulf25aod550", + "AOTK_chemical_Sulphate_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "sulf25aod550", + "AOTK_aerosol_EQ_Particulate_Organic_Matter_Dry_aerosol_size_LT_2eM05_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere": "oc25aod550", + "AOTK_chemical_Particulate_Organic_Matter_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "oc25aod550", + "AOTK_aerosol_EQ_Black_Carbon_Dry_aerosol_size_LT_2eM05_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere": "bc25aod550", + "AOTK_chemical_Black_Carbon_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "bc25aod550", + "COLMD_aerosol_EQ_Total_Aerosol_aerosol_size_LT_1eM05_entireatmosphere": "tc_aero10", + # "COLMD_chemical_Total_Aerosol_aerosol_size__1e_05_aerosol_wavelength_____code_table_4_91_255_entireatmosphere": "tc_aero10", + "COLMD_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2D5eM06_entireatmosphere": "tc_aero25", + # "COLMD_chemical_Total_Aerosol_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_entireatmosphere": "tc_aero25", + "COLMD_aerosol_EQ_Dust_Dry_aerosol_size_LT_2D5eM06_entireatmosphere": "tc_dust25", + "COLMD_chemical_Dust_Dry_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_entireatmosphere": "tc_dust25", + "COLMD_aerosol_EQ_Sea_Salt_Dry_aerosol_size_LT_2D5eM06_entireatmosphere": "tc_salt25", + "COLMD_chemical_Sea_Salt_Dry_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_entireatmosphere": "tc_salt25", + "COLMD_aerosol_EQ_Black_Carbon_Dry_aerosol_size_LT_2D36eM08_entireatmosphere": "tc_bc236", + # "COLMD_chemical_Black_Carbon_Dry_aerosol_size__2_36e_08_aerosol_wavelength_____code_table_4_91_255_entireatmosphere": "tc_bc236", + "COLMD_aerosol_EQ_Particulate_Organic_Matter_Dry_aerosol_size_LT_4D24eM08_entireatmosphere": "tc_oc424", + # "COLMD_chemical_Particulate_Organic_Matter_Dry_aerosol_size__4_24e_08_aerosol_wavelength_____code_table_4_91_255_entireatmosphere": "tc_oc424", + "COLMD_aerosol_EQ_Sulphate_Dry_aerosol_size_LT_2D5eM06_entireatmosphere": "tc_sulf25", + "COLMD_chemical_Sulphate_Dry_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_entireatmosphere": "tc_sulf25", + "PMTF_chemical_Dust_Dry_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_surface": "sfc_dust25", + "PMTF_chemical_Sea_Salt_Dry_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_surface": "sfc_salt25", + "PMTF_chemical_Total_Aerosol_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_surface": "sfc_pm25", + "PMTF_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2D5eM06_surface": "sfc_pm25", + "PMTC_aerosol_EQ_Total_Aerosol_aerosol_size_LT_1eM05_surface": "sfc_pm10", + "PMTF_aerosol_EQ_Sea_Salt_Dry_aerosol_size_LT_2D5eM06_surface": "sfc_salt25", + "PMTF_aerosol_EQ_Dust_Dry_aerosol_size_LT_2D5eM06_surface": "sfc_dust25", + "PMTF_chemical_Dust_Dry_aerosol_size___2e_07__2e_06_aerosol_wavelength_____code_table_4_91_255_1hybridlevel": "dustmr1p1", + "PMTF_chemical_Dust_Dry_aerosol_size___2e_06__3_6e_06_aerosol_wavelength_____code_table_4_91_255_1hybridlevel": "dustmr2p5", + "PMTC_chemical_Dust_Dry_aerosol_size___3_6e_06__6e_06_aerosol_wavelength_____code_table_4_91_255_1hybridlevel": "dustmr4p8", + "PMTC_chemical_Dust_Dry_aerosol_size___6e_06__1_2e_05_aerosol_wavelength_____code_table_4_91_255_1hybridlevel": "dustmr9p0", + "PMTC_chemical_Dust_Dry_aerosol_size___1_2e_05__2e_05_aerosol_wavelength_____code_table_4_91_255_1hybridlevel": "dustmr16p0", + "PMTF_chemical_Sea_Salt_Dry_aerosol_size___2e_07__1e_06_aerosol_wavelength_____code_table_4_91_255_1hybridlevel": "saltmr0p6", + "PMTC_chemical_Sea_Salt_Dry_aerosol_size___1e_06__3e_06_aerosol_wavelength_____code_table_4_91_255_1hybridlevel": "saltmr2p0", + "PMTC_chemical_Sea_Salt_Dry_aerosol_size___3e_06__1e_05_aerosol_wavelength_____code_table_4_91_255_1hybridlevel": "saltmr6p5", + "PMTC_chemical_Sea_Salt_Dry_aerosol_size___1e_05__2e_05_aerosol_wavelength_____code_table_4_91_255_1hybridlevel": "saltmr10p5", + "PMTF_chemical_Sulphate_Dry_aerosol_size__1_39e_07_aerosol_wavelength_____code_table_4_91_255_1hybridlevel": "sulfmr1p36", + "PMTF_chemical_chemical_62016_aerosol_size__4_24e_08_aerosol_wavelength_____code_table_4_91_255_1hybridlevel": "aero1_mr0p0424", + "PMTF_chemical_chemical_62015_aerosol_size__4_24e_08_aerosol_wavelength_____code_table_4_91_255_1hybridlevel": "aero2_mr0p0424", + "PMTF_chemical_chemical_62014_aerosol_size__2_36e_08_aerosol_wavelength_____code_table_4_91_255_1hybridlevel": "aero1_mr0p0236", + "PMTF_chemical_chemical_62013_aerosol_size__2_36e_08_aerosol_wavelength_____code_table_4_91_255_1hybridlevel": "aero2_mr0p0236", + "level": "z", + # "AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_1_1e_05_1_12e_05_entireatmosphere": "pm25aod11100", + # "AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_1_628e_06_1_652e_06_entireatmosphere": "pm25aod1640", + "AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_8_41e_07_8_76e_07_entireatmosphere": "pm25aod860", + "AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_6_2e_07_6_7e_07_entireatmosphere": "pm25aod640", + # "var0_20_112_chemical_Black_Carbon_Dry_aerosol_size__7e_07_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "bc07aod550", + # "var0_20_112_chemical_Particulate_Organic_Matter_Dry_aerosol_size__7e_07_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "oc07aod550", + # "var0_20_112_chemical_Sulphate_Dry_aerosol_size__7e_07_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "sulf07aod550", + # "var0_20_112_chemical_Sea_Salt_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "salt25aod550", + "AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_3_38e_07_3_42e_07_entireatmosphere": "pm25aod340", + "ASYSFK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_3_38e_07_3_42e_07_entireatmosphere": "AF_pm25aod340", + "SSALBK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_3_38e_07_3_42e_07_entireatmosphere": "ssa_pm25aod340", + "AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_4_3e_07_4_5e_07_entireatmosphere": "pm25aod440", + "AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "pm25aod550", + "var0_20_112_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "tc_pm25aod550", + "AOTK_chemical_Dust_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "dust25aod550", + "var0_20_112_chemical_Dust_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "tc_dust25aod550", + "AOTK_chemical_Sea_Salt_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "salt25aod550", + "var0_20_112_chemical_Sea_Salt_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "tc_salt25aod550", + "AOTK_chemical_Sulphate_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "sulf25aod550", + "var0_20_112_chemical_Sulphate_Dry_aerosol_size__7e_07_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "tc_sulf25aod550", + "AOTK_chemical_Particulate_Organic_Matter_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "oc25aod550", + "var0_20_112_chemical_Particulate_Organic_Matter_Dry_aerosol_size__7e_07_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "tc_sulfaod550", + "AOTK_chemical_Black_Carbon_Dry_aerosol_size__2e_05_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "bc25aod550", + "var0_20_112_chemical_Black_Carbon_Dry_aerosol_size__7e_07_aerosol_wavelength_5_45e_07_5_65e_07_entireatmosphere": "tc_ocaod550", + "AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_6_2e_07_6_7e_07_entireatmosphere": "pm25aod640", + "AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_8_41e_07_8_76e_07_entireatmosphere": "pm25aod860", + "AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_1_628e_06_1_652e_06_entireatmosphere": "pm25aod1645", + "AOTK_chemical_Total_Aerosol_aerosol_size__2e_05_aerosol_wavelength_1_1e_05_1_12e_05_entireatmosphere": "pm25aod11500", + "COLMD_chemical_Total_Aerosol_aerosol_size__1e_05_aerosol_wavelength_____code_table_4_91_255_entireatmosphere": "tc_pm10", + "COLMD_chemical_Total_Aerosol_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_entireatmosphere": "tc_pm25", + "COLMD_chemical_Dust_Dry_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_entireatmosphere": "tc_dust25", + "COLMD_chemical_Sea_Salt_Dry_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_entireatmosphere": "tc_salt25", + "COLMD_chemical_Black_Carbon_Dry_aerosol_size__2_36e_08_aerosol_wavelength_____code_table_4_91_255_entireatmosphere": "tc_bc036", + "COLMD_chemical_Particulate_Organic_Matter_Dry_aerosol_size__4_24e_08_aerosol_wavelength_____code_table_4_91_255_entireatmosphere": "tc_oc0428", + "COLMD_chemical_Sulphate_Dry_aerosol_size__2_5e_06_aerosol_wavelength_____code_table_4_91_255_entireatmosphere": "tc_sulf25", + "AOTK_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2eM05_aerosol_wavelength_GE_3D38eM07_LE_3D42eM07_entireatmosphere": "pm25aod340_eq", + "ASYSFK_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2eM05_aerosol_wavelength_GE_3D38eM07_LE_3D42eM07_entireatmosphere": "AF_pm25aod340", + "SSALBK_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2eM05_aerosol_wavelength_GE_3D38eM07_LE_3D42eM07_entireatmosphere": "SSA_pm25aod340", + "AOTK_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2eM05_aerosol_wavelength_GE_4D3eM07_LE_4D5eM07_entireatmosphere": "pm25aod440", + "SCTAOTK_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2eM05_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere": "SA_pm25aod550", + "SCTAOTK_aerosol_EQ_Dust_Dry_aerosol_size_LT_2eM05_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere": "SA_dust25aod550", + "SCTAOTK_aerosol_EQ_Sea_Salt_Dry_aerosol_size_LT_2eM05_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere": "SA_salt25aod550", + "SCTAOTK_aerosol_EQ_Sulphate_Dry_aerosol_size_LT_7eM07_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere": "SA_sulf07aod550", + "SCTAOTK_aerosol_EQ_Particulate_Organic_Matter_Dry_aerosol_size_LT_7eM07_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere": "SA_oc07aod550", + "SCTAOTK_aerosol_EQ_Black_Carbon_Dry_aerosol_size_LT_7eM07_aerosol_wavelength_GE_5D45eM07_LE_5D65eM07_entireatmosphere": "SC_bc07aod550", + "AOTK_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2eM05_aerosol_wavelength_GE_6D2eM07_LE_6D7eM07_entireatmosphere": "pm25aod645", + "AOTK_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2eM05_aerosol_wavelength_GE_8D41eM07_LE_8D76eM07_entireatmosphere": "pm25aod841", + "AOTK_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2eM05_aerosol_wavelength_GE_1D628eM06_LE_1D652eM06_entireatmosphere": "pm25aod1628", + "AOTK_aerosol_EQ_Total_Aerosol_aerosol_size_LT_2eM05_aerosol_wavelength_GE_1D1eM05_LE_1D12eM05_entireatmosphere": "pm25aod11000", } # latitude = f.latitude.values # longitude = f.longitude.values # f['latitude'] = range(len(f.latitude)) # f['longitude'] = range(len(f.longitude)) f = _rename_func(f, rename_dict) - f = f.rename({'latitude': 'y', 'longitude': 'x'}) + f = f.rename({"latitude": "y", "longitude": "x"}) # lon, lat = meshgrid(longitude, latitude) # f['longitude'] = (('y', 'x'), lon) # f['latitude'] = (('y', 'x'), lat) - f = f.set_coords(['latitude', 'longitude']) + f = f.set_coords(["latitude", "longitude"]) # try: # from pyresample import utils # f['longitude'] = utils.wrap_longitudes(f.longitude) @@ -452,9 +362,9 @@ def _calc_nemsio_hgt(f): dz = f.delz z = dz + sfc z = z.rolling(z=len(f.z), min_periods=1).sum() - z.name = 'geohgt' - z.attrs['long_name'] = 'Geopotential Height' - z.attrs['units'] = 'm' + z.name = "geohgt" + z.attrs["long_name"] = "Geopotential Height" + z.attrs["units"] = "m" return z @@ -477,11 +387,11 @@ def calc_nemsio_pressure(dset): """ # sfc = dset.pressfc.load() / 100. # dpres = dset.dpres.load() / 100. * -1. - sfc = dset.pressfc / 100. - dpres = dset.dpres / 100. * -1. + sfc = dset.pressfc / 100.0 + dpres = dset.dpres / 100.0 * -1.0 dpres[:, 0, :, :] = sfc + dpres[:, 0, :, :] pres = dpres.rolling(z=len(dset.z), min_periods=1).sum() - pres.name = 'press' - pres.attrs['units'] = 'mb' - pres.attrs['long_name'] = 'Mid Layer Pressure' + pres.name = "press" + pres.attrs["units"] = "mb" + pres.attrs["long_name"] = "Mid Layer Pressure" return pres diff --git a/monetio/models/hysplit.py b/monetio/models/hysplit.py index 6a2a1827..a0836b61 100644 --- a/monetio/models/hysplit.py +++ b/monetio/models/hysplit.py @@ -1,11 +1,6 @@ -""" HYPSLIT MODEL READER """ -import sys -import datetime -import pandas as pd -import xarray as xr -import numpy as np - """ +HYPSLIT MODEL READER + This code developed at the NOAA Air Resources Laboratory. Alice Crawford Allison Ring @@ -34,10 +29,16 @@ """ +import datetime +import sys + +import numpy as np +import pandas as pd +import xarray as xr def open_dataset( - fname, drange=None, century=None, verbose=False, sample_time_stamp="start",check_grid=True + fname, drange=None, century=None, verbose=False, sample_time_stamp="start", check_grid=True ): """Short summary. @@ -61,7 +62,7 @@ def open_dataset( else time is start of sampling time period. check_grid : boolean - if True call fix_grid_continuity to check to see that + if True call fix_grid_continuity to check to see that xindx and yindx values are sequential (e.g. not [1,2,3,4,5,7]). If they are not, then add missing values to the xarray.. @@ -84,8 +85,11 @@ def open_dataset( sample_time_stamp=sample_time_stamp, ) dset = binfile.dset - if check_grid: return fix_grid_continuity(dset) - else: return dset + if check_grid: + return fix_grid_continuity(dset) + else: + return dset + def check_drange(drange, pdate1, pdate2): """ @@ -123,10 +127,10 @@ def check_drange(drange, pdate1, pdate2): class ModelBin: """ - represents a binary cdump (concentration) output file from HYSPLIT - methods: - readfile - opens and reads contents of cdump file into an xarray - self.dset + represents a binary cdump (concentration) output file from HYSPLIT + methods: + readfile - opens and reads contents of cdump file into an xarray + self.dset """ def __init__( @@ -146,7 +150,7 @@ def __init__( sample_time_stamp : str if 'end' - time in xarray will indicate end of sampling time. - else - time in xarray will indicate start of sampling time. + else - time in xarray will indicate start of sampling time. century : integer verbose : boolean read @@ -176,16 +180,14 @@ def __init__( if readwrite == "r": if verbose: print("reading " + filename) - self.dataflag = self.readfile( - filename, drange, verbose=verbose, century=century - ) + self.dataflag = self.readfile(filename, drange, verbose=verbose, century=century) @staticmethod def define_struct(): """Each record in the fortran binary begins and ends with 4 bytes which specify the length of the record. These bytes are called pad below. They are not used here, but are thrown out. The following block defines - a numpy dtype object for each record in the binary file. """ + a numpy dtype object for each record in the binary file.""" from numpy import dtype real4 = ">f" @@ -317,10 +319,7 @@ def parse_header(self, hdata1): number of starting locations in file. """ if len(hdata1["start_loc"]) != 1: - print( - "WARNING in ModelBin _readfile - number of starting locations " - "incorrect" - ) + print("WARNING in ModelBin _readfile - number of starting locations " "incorrect") print(hdata1["start_loc"]) # in python 3 np.fromfile reads the record into a list even if it is # just one number. @@ -347,9 +346,7 @@ def parse_hdata2(self, hdata2, nstartloc, century): century = 2000 else: century = 1900 - print( - "WARNING: Guessing Century for HYSPLIT concentration file", century - ) + print("WARNING: Guessing Century for HYSPLIT concentration file", century) # add sourcedate which is datetime.datetime object sourcedate = datetime.datetime( century + hdata2["r_year"][nnn], @@ -389,10 +386,18 @@ def parse_hdata6and7(self, hdata6, hdata7, century): if not hdata6: return False, None, None pdate1 = datetime.datetime( - century + hdata6["oyear"], hdata6["omonth"], hdata6["oday"], hdata6["ohr"],hdata6["omin"] + century + hdata6["oyear"], + hdata6["omonth"], + hdata6["oday"], + hdata6["ohr"], + hdata6["omin"], ) pdate2 = datetime.datetime( - century + hdata7["oyear"], hdata7["omonth"], hdata7["oday"], hdata7["ohr"],hdata7["omin"] + century + hdata7["oyear"], + hdata7["omonth"], + hdata7["oday"], + hdata7["ohr"], + hdata7["omin"], ) dt = pdate2 - pdate1 sample_dt = dt.days * 24 + dt.seconds / 3600.0 @@ -458,18 +463,18 @@ def makegrid(self, xindx, yindx): def readfile(self, filename, drange, verbose, century): """Data from the file is stored in an xarray, self.dset - returns False if all concentrations are zero else returns True. - INPUTS - filename - name of cdump file to open - drange - [date1, date2] - range of dates to load data for. if [] - then loads all data. - date1 and date2 should be datetime ojbects. - verbose - turns on print statements - century - if None will try to guess the century by looking - at the last two digits of the year. - For python 3 the numpy char4 are read in as a numpy.bytes_ - class and need to be converted to a python - string by using decode('UTF-8'). + returns False if all concentrations are zero else returns True. + INPUTS + filename - name of cdump file to open + drange - [date1, date2] - range of dates to load data for. if [] + then loads all data. + date1 and date2 should be datetime ojbects. + verbose - turns on print statements + century - if None will try to guess the century by looking + at the last two digits of the year. + For python 3 the numpy char4 are read in as a numpy.bytes_ + class and need to be converted to a python + string by using decode('UTF-8'). """ # 8/16/2016 moved species=[] to before while loop. Added print @@ -549,9 +554,7 @@ class and need to be converted to a python # ) # if number of elements is nonzero then if hdata8a["ne"] >= 1: - self.atthash["Species ID"].append( - hdata8a["poll"][0].decode("UTF-8") - ) + self.atthash["Species ID"].append(hdata8a["poll"][0].decode("UTF-8")) # get rec8 - indx and jndx hdata8b = np.fromfile(fid, dtype=rec8b, count=hdata8a["ne"][0]) # add sample start time to list of start times with @@ -591,7 +594,7 @@ class and need to be converted to a python # imax iterations. if ii > imax: testf = False - print('greater than imax', testf, ii, imax) + print("greater than imax", testf, ii, imax) if inc_iii: iii += 1 self.atthash["Concentration Grid"] = ahash @@ -599,7 +602,7 @@ class and need to be converted to a python self.atthash["Coordinate time description"] = "Beginning of sampling time" # END OF Loop to go through each sampling time if self.dset is None: - print('DSET is NONE') + print("DSET is NONE") return False if self.dset.variables: self.dset.attrs = self.atthash @@ -612,9 +615,7 @@ class and need to be converted to a python # if verbose: # print(self.dset) if iii == 0 and verbose: - print( - "Warning: ModelBin class _readfile method: no data in the date range found" - ) + print("Warning: ModelBin class _readfile method: no data in the date range found") return False return True @@ -640,7 +641,7 @@ def combine_dataset( century=None, verbose=False, sample_time_stamp="start", - check_grid=True + check_grid=True, ): """ Inputs : @@ -663,9 +664,10 @@ def combine_dataset( added to get concentration from all species. If list of species is provided, only those species will be added. - Files need to have the same concentration grid defined. + Files need to have the same concentration grid defined. """ iii = 0 + mlat_p = mlon_p = None ylist = [] dtlist = [] splist = [] @@ -699,7 +701,7 @@ def combine_dataset( century=century, verbose=verbose, sample_time_stamp=sample_time_stamp, - check_grid=False + check_grid=False, ) else: # use all dates hxr = open_dataset( @@ -707,11 +709,11 @@ def combine_dataset( century=century, verbose=verbose, sample_time_stamp=sample_time_stamp, - check_grid=False + check_grid=False, ) try: mlat, mlon = getlatlon(hxr) - except: + except Exception: print("WARNING Cannot open ") print(fname[0]) print(century) @@ -787,17 +789,22 @@ def combine_dataset( keylist = ["time description"] for key in keylist: newhxr = newhxr.assign_attrs({key: hxr.attrs[key]}) - if check_grid: return fix_grid_continuity(newhxr) - else: return newhxr + if check_grid: + return fix_grid_continuity(newhxr) + else: + return newhxr + def get_even_latlongrid(dset, xlim, ylim): - xindx = np.arange(xlim[0], xlim[1]+1) - yindx = np.arange(ylim[0], ylim[1]+1) - return get_latlongrid(dset, xindx, yindx) + xindx = np.arange(xlim[0], xlim[1] + 1) + yindx = np.arange(ylim[0], ylim[1] + 1) + return get_latlongrid(dset, xindx, yindx) + def fix_grid_continuity(dset): # if grid already continuos don't do anything. - if check_grid_continuity(dset): return dset + if check_grid_continuity(dset): + return dset xv = dset.x.values yv = dset.y.values @@ -805,22 +812,23 @@ def fix_grid_continuity(dset): xlim = [xv[0], xv[-1]] ylim = [yv[0], yv[-1]] - xindx = np.arange(xlim[0], xlim[1]+1) - yindx = np.arange(ylim[0], ylim[1]+1) + xindx = np.arange(xlim[0], xlim[1] + 1) + yindx = np.arange(ylim[0], ylim[1] + 1) - mgrid = get_even_latlongrid(dset,xlim,ylim) + mgrid = get_even_latlongrid(dset, xlim, ylim) conc = np.zeros_like(mgrid[0]) - dummy = xr.DataArray(conc,dims=['y','x']) - dummy = dummy.assign_coords(latitude=(('y','x'),mgrid[1])) - dummy = dummy.assign_coords(longitude=(('y','x'),mgrid[0])) - dummy = dummy.assign_coords(x=(('x'),xindx)) - dummy = dummy.assign_coords(y=(('y'),yindx)) - cdset, dummy2 = xr.align(dset,dummy,join='outer') - cdset = cdset.assign_coords(latitude=(('y','x'),mgrid[1])) - cdset = cdset.assign_coords(longitude=(('y','x'),mgrid[0])) + dummy = xr.DataArray(conc, dims=["y", "x"]) + dummy = dummy.assign_coords(latitude=(("y", "x"), mgrid[1])) + dummy = dummy.assign_coords(longitude=(("y", "x"), mgrid[0])) + dummy = dummy.assign_coords(x=(("x"), xindx)) + dummy = dummy.assign_coords(y=(("y"), yindx)) + cdset, dummy2 = xr.align(dset, dummy, join="outer") + cdset = cdset.assign_coords(latitude=(("y", "x"), mgrid[1])) + cdset = cdset.assign_coords(longitude=(("y", "x"), mgrid[0])) return cdset.fillna(0) + def check_grid_continuity(dset): """ checks to see if x and y coords are skipping over any grid points. @@ -832,12 +840,15 @@ def check_grid_continuity(dset): """ xv = dset.x.values yv = dset.y.values - t1 = np.array([xv[i] - xv[i-1] for i in np.arange(1,len(xv))]) - t2 = np.array([yv[i] - yv[i-1] for i in np.arange(1,len(yv))]) - if np.any(t1!=1): return False - if np.any(t2!=1): return False + t1 = np.array([xv[i] - xv[i - 1] for i in np.arange(1, len(xv))]) + t2 = np.array([yv[i] - yv[i - 1] for i in np.arange(1, len(yv))]) + if np.any(t1 != 1): + return False + if np.any(t2 != 1): + return False return True + def get_latlongrid(dset, xindx, yindx): """ INPUTS @@ -846,7 +857,7 @@ def get_latlongrid(dset, xindx, yindx): yindx : list of integers RETURNS mgrid : output of numpy meshgrid function. - Two 2d arrays of latitude, longitude. + Two 2d arrays of latitude, longitude. The grid points in cdump file represent center of the sampling area. @@ -868,21 +879,23 @@ def get_latlongrid(dset, xindx, yindx): latlist = [lat[x - 1] for x in yindx] mgrid = np.meshgrid(lonlist, latlist) return mgrid - #slat = self.llcrnr_lat - #slon = self.llcrnr_lon - #lat = np.arange(slat, slat + self.nlat * self.dlat, self.dlat) - #lon = np.arange(slon, slon + self.nlon * self.dlon, self.dlon) - #lonlist = [lon[x - 1] for x in xindx] - #latlist = [lat[x - 1] for x in yindx] - #mgrid = np.meshgrid(lonlist, latlist) + # slat = self.llcrnr_lat + # slon = self.llcrnr_lon + # lat = np.arange(slat, slat + self.nlat * self.dlat, self.dlat) + # lon = np.arange(slon, slon + self.nlon * self.dlon, self.dlon) + # lonlist = [lon[x - 1] for x in xindx] + # latlist = [lat[x - 1] for x in yindx] + # mgrid = np.meshgrid(lonlist, latlist) + def get_index_fromgrid(dset, latgrid, longrid): - llcrnr_lat = dset.attrs["Concentration Grid"]["llcrnr latitude"] - llcrnr_lon = dset.attrs["Concentration Grid"]["llcrnr longitude"] - nlat = dset.attrs["Concentration Grid"]["Number Lat Points"] - nlon = dset.attrs["Concentration Grid"]["Number Lon Points"] - dlat = dset.attrs["Concentration Grid"]["Latitude Spacing"] - dlon = dset.attrs["Concentration Grid"]["Longitude Spacing"] + # llcrnr_lat = dset.attrs["Concentration Grid"]["llcrnr latitude"] + # llcrnr_lon = dset.attrs["Concentration Grid"]["llcrnr longitude"] + # nlat = dset.attrs["Concentration Grid"]["Number Lat Points"] + # nlon = dset.attrs["Concentration Grid"]["Number Lon Points"] + # dlat = dset.attrs["Concentration Grid"]["Latitude Spacing"] + # dlon = dset.attrs["Concentration Grid"]["Longitude Spacing"] + return NotImplementedError def getlatlon(dset): @@ -906,14 +919,14 @@ def getlatlon(dset): def hysp_massload(dset, threshold=0, mult=1, zvals=None): - """ Calculate mass loading from HYSPLIT xarray + """Calculate mass loading from HYSPLIT xarray INPUTS dset: xarray dataset output by open_dataset OR xarray data array output by combine_dataset threshold : float mult : float zvals : list of levels to calculate mass loading over. - Outputs: + Outputs: totl_aml : xarray data array total ash mass loading (summed over all layers), ash mass loading Units in (unit mass / m^2) @@ -935,16 +948,14 @@ def hysp_massload(dset, threshold=0, mult=1, zvals=None): return total_aml -def hysp_heights( - dset, threshold, mult=1, height_mult=1 / 1000.0, mass_load=True, species=None -): - """ Calculate top-height from HYSPLIT xarray +def hysp_heights(dset, threshold, mult=1, height_mult=1 / 1000.0, mass_load=True, species=None): + """Calculate top-height from HYSPLIT xarray Input: xarray dataset output by open_dataset OR xarray data array output by combine_dataset threshold : mass loading threshold (threshold = xx) mult : convert from meters to other unit. default is 1/1000.0 to convert to km. - Outputs: ash top heights, altitude levels """ + Outputs: ash top heights, altitude levels""" # either get mass loading of each point if mass_load: @@ -967,14 +978,14 @@ def hysp_heights( def calc_total_mass(dset): - return -1 + return -1 def calc_aml(dset, species=None): - """ Calculates the mass loading at each altitude for the dataset + """Calculates the mass loading at each altitude for the dataset Input: xarray dataset output by open_dataset OR xarray data array output by combine_dataset - Output: total ash mass loading """ + Output: total ash mass loading""" # Totals values for all particles if isinstance(dset, xr.core.dataset.Dataset): total_par = add_species(dset) @@ -987,13 +998,13 @@ def calc_aml(dset, species=None): def hysp_thresh(dset, threshold, mult=1): - """ Calculates a threshold mask array based on the + """Calculates a threshold mask array based on the ash mass loading from HYSPLIT xarray Inputs: xarray, ash mass loading threshold (threshold = xx) - Outputs: ash mass loading threshold mask array + Outputs: ash mass loading threshold mask array Returns 0 where values are below or equal to threshold. Returns 1 where values are greather than threshold - + """ # Calculate ash mass loading for xarray aml_alts = calc_aml(dset) @@ -1012,9 +1023,9 @@ def hysp_thresh(dset, threshold, mult=1): def add_species(dset, species=None): """ - species : list of Species ID's. - if none then all ids in the "species ID" attribute will be used. - Calculate sum of particles. + species : list of Species ID's. + if none then all ids in the "species ID" attribute will be used. + Calculate sum of particles. """ sflist = [] splist = dset.attrs["Species ID"] @@ -1050,7 +1061,7 @@ def _delta_multiply(pars): """ # Calculate the delta altitude for each layer and # multiplies concentration by layer thickness to return mass load. - + # pars: xarray data array concentration with z coordinate. # OUTPUT diff --git a/monetio/models/hytraj.py b/monetio/models/hytraj.py index 75b63d29..785f9b07 100644 --- a/monetio/models/hytraj.py +++ b/monetio/models/hytraj.py @@ -1,12 +1,13 @@ # Reads a tdump file, outputs a Pandas DataFrame -import numpy as np import re + +import numpy as np import pandas as pd def open_dataset(filename): - """ Opens a tdump file, returns trajectory array + """Opens a tdump file, returns trajectory array Parameters ---------------- @@ -63,13 +64,13 @@ def get_metinfo(tdump): # Going back to first line of file tdump.seek(0) # Dimensions of met file array in numpy array - dim1 = tdump.readline().strip().replace(' ', '') + dim1 = tdump.readline().strip().replace(" ", "") dim1 = np.array(list(dim1)) # Read met file info into array metinfo = [] a = 0 while a < int(dim1[0]): - tmp = re.sub(r'\s+', ',', tdump.readline().strip()) + tmp = re.sub(r"\s+", ",", tdump.readline().strip()) metinfo.append(tmp) a += 1 return metinfo @@ -94,30 +95,29 @@ def get_startlocs(tdump): # Going back to first line of file tdump.seek(0) # Gets the metinfo - metinfo = get_metinfo(tdump) + _ = get_metinfo(tdump) # Read next line - get number of starting locations - dim2 = list(tdump.readline().strip().split(' ')) + dim2 = list(tdump.readline().strip().split(" ")) start_locs = [] b = 0 while b < int(dim2[0]): - tmp2 = re.sub(r'\s+', ',', tdump.readline().strip()) - tmp2 = tmp2.split(',') + tmp2 = re.sub(r"\s+", ",", tdump.readline().strip()) + tmp2 = tmp2.split(",") start_locs.append(tmp2) b += 1 # Putting starting locations array into pandas DataFrame - heads = ['year', 'month', 'day', 'hour', 'latitude', 'longitude', 'altitude'] + heads = ["year", "month", "day", "hour", "latitude", "longitude", "altitude"] stlocs = pd.DataFrame(np.array(start_locs), columns=heads) - cols = ['year', 'month', 'day', 'hour'] + cols = ["year", "month", "day", "hour"] # Joins cols into one column called time - stlocs['time'] = stlocs[cols].apply(lambda row: ' '.join(row.values.astype(str)), axis=1) + stlocs["time"] = stlocs[cols].apply(lambda row: " ".join(row.values.astype(str)), axis=1) # Drops cols stlocs = stlocs.drop(cols, axis=1) # Reorders columns - stlocs = stlocs[['time', 'latitude', 'longitude', 'altitude']] + stlocs = stlocs[["time", "latitude", "longitude", "altitude"]] # Puts time into datetime object - stlocs['time'] = stlocs.apply(lambda row: time_str_fixer(row['time']), - axis=1) - stlocs['time'] = pd.to_datetime(stlocs['time'], format='%y %m %d %H') + stlocs["time"] = stlocs.apply(lambda row: time_str_fixer(row["time"]), axis=1) + stlocs["time"] = pd.to_datetime(stlocs["time"], format="%y %m %d %H") return stlocs @@ -125,7 +125,7 @@ def time_str_fixer(timestr): """ timestr : str output - rval : str + rval : str if year is 2006, hysplit trajectory output writes year as single digit 6. This must be turned into 06 to be read properly. @@ -136,11 +136,12 @@ def time_str_fixer(timestr): month = str(int(temp[1])).zfill(2) temp[0] = year temp[1] = month - rval = str.join(' ', temp) + rval = str.join(" ", temp) else: rval = timestr return rval + def get_traj(tdump): """Finds the trajectory information from the tdump file @@ -159,21 +160,28 @@ def get_traj(tdump): # Going back to first line of file tdump.seek(0) # Gets the starting locations - stlocs = get_startlocs(tdump) + _ = get_startlocs(tdump) # Read the number (and names) of additional variables in traj file - varibs = re.sub(r'\s+', ',', tdump.readline().strip()) - varibs = varibs.split(',') + varibs = re.sub(r"\s+", ",", tdump.readline().strip()) + varibs = varibs.split(",") variables = varibs[1:] # Read the traj arrays into pandas dataframe - heads = ['time', 'traj_num', 'met_grid', 'forecast_hour', - 'traj_age', 'latitude', 'longitude', 'altitude'] + variables - traj = pd.read_csv(tdump, header=None, sep='\s+', parse_dates={'time': [2, 3, 4, 5, 6]}) + heads = [ + "time", + "traj_num", + "met_grid", + "forecast_hour", + "traj_age", + "latitude", + "longitude", + "altitude", + ] + variables + traj = pd.read_csv(tdump, header=None, sep=r"\s+", parse_dates={"time": [2, 3, 4, 5, 6]}) # Adds headers to dataframe traj.columns = heads # Makes all headers lowercase traj.columns = map(str.lower, traj.columns) # Puts time datetime object - traj['time'] = traj.apply(lambda row: time_str_fixer(row['time']), - axis=1) - traj['time'] = pd.to_datetime(traj['time'], format='%y %m %d %H %M') + traj["time"] = traj.apply(lambda row: time_str_fixer(row["time"]), axis=1) + traj["time"] = pd.to_datetime(traj["time"], format="%y %m %d %H %M") return traj diff --git a/monetio/models/ncep_grib.py b/monetio/models/ncep_grib.py index dc8a7480..cfa9bd31 100644 --- a/monetio/models/ncep_grib.py +++ b/monetio/models/ncep_grib.py @@ -24,9 +24,10 @@ def open_dataset(fname): else: raise ValueError except ValueError: - print('''File format not recognized. Note that you must preprocess the - files with fv3grib2nc4 available on github. https://github.com/noaa-oar-arl/fv3grib2nc4''' - ) + print( + """File format not recognized. Note that you must preprocess the + files with fv3grib2nc4 available on github. https://github.com/noaa-oar-arl/fv3grib2nc4""" + ) return f @@ -47,15 +48,16 @@ def open_mfdataset(fname): names, grib = _ensure_mfdataset_filenames(fname) try: if grib: - f = xr.open_mfdataset(names, concat_dim='time', engine='pynio') + f = xr.open_mfdataset(names, concat_dim="time", engine="pynio") f = _fix_grib2(f) else: raise ValueError except ValueError: - print('''File format not recognized. Note that you must preprocess the + print( + """File format not recognized. Note that you must preprocess the files with nemsio2nc4 or fv3grib2nc4 available on github. Do not - mix and match file types. Ensure all are the same file format.''' - ) + mix and match file types. Ensure all are the same file format.""" + ) return f @@ -74,13 +76,14 @@ def _ensure_mfdataset_filenames(fname): """ from glob import glob + from numpy import sort - import six - if isinstance(fname, six.string_types): + + if isinstance(fname, str): names = sort(glob(fname)) else: names = sort(fname) - gribs = [True for i in names if 'grb2' in i or 'grib2' in i] + gribs = [True for i in names if "grb2" in i or "grib2" in i] grib = False if len(gribs) >= 1: grib = True @@ -105,8 +108,8 @@ def _rename_func(f, rename_dict): """ final_dict = {} for i in f.data_vars.keys(): - if 'midlayer' in i: - rename_dict[i] = i.split('midlayer')[0] + if "midlayer" in i: + rename_dict[i] = i.split("midlayer")[0] for i in rename_dict.keys(): if i in f.data_vars.keys(): final_dict[i] = rename_dict[i] @@ -128,13 +131,14 @@ def _fix_grib2(f): """ from numpy import meshgrid + latitude = f.lat_0.values longitude = f.lon_0.values - f['latitude'] = range(len(f.latitude)) - f['longitude'] = range(len(f.longitude)) - f = f.rename({'latitude': 'y', 'longitude': 'x'}) + f["latitude"] = range(len(f.latitude)) + f["longitude"] = range(len(f.longitude)) + f = f.rename({"latitude": "y", "longitude": "x"}) lon, lat = meshgrid(longitude, latitude) - f['longitude'] = (('y', 'x'), lon) - f['latitude'] = (('y', 'x'), lat) - f = f.set_coords(['latitude', 'longitude']) + f["longitude"] = (("y", "x"), lon) + f["latitude"] = (("y", "x"), lat) + f = f.set_coords(["latitude", "longitude"]) return f diff --git a/monetio/models/pardump.py b/monetio/models/pardump.py index 6e086055..40ac3804 100644 --- a/monetio/models/pardump.py +++ b/monetio/models/pardump.py @@ -1,9 +1,4 @@ # vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4 -import datetime -import numpy as np -import pandas as pd - - """ PGRMMR: Alice Crawford ORG: NOAA/ARL PYTHON 3 @@ -19,6 +14,10 @@ """ +import datetime + +import numpy as np +import pandas as pd def open_dataset(fname, drange=None, century=2000, verbose=False): @@ -40,9 +39,9 @@ def open_dataset(fname, drange=None, century=2000, verbose=False): class Pardump: """methods for writing and reading a pardump file. - __init__ initializes structure of binary file. - write writes a pardump file. - read reads a pardump file. returns a pandas DataFrame. + __init__ initializes structure of binary file. + write writes a pardump file. + read reads a pardump file. returns a pandas DataFrame. """ def __init__(self, fname="PARINIT"): @@ -51,7 +50,7 @@ def __init__(self, fname="PARINIT"): ##'p' variables coorespond to padding that fortran adds. """ self.fname = fname - #self.dtfmt = "%Y%m%d%H%M" + # self.dtfmt = "%Y%m%d%H%M" tp1 = ">f" # big endian float. tp2 = ">i" # big endian integer. @@ -210,7 +209,7 @@ def read(self, drange=None, verbose=False, century=2000, sorti=None): # Only store data if it is in the daterange specified. if testdate: - #print("Adding data ", hdata, pdate) + # print("Adding data ", hdata, pdate) # otherwise get endian error message when create dataframe. ndata = data.byteswap().newbyteorder() par_frame = pd.DataFrame.from_records(ndata) # create data frame @@ -234,9 +233,7 @@ def read(self, drange=None, verbose=False, century=2000, sorti=None): parframe_all = par_frame.copy() else: parframe_all = pd.concat([parframe_all, par_frame], axis=0) - par_frame = pd.concat( - [par_frame], keys=[self.fname] - ) # add a filename key + par_frame = pd.concat([par_frame], keys=[self.fname]) # add a filename key iii += 1 @@ -251,13 +248,7 @@ def read(self, drange=None, verbose=False, century=2000, sorti=None): # if verbose: # print "Before date. Closing file" if iii > imax: - print( - "Read pardump. Limited to" - + str(imax) - + " iterations. Stopping" - ) + print("Read pardump. Limited to" + str(imax) + " iterations. Stopping") testf = False - parframe_all = pd.concat( - [parframe_all], keys=[self.fname] - ) # add a filename key + parframe_all = pd.concat([parframe_all], keys=[self.fname]) # add a filename key return parframe_all diff --git a/monetio/models/prepchem.py b/monetio/models/prepchem.py index 3c5fa02f..4a7d6dec 100644 --- a/monetio/models/prepchem.py +++ b/monetio/models/prepchem.py @@ -4,12 +4,13 @@ try: import fv3grid as fg + has_fv3grid = True except ImportError: has_fv3grid = False -def open_dataset(fname, dtype='f4', res='C384', tile=1): +def open_dataset(fname, dtype="f4", res="C384", tile=1): """Reads the binary data for FV3-CHEM input generated by prep_chem_sources. Parameters @@ -32,24 +33,22 @@ def open_dataset(fname, dtype='f4', res='C384', tile=1): w = FortranFile(fname) a = w.read_reals(dtype=dtype) r = int(res[1:]) - s = a.reshape((r, r), order='F') + s = a.reshape((r, r), order="F") if has_fv3grid: grid = fg.get_fv3_grid(res=res, tile=tile) # grid = grid.set_coords(['latitude', 'longitude', 'grid_lat', 'grid_lon']) - grid['longitude'] = wrap_longitudes(grid.longitude) + grid["longitude"] = wrap_longitudes(grid.longitude) # grid = grid.rename({'grid_lat': 'lat_b', 'grid_lon': 'lon_b'}) - name = fname.split('.bin')[0] - grid[name] = (('x', 'y'), s) + name = fname.split(".bin")[0] + grid[name] = (("x", "y"), s) return grid[name] else: - print( - 'Please install the fv3grid from https://github.com/bbakernoaa/fv3grid' - ) - print('to gain the full capability of this dataset') - return xr.DataArray(s, dims=('x', 'y')) + print("Please install the fv3grid from https://github.com/bbakernoaa/fv3grid") + print("to gain the full capability of this dataset") + return xr.DataArray(s, dims=("x", "y")) -def to_prepchem_binary(data, fname='output.bin', dtype='f4'): +def to_prepchem_binary(data, fname="output.bin", dtype="f4"): """Writes to binary file for prep_chem_sources. Parameters @@ -67,6 +66,6 @@ def to_prepchem_binary(data, fname='output.bin', dtype='f4'): Description of returned object. """ - f = FortranFile(fname, 'w') + f = FortranFile(fname, "w") f.write_record(data.astype(dtype)) f.close() diff --git a/monetio/obs/__init__.py b/monetio/obs/__init__.py index e4995eee..58ce51f6 100644 --- a/monetio/obs/__init__.py +++ b/monetio/obs/__init__.py @@ -1,8 +1,34 @@ -from . import aeronet, airnow, aqs, cems_mod, crn, epa_util, improve_mod, ish, ish_lite, nadp, openaq, pams +from . import ( + aeronet, + airnow, + aqs, + cems_mod, + crn, + epa_util, + improve_mod, + ish, + ish_lite, + nadp, + openaq, + pams, +) -__all__ = ['aeronet', 'airnow', 'aqs', 'crn', 'epa_util', 'improve_mod', 'ish', 'ish_lite' 'cems_mod', 'nadp', 'openaq', 'pams'] +__all__ = [ + "aeronet", + "airnow", + "aqs", + "crn", + "epa_util", + "improve_mod", + "ish", + "ish_lite", + "cems_mod", + "nadp", + "openaq", + "pams", +] -__name__ = 'obs' +__name__ = "obs" # ish = ish_mod.ISH() # airnow = airnow_mod.AirNow() diff --git a/monetio/obs/aeronet.py b/monetio/obs/aeronet.py index bf318a16..52a2acf7 100644 --- a/monetio/obs/aeronet.py +++ b/monetio/obs/aeronet.py @@ -1,6 +1,5 @@ # this is written to retrive airnow data concatenate and add to pandas array # for usage -from builtins import object, str from datetime import datetime import pandas as pd @@ -10,30 +9,41 @@ from joblib import Parallel, delayed has_joblib = True -except: +except ImportError: has_joblib = False def dateparse(x): - return datetime.strptime(x, '%d:%m:%Y %H:%M:%S') + return datetime.strptime(x, "%d:%m:%Y %H:%M:%S") -def add_local(fname, product='AOD15', dates=None, latlonbox=None, freq=None, interp_to_values=None): +def add_local( + fname, + product="AOD15", + dates=None, + latlonbox=None, + freq=None, + interp_to_values=None, + daily=False, + inv_type=None, + detect_dust=False, +): a = AERONET() a.url = fname # df = a.read_aeronet(fname) - self.prod = product.upper() + a.prod = product.upper() if daily: a.daily = 20 # daily data else: a.daily = 10 # all points if inv_type is not None: - a.inv_type = 'ALM15' + a.inv_type = "ALM15" else: a.inv_type = inv_type - if 'AOD' in self.prod: + if "AOD" in a.prod: if interp_to_values is not None: - if ~isinstance(interp_to_values, ndarray): + # TODO: could probably use np.asanyarray here + if not isinstance(interp_to_values, ndarray): a.new_aod_values = array(interp_to_values) else: a.new_aod_values = interp_to_values @@ -41,10 +51,10 @@ def add_local(fname, product='AOD15', dates=None, latlonbox=None, freq=None, int try: a.url = fname a.read_aeronet() - except: - print('Error reading:' + fname) + except Exception: + print("Error reading:" + fname) if freq is not None: - a.df = a.df.groupby('siteid').resample(freq).mean().reset_index() + a.df = a.df.groupby("siteid").resample(freq).mean().reset_index() if detect_dust: a.dust_detect() if a.new_aod_values is not None: @@ -54,7 +64,7 @@ def add_local(fname, product='AOD15', dates=None, latlonbox=None, freq=None, int def add_data( dates=None, - product='AOD15', + product="AOD15", latlonbox=None, daily=False, interp_to_aod_values=None, @@ -73,8 +83,8 @@ def add_data( min_date = dates.min() max_date = dates.max() # find days from here to there - days = pd.date_range(start=min_date, end=max_date, freq='D') - days1 = pd.date_range(start=min_date, end=max_date, freq='D') + pd.Timedelta(1, unit='D') + days = pd.date_range(start=min_date, end=max_date, freq="D") + days1 = pd.date_range(start=min_date, end=max_date, freq="D") + pd.Timedelta(1, unit="D") vars = dict( product=product, latlonbox=latlonbox, @@ -86,16 +96,19 @@ def add_data( detect_dust=detect_dust, ) dfs = Parallel(n_jobs=n_procs, verbose=verbose)( - delayed(_parallel_aeronet_call)(pd.DatetimeIndex([d1, d2]), **vars) for d1, d2 in zip(days, days1) + delayed(_parallel_aeronet_call)(pd.DatetimeIndex([d1, d2]), **vars) + for d1, d2 in zip(days, days1) ) df = pd.concat(dfs, ignore_index=True).drop_duplicates() if freq is not None: df.index = df.time - df = df.groupby('siteid').resample(freq).mean().reset_index() + df = df.groupby("siteid").resample(freq).mean().reset_index() return df.reset_index(drop=True) else: if ~has_joblib and (n_procs > 1): - print('Please install joblib to use the parallel feature of monetio.aeronet. Proceeding in serial mode...') + print( + "Please install joblib to use the parallel feature of monetio.aeronet. Proceeding in serial mode..." + ) df = a.add_data( dates=dates, product=product, @@ -111,7 +124,15 @@ def add_data( def _parallel_aeronet_call( - dates=None, product='AOD15', latlonbox=None, daily=False, interp_to_aod_values=None, inv_type=None, freq=None, siteid=None, detect_dust=False + dates=None, + product="AOD15", + latlonbox=None, + daily=False, + interp_to_aod_values=None, + inv_type=None, + freq=None, + siteid=None, + detect_dust=False, ): a = AERONET() df = a.add_data( @@ -128,19 +149,22 @@ def _parallel_aeronet_call( return df -class AERONET(object): +class AERONET: def __init__(self): - from numpy import concatenate, arange + from numpy import arange, concatenate - self.baseurl = 'https://aeronet.gsfc.nasa.gov/cgi-bin/print_web_data_v3?' - self.dates = [datetime.strptime('2016-06-06 12:00:00', '%Y-%m-%d %H:%M:%S'), datetime.strptime('2016-06-10 13:00:00', '%Y-%m-%d %H:%M:%S')] + self.baseurl = "https://aeronet.gsfc.nasa.gov/cgi-bin/print_web_data_v3?" + self.dates = [ + datetime.strptime("2016-06-06 12:00:00", "%Y-%m-%d %H:%M:%S"), + datetime.strptime("2016-06-10 13:00:00", "%Y-%m-%d %H:%M:%S"), + ] self.datestr = [] self.df = pd.DataFrame() self.daily = None self.prod = None self.inv_type = None self.siteid = None - self.objtype = 'AERONET' + self.objtype = "AERONET" self.usecols = concatenate((arange(30), arange(65, 83))) # [21.1,-131.6686,53.04,-58.775] #[latmin,lonmin,latmax,lonmax] self.latlonbox = None @@ -148,43 +172,68 @@ def __init__(self): self.new_aod_values = None def build_url(self): - sy = self.dates.min().strftime('%Y') - sm = self.dates.min().strftime('%m').zfill(2) - sd = self.dates.min().strftime('%d').zfill(2) - sh = self.dates.min().strftime('%H').zfill(2) - ey = self.dates.max().strftime('%Y').zfill(2) - em = self.dates.max().strftime('%m').zfill(2) - ed = self.dates.max().strftime('%d').zfill(2) - eh = self.dates.max().strftime('%H').zfill(2) - if self.prod in ['AOD10', 'AOD15', 'AOD20', 'SDA10', 'SDA15', 'SDA20', 'TOT10', 'TOT15', 'TOT20']: - base_url = 'https://aeronet.gsfc.nasa.gov/cgi-bin/print_web_data_v3?' + sy = self.dates.min().strftime("%Y") + sm = self.dates.min().strftime("%m").zfill(2) + sd = self.dates.min().strftime("%d").zfill(2) + sh = self.dates.min().strftime("%H").zfill(2) + ey = self.dates.max().strftime("%Y").zfill(2) + em = self.dates.max().strftime("%m").zfill(2) + ed = self.dates.max().strftime("%d").zfill(2) + eh = self.dates.max().strftime("%H").zfill(2) + if self.prod in [ + "AOD10", + "AOD15", + "AOD20", + "SDA10", + "SDA15", + "SDA20", + "TOT10", + "TOT15", + "TOT20", + ]: + base_url = "https://aeronet.gsfc.nasa.gov/cgi-bin/print_web_data_v3?" inv_type = None else: - base_url = 'https://aeronet.gsfc.nasa.gov/cgi-bin/print_web_data_inv_v3?' - if self.inv_type == 'ALM15': - inv_type = '&ALM15=1' + base_url = "https://aeronet.gsfc.nasa.gov/cgi-bin/print_web_data_inv_v3?" + if self.inv_type == "ALM15": + inv_type = "&ALM15=1" else: - inv_type = '&AML20=1' + inv_type = "&AML20=1" date_portion = ( - 'year=' + sy + '&month=' + sm + '&day=' + sd + '&hour=' + sh + '&year2=' + ey + '&month2=' + em + '&day2=' + ed + '&hour2=' + eh + "year=" + + sy + + "&month=" + + sm + + "&day=" + + sd + + "&hour=" + + sh + + "&year2=" + + ey + + "&month2=" + + em + + "&day2=" + + ed + + "&hour2=" + + eh ) # print(self.prod, inv_type) if self.inv_type is not None: - product = '&product=' + self.prod + product = "&product=" + self.prod else: - product = '&' + self.prod + '=1' - self.inv_type = '' - time = '&AVG=' + str(self.daily) + product = "&" + self.prod + "=1" + self.inv_type = "" + time = "&AVG=" + str(self.daily) if self.siteid is not None: - latlonbox = '&site={}'.format(self.siteid) + latlonbox = f"&site={self.siteid}" elif self.latlonbox is None: - latlonbox = '' + latlonbox = "" else: lat1 = str(float(self.latlonbox[0])) lon1 = str(float(self.latlonbox[1])) lat2 = str(float(self.latlonbox[2])) lon2 = str(float(self.latlonbox[3])) - latlonbox = '&lat1=' + lat1 + '&lat2=' + lat2 + '&lon1=' + lon1 + '&lon2=' + lon2 + latlonbox = "&lat1=" + lat1 + "&lat2=" + lat2 + "&lon1=" + lon1 + "&lon2=" + lon2 # print(base_url) # print(date_portion) # print(product) @@ -192,35 +241,43 @@ def build_url(self): # print(time) # print(latlonbox) if inv_type is None: - inv_type = '' - self.url = base_url + date_portion + product + inv_type + time + latlonbox + '&if_no_html=1' + inv_type = "" + self.url = base_url + date_portion + product + inv_type + time + latlonbox + "&if_no_html=1" def read_aeronet(self): - print('Reading Aeronet Data...') + print("Reading Aeronet Data...") # header = self.get_columns() - df = pd.read_csv(self.url, engine='python', header=None, skiprows=6, parse_dates={'time': [1, 2]}, date_parser=dateparse, na_values=-999) + df = pd.read_csv( + self.url, + engine="python", + header=None, + skiprows=6, + parse_dates={"time": [1, 2]}, + date_parser=dateparse, + na_values=-999, + ) # df.rename(columns={'date_time': 'time'}, inplace=True) columns = self.get_columns() df.columns = columns # self.get_columns() df.index = df.time df.rename( columns={ - 'site_latitude(degrees)': 'latitude', - 'site_longitude(degrees)': 'longitude', - 'site_elevation(m)': 'elevation', - 'aeronet_site': 'siteid', + "site_latitude(degrees)": "latitude", + "site_longitude(degrees)": "longitude", + "site_elevation(m)": "elevation", + "aeronet_site": "siteid", }, inplace=True, ) - df.dropna(subset=['latitude', 'longitude'], inplace=True) - df.dropna(axis=1, how='all', inplace=True) + df.dropna(subset=["latitude", "longitude"], inplace=True) + df.dropna(axis=1, how="all", inplace=True) self.df = df def get_columns(self): header = pd.read_csv(self.url, skiprows=5, header=None, nrows=1).values.flatten() - final = ['time'] + final = ["time"] for i in header: - if "Date(" in i or 'Time(' in i: + if "Date(" in i or "Time(" in i: pass else: final.append(i.lower()) @@ -229,7 +286,7 @@ def get_columns(self): def add_data( self, dates=None, - product='AOD15', + product="AOD15", latlonbox=None, daily=False, interp_to_aod_values=None, @@ -241,7 +298,9 @@ def add_data( self.latlonbox = latlonbox self.siteid = siteid if dates is None: # get the current day - self.dates = pd.date_range(start=pd.to_datetime('today'), end=pd.to_datetime('now'), freq='H') + self.dates = pd.date_range( + start=pd.to_datetime("today"), end=pd.to_datetime("now"), freq="H" + ) else: self.dates = dates self.prod = product.upper() @@ -250,18 +309,18 @@ def add_data( else: self.daily = 10 # all points if inv_type is not None: - self.inv_type = 'ALM15' + self.inv_type = "ALM15" else: self.inv_type = inv_type - if 'AOD' in self.prod: + if "AOD" in self.prod: self.new_aod_values = interp_to_aod_values self.build_url() try: self.read_aeronet() - except: + except Exception: print(self.url) if freq is not None: - self.df = self.df.groupby('siteid').resample(freq).mean().reset_index() + self.df = self.df.groupby("siteid").resample(freq).mean().reset_index() if detect_dust: self.dust_detect() if self.new_aod_values is not None: @@ -274,66 +333,83 @@ def calc_550nm(self): aod550 = aod500 * (550/500) ^ -alpha """ - self.df['aod_550nm'] = self.df.aod_500nm * (550.0 / 500.0) ** (-self.df['440-870_angstrom_exponent']) + self.df["aod_550nm"] = self.df.aod_500nm * (550.0 / 500.0) ** ( + -self.df["440-870_angstrom_exponent"] + ) def calc_new_aod_values(self): def _tspack_aod_interp(row, new_wv=[440.0, 470.0, 550.0, 670.0, 870.0, 1020.0, 1240.0]): + import numpy as np + try: import pytspack except ImportError: - print('You must install pytspack before using this function') + print("You must install pytspack before using this function") + + new_wv = np.asarray(new_wv) + # df_aod_nu = self._aeronet_aod_and_nu(row) - aod_columns = [aod_column for aod_column in row.index if 'aod_' in aod_column] + aod_columns = [aod_column for aod_column in row.index if "aod_" in aod_column] aods = row[aod_columns] - wv = [float(aod_column.replace('aod_', '').replace('nm', '')) for aod_column in aod_columns] - a = pd.DataFrame({'aod': aods}).reset_index() - a['wv'] = wv + wv = [ + float(aod_column.replace("aod_", "").replace("nm", "")) + for aod_column in aod_columns + ] + a = pd.DataFrame({"aod": aods}).reset_index() + a["wv"] = wv df_aod_nu = a.dropna() - df_aod_nu_sorted = df_aod_nu.sort_values(by='wv').dropna() + df_aod_nu_sorted = df_aod_nu.sort_values(by="wv").dropna() if len(df_aod_nu_sorted) < 2: return new_wv * NaN else: - x, y, yp, sigma = pytspack.tspsi(df_aod_nu_sorted.wv.values, df_aod_nu_sorted.aod.values) + x, y, yp, sigma = pytspack.tspsi( + df_aod_nu_sorted.wv.values, df_aod_nu_sorted.aod.values + ) yi = pytspack.hval(self.new_aod_values, x, y, yp, sigma) return yi - out = self.df.apply(_tspack_aod_interp, axis=1, result_type='expand', new_wv=self.new_aod_values) - names = 'aod_' + pd.Series(self.new_aod_values.astype(int).astype(str)) + 'nm' + out = self.df.apply( + _tspack_aod_interp, axis=1, result_type="expand", new_wv=self.new_aod_values + ) + names = "aod_" + pd.Series(self.new_aod_values.astype(int).astype(str)) + "nm" out.columns = names.values self.df = pd.concat([self.df, out], axis=1) - @staticmethod - def _tspack_aod_interp(row, new_wv=[440.0, 470.0, 550.0, 670.0, 870.0, 1020.0, 1240.0]): - try: - import pytspack - except ImportError: - print('You must install pytspack before using this function') - # df_aod_nu = self._aeronet_aod_and_nu(row) - aod_columns = [aod_column for aod_column in row.index if 'aod_' in aod_column] - aods = row[aod_columns] - wv = [float(aod_column.replace('aod_', '').replace('nm', '')) for aod_column in aod_columns] - a = pd.DataFrame({'aod': aods}).reset_index() - a['wv'] = wv - df_aod_nu = a.dropna() - df_aod_nu_sorted = df_aod_nu.sort_values(by='wv').dropna() - if len(df_aod_nu_sorted) < 2: - return xi * NaN - else: - x, y, yp, sigma = pytspack.tspsi(df_aod_nu_sorted.wv.values, df_aod_nu_sorted.aod.values) - yi = pytspack.hval(self.new_aod_values, x, y, yp, sigma) - return yi + # @staticmethod + # def _tspack_aod_interp(row, new_wv=[440.0, 470.0, 550.0, 670.0, 870.0, 1020.0, 1240.0]): + # try: + # import pytspack + # except ImportError: + # print("You must install pytspack before using this function") + + # # df_aod_nu = self._aeronet_aod_and_nu(row) + # aod_columns = [aod_column for aod_column in row.index if "aod_" in aod_column] + # aods = row[aod_columns] + # wv = [float(aod_column.replace("aod_", "").replace("nm", "")) for aod_column in aod_columns] + # a = pd.DataFrame({"aod": aods}).reset_index() + # a["wv"] = wv + # df_aod_nu = a.dropna() + # df_aod_nu_sorted = df_aod_nu.sort_values(by="wv").dropna() + # if len(df_aod_nu_sorted) < 2: + # return xi * NaN + # else: + # x, y, yp, sigma = pytspack.tspsi( + # df_aod_nu_sorted.wv.values, df_aod_nu_sorted.aod.values + # ) + # yi = pytspack.hval(self.new_aod_values, x, y, yp, sigma) + # return yi @staticmethod def _aeronet_aod_and_nu(row): import pandas as pd # print(row) - aod_columns = [aod_column for aod_column in row.index if 'aod_' in aod_column] - wv = [float(aod_column.replace('aod_', '').replace('nm', '')) for aod_column in aod_columns] + aod_columns = [aod_column for aod_column in row.index if "aod_" in aod_column] + wv = [float(aod_column.replace("aod_", "").replace("nm", "")) for aod_column in aod_columns] aods = row[aod_columns] - a = pd.DataFrame({'aod': aods}).reset_index() + a = pd.DataFrame({"aod": aods}).reset_index() # print(a.index,wv) - a['wv'] = wv + a["wv"] = wv return a.dropna() def dust_detect(self): @@ -347,8 +423,10 @@ def dust_detect(self): Description of returned object. """ - self.df['dust'] = (self.df['aod_1020nm'] > 0.3) & (self.df['440-870_angstrom_exponent'] < 0.6) + self.df["dust"] = (self.df["aod_1020nm"] > 0.3) & ( + self.df["440-870_angstrom_exponent"] < 0.6 + ) - def set_daterange(self, begin='', end=''): - dates = pd.date_range(start=begin, end=end, freq='H').values.astype('M8[s]').astype('O') + def set_daterange(self, begin="", end=""): + dates = pd.date_range(start=begin, end=end, freq="H").values.astype("M8[s]").astype("O") self.dates = dates diff --git a/monetio/obs/airnow.py b/monetio/obs/airnow.py index 75c2374f..0edee9ba 100644 --- a/monetio/obs/airnow.py +++ b/monetio/obs/airnow.py @@ -1,63 +1,40 @@ -"""Short summary. +"""AirNow""" - Attributes - ---------- - url : type - Description of attribute `url`. - dates : type - Description of attribute `dates`. - df : type - Description of attribute `df`. - daily : type - Description of attribute `daily`. - objtype : type - Description of attribute `objtype`. - filelist : type - Description of attribute `filelist`. - monitor_file : type - Description of attribute `monitor_file`. - __class__ : type - Description of attribute `__class__`. - monitor_df : type - Description of attribute `monitor_df`. - savecols : type - Description of attribute `savecols`. - """ - -import inspect import os # this is written to retrive airnow data concatenate and add to pandas array # for usage -from builtins import object from datetime import datetime import pandas as pd -datadir = '.' +datadir = "." cwd = os.getcwd() url = None -dates = [datetime.strptime('2016-06-06 12:00:00', '%Y-%m-%d %H:%M:%S'), datetime.strptime('2016-06-06 13:00:00', '%Y-%m-%d %H:%M:%S')] +dates = [ + datetime.strptime("2016-06-06 12:00:00", "%Y-%m-%d %H:%M:%S"), + datetime.strptime("2016-06-06 13:00:00", "%Y-%m-%d %H:%M:%S"), +] daily = False -objtype = 'AirNow' +objtype = "AirNow" filelist = None monitor_df = None savecols = [ - 'time', - 'siteid', - 'site', - 'utcoffset', - 'variable', - 'units', - 'obs', - 'time_local', - 'latitude', - 'longitude', - 'cmsa_name', - 'msa_code', - 'msa_name', - 'state_name', - 'epa_region', + "time", + "siteid", + "site", + "utcoffset", + "variable", + "units", + "obs", + "time_local", + "latitude", + "longitude", + "cmsa_name", + "msa_code", + "msa_name", + "state_name", + "epa_region", ] @@ -72,12 +49,12 @@ def build_urls(dates): furls = [] fnames = [] - print('Building AIRNOW URLs...') + print("Building AIRNOW URLs...") # 2017/20170131/HourlyData_2017012408.dat - url = 'https://s3-us-west-1.amazonaws.com//files.airnowtech.org/airnow/' + url = "https://s3-us-west-1.amazonaws.com//files.airnowtech.org/airnow/" for i in dates: - f = url + i.strftime('%Y/%Y%m%d/HourlyData_%Y%m%d%H.dat') - fname = i.strftime('HourlyData_%Y%m%d%H.dat') + f = url + i.strftime("%Y/%Y%m%d/HourlyData_%Y%m%d%H.dat") + fname = i.strftime("HourlyData_%Y%m%d%H.dat") furls.append(f) fnames.append(fname) # https://s3-us-west-1.amazonaws.com//files.airnowtech.org/airnow/2017/20170108/HourlyData_2016121506.dat @@ -103,15 +80,17 @@ def read_csv(fn): """ try: - dft = pd.read_csv(fn, delimiter='|', header=None, error_bad_lines=False, encoding='ISO-8859-1') - cols = ['date', 'time', 'siteid', 'site', 'utcoffset', 'variable', 'units', 'obs', 'source'] + dft = pd.read_csv( + fn, delimiter="|", header=None, error_bad_lines=False, encoding="ISO-8859-1" + ) + cols = ["date", "time", "siteid", "site", "utcoffset", "variable", "units", "obs", "source"] dft.columns = cols except Exception: - cols = ['date', 'time', 'siteid', 'site', 'utcoffset', 'variable', 'units', 'obs', 'source'] + cols = ["date", "time", "siteid", "site", "utcoffset", "variable", "units", "obs", "source"] dft = pd.DataFrame(columns=cols) - dft['obs'] = dft.obs.astype(float) - dft['siteid'] = dft.siteid.str.zfill(9) - dft['utcoffset'] = dft.utcoffset.astype(int) + dft["obs"] = dft.obs.astype(float) + dft["siteid"] = dft.siteid.str.zfill(9) + dft["utcoffset"] = dft.utcoffset.astype(int) return dft @@ -133,13 +112,13 @@ def retrieve(url, fname): import requests if not os.path.isfile(fname): - print('\n Retrieving: ' + fname) + print("\n Retrieving: " + fname) print(url) - print('\n') + print("\n") r = requests.get(url) - open(fname, 'wb').write(r.content) + open(fname, "wb").write(r.content) else: - print('\n File Exists: ' + fname) + print("\n File Exists: " + fname) def aggregate_files(dates=dates, download=False, n_procs=1): @@ -159,7 +138,7 @@ def aggregate_files(dates=dates, download=False, n_procs=1): import dask import dask.dataframe as dd - print('Aggregating AIRNOW files...') + print("Aggregating AIRNOW files...") urls, fnames = build_urls(dates) if download: for url, fname in zip(urls, fnames): @@ -169,10 +148,10 @@ def aggregate_files(dates=dates, download=False, n_procs=1): dfs = [dask.delayed(read_csv)(f) for f in urls] dff = dd.from_delayed(dfs) df = dff.compute(num_workers=n_procs) - df['time'] = pd.to_datetime(df.date + ' ' + df.time, format='%m/%d/%y %H:%M', exact=True) - df.drop(['date'], axis=1, inplace=True) - df['time_local'] = df.time + pd.to_timedelta(df.utcoffset, unit='H') - print(' Adding in Meta-data') + df["time"] = pd.to_datetime(df.date + " " + df.time, format="%m/%d/%y %H:%M", exact=True) + df.drop(["date"], axis=1, inplace=True) + df["time_local"] = df.time + pd.to_timedelta(df.utcoffset, unit="H") + print(" Adding in Meta-data") df = get_station_locations(df) df = df[savecols] df.drop_duplicates(inplace=True) @@ -201,7 +180,7 @@ def add_data(dates, download=False, wide_fmt=True, n_procs=1): df = aggregate_files(dates=dates, download=download, n_procs=n_procs) if wide_fmt: df = long_to_wide(df) - return df.drop_duplicates(subset=['time', 'latitude', 'longitude', 'siteid']) + return df.drop_duplicates(subset=["time", "latitude", "longitude", "siteid"]) else: return df return df @@ -218,7 +197,7 @@ def filter_bad_values(df): """ from numpy import NaN - df.loc[(df.obs > 3000) | (df.obs < 0), 'obs'] = NaN + df.loc[(df.obs > 3000) | (df.obs < 0), "obs"] = NaN return df @@ -253,7 +232,7 @@ def get_station_locations(df): from .epa_util import read_monitor_file monitor_df = read_monitor_file(airnow=True) - df = pd.merge(df, monitor_df, on='siteid') # , how='left') + df = pd.merge(df, monitor_df, on="siteid") # , how='left') return df @@ -271,6 +250,6 @@ def get_station_locations_remerge(df): Description of returned object. """ - df = pd.merge(df, monitor_df.drop(['Latitude', 'Longitude'], axis=1), on='siteid') # , + df = pd.merge(df, monitor_df.drop(["Latitude", "Longitude"], axis=1), on="siteid") # , # how='left') return df diff --git a/monetio/obs/aqs.py b/monetio/obs/aqs.py index 4859cc08..86bc7fd9 100644 --- a/monetio/obs/aqs.py +++ b/monetio/obs/aqs.py @@ -1,23 +1,42 @@ import inspect import os -# this is a class to deal with aqs data -from builtins import object, zip - import pandas as pd from dask.diagnostics import ProgressBar from .epa_util import read_monitor_file +# this is a class to deal with aqs data + + pbar = ProgressBar() pbar.register() -def add_data(dates, param=None, daily=False, network=None, download=False, local=False, wide_fmt=True, n_procs=1, meta=False): +def add_data( + dates, + param=None, + daily=False, + network=None, + download=False, + local=False, + wide_fmt=True, + n_procs=1, + meta=False, +): from ..util import long_to_wide a = AQS() - df = a.add_data(dates, param=param, daily=daily, network=network, download=download, local=local, n_procs=n_procs, meta=meta) + df = a.add_data( + dates, + param=param, + daily=daily, + network=network, + download=download, + local=local, + n_procs=n_procs, + meta=meta, + ) if wide_fmt: return long_to_wide(df) @@ -25,7 +44,7 @@ def add_data(dates, param=None, daily=False, network=None, download=False, local return df -class AQS(object): +class AQS: """Short summary. Attributes @@ -61,8 +80,8 @@ class AQS(object): def __init__(self): # self.baseurl = 'https://aqs.epa.gov/aqsweb/airdata/' - self.objtype = 'AQS' - self.baseurl = 'https://aqs.epa.gov/aqsweb/airdata/' + self.objtype = "AQS" + self.baseurl = "https://aqs.epa.gov/aqsweb/airdata/" # self.renamedhcols = [ # 'time', 'time_local', 'state_code', 'county_code', 'site_num', # 'parameter_code', 'poc', 'latitude', 'longitude', 'datum', @@ -71,39 +90,50 @@ def __init__(self): # 'state_name', 'county_name', 'date_of_last_change' # ] self.renameddcols = [ - 'time_local', - 'state_code', - 'county_code', - 'site_num', - 'parameter_code', - 'poc', - 'latitude', - 'longitude', - 'datum', - 'parameter_name', - 'sample_duration', - 'pollutant_standard', - 'units', - 'event_type', - 'observation_Count', - 'observation_Percent', - 'obs', - '1st_max_Value', - '1st_max_hour', - 'aqi', - 'method_code', - 'method_name', - 'local_site_name', - 'address', - 'state_name', - 'county_name', - 'city_name', - 'msa_name', - 'date_of_last_change', + "time_local", + "state_code", + "county_code", + "site_num", + "parameter_code", + "poc", + "latitude", + "longitude", + "datum", + "parameter_name", + "sample_duration", + "pollutant_standard", + "units", + "event_type", + "observation_Count", + "observation_Percent", + "obs", + "1st_max_Value", + "1st_max_hour", + "aqi", + "method_code", + "method_name", + "local_site_name", + "address", + "state_name", + "county_name", + "city_name", + "msa_name", + "date_of_last_change", + ] + self.savecols = [ + "time_local", + "time", + "siteid", + "latitude", + "longitude", + "obs", + "units", + "variable", ] - self.savecols = ['time_local', 'time', 'siteid', 'latitude', 'longitude', 'obs', 'units', 'variable'] self.df = pd.DataFrame() # hourly dataframe - self.monitor_file = inspect.getfile(self.__class__)[:-10] + 'data/monitoring_site_locations.dat' + self.monitor_file = ( + inspect.getfile(self.__class__)[:-10] + "data/monitoring_site_locations.dat" + ) self.monitor_df = None self.daily = False self.d_df = None # daily dataframe @@ -124,15 +154,15 @@ def columns_rename(self, columns, verbose=False): """ rcolumn = [] for ccc in columns: - if ccc.strip() == 'Sample Measurement': - newc = 'obs' - elif ccc.strip() == 'Units of Measure': - newc = 'units' + if ccc.strip() == "Sample Measurement": + newc = "obs" + elif ccc.strip() == "Units of Measure": + newc = "units" else: newc = ccc.strip().lower() - newc = newc.replace(' ', '_') + newc = newc.replace(" ", "_") if verbose: - print(ccc + ' renamed ' + newc) + print(ccc + " renamed " + newc) rcolumn.append(newc) return rcolumn @@ -152,37 +182,50 @@ def load_aqs_file(self, url, network): Description of returned object. """ - if 'daily' in url: + if "daily" in url: def dateparse(x): - return pd.datetime.strptime(x, '%Y-%m-%d') + return pd.datetime.strptime(x, "%Y-%m-%d") df = pd.read_csv( - url, parse_dates={'time_local': ["Date Local"]}, date_parser=dateparse, dtype={0: str, 1: str, 2: str}, encoding='ISO-8859-1' + url, + parse_dates={"time_local": ["Date Local"]}, + date_parser=dateparse, + dtype={0: str, 1: str, 2: str}, + encoding="ISO-8859-1", ) df.columns = self.renameddcols - df['pollutant_standard'] = df.pollutant_standard.astype(str) + df["pollutant_standard"] = df.pollutant_standard.astype(str) self.daily = True # df.rename(columns={'parameter_name':'variable'}) else: df = pd.read_csv( - url, parse_dates={'time': ['Date GMT', 'Time GMT'], 'time_local': ["Date Local", "Time Local"]}, infer_datetime_format=True + url, + parse_dates={ + "time": ["Date GMT", "Time GMT"], + "time_local": ["Date Local", "Time Local"], + }, + infer_datetime_format=True, ) # print(df.columns.values) df.columns = self.columns_rename(df.columns.values) - df['siteid'] = df.state_code.astype(str).str.zfill(2) + df.county_code.astype(str).str.zfill(3) + df.site_num.astype(str).str.zfill(4) + df["siteid"] = ( + df.state_code.astype(str).str.zfill(2) + + df.county_code.astype(str).str.zfill(3) + + df.site_num.astype(str).str.zfill(4) + ) # df['siteid'] = df.state_code + df.county_code + df.site_num - df.drop(['state_name', 'county_name'], axis=1, inplace=True) + df.drop(["state_name", "county_name"], axis=1, inplace=True) df.columns = [i.lower() for i in df.columns] - if 'daily' not in url: - df.drop(['datum', 'qualifier'], axis=1, inplace=True) - if 'VOC' in url: + if "daily" not in url: + df.drop(["datum", "qualifier"], axis=1, inplace=True) + if "VOC" in url: voc = True else: voc = False df = self.get_species(df, voc=voc) - return df.drop('date_of_last_change', axis=1) + return df.drop("date_of_last_change", axis=1) def build_url(self, param, year, daily=False, download=False): """Short summary. @@ -204,48 +247,45 @@ def build_url(self, param, year, daily=False, download=False): Description of returned object. """ - import requests - from numpy import NaN - if daily: - beginning = self.baseurl + 'daily_' - fname = 'daily_' + beginning = self.baseurl + "daily_" + fname = "daily_" else: - beginning = self.baseurl + 'hourly_' - fname = 'hourly_' - if (param.upper() == 'OZONE') | (param.upper() == 'O3'): - code = '44201_' - elif param.upper() == 'PM2.5': - code = '88101_' - elif param.upper() == 'PM2.5_FRM': - code = '88502_' - elif param.upper() == 'PM10': - code = '81102_' - elif param.upper() == 'SO2': - code = '42401_' - elif param.upper() == 'NO2': - code = '42602_' - elif param.upper() == 'CO': - code = '42101_' - elif param.upper() == 'NONOxNOy'.upper(): - code = 'NONOxNOy_' - elif param.upper() == 'VOC': + beginning = self.baseurl + "hourly_" + fname = "hourly_" + if (param.upper() == "OZONE") | (param.upper() == "O3"): + code = "44201_" + elif param.upper() == "PM2.5": + code = "88101_" + elif param.upper() == "PM2.5_FRM": + code = "88502_" + elif param.upper() == "PM10": + code = "81102_" + elif param.upper() == "SO2": + code = "42401_" + elif param.upper() == "NO2": + code = "42602_" + elif param.upper() == "CO": + code = "42101_" + elif param.upper() == "NONOxNOy".upper(): + code = "NONOxNOy_" + elif param.upper() == "VOC": # https://aqs.epa.gov/aqsweb/airdata/daily_VOCS_2017.zip - code = 'VOCS_' - elif param.upper() == 'SPEC': - code = 'SPEC_' - elif param.upper() == 'PM10SPEC': - code = 'PM10SPEC_' - elif param.upper() == 'WIND': - code = 'WIND_' - elif param.upper() == 'TEMP': - code = 'TEMP_' - elif param.upper() == 'RHDP': - code = 'RH_DP_' - elif (param.upper() == 'WIND') | (param.upper() == 'WS') | (param.upper() == 'WDIR'): - code = 'WIND_' - url = beginning + code + year + '.zip' - fname = fname + code + year + '.zip' + code = "VOCS_" + elif param.upper() == "SPEC": + code = "SPEC_" + elif param.upper() == "PM10SPEC": + code = "PM10SPEC_" + elif param.upper() == "WIND": + code = "WIND_" + elif param.upper() == "TEMP": + code = "TEMP_" + elif param.upper() == "RHDP": + code = "RH_DP_" + elif (param.upper() == "WIND") | (param.upper() == "WS") | (param.upper() == "WDIR"): + code = "WIND_" + url = beginning + code + year + ".zip" + fname = fname + code + year + ".zip" return url, fname @@ -275,8 +315,8 @@ def build_urls(self, params, dates, daily=False): for i in params: for y in years: url, fname = self.build_url(i, y, daily=daily) - if int(requests.get(url, stream=True).headers['Content-Length']) < 500: - print('File is Empty. Not Processing', url) + if int(requests.get(url, stream=True).headers["Content-Length"]) < 500: + print("File is Empty. Not Processing", url) else: urls.append(url) fnames.append(fname) @@ -302,15 +342,25 @@ def retrieve(self, url, fname): import requests if not os.path.isfile(fname): - print('\n Retrieving: ' + fname) + print("\n Retrieving: " + fname) print(url) - print('\n') + print("\n") r = requests.get(url) - open(fname, 'wb').write(r.content) + open(fname, "wb").write(r.content) else: - print('\n File Exists: ' + fname) - - def add_data(self, dates, param=None, daily=False, network=None, download=False, local=False, n_procs=1, meta=False): + print("\n File Exists: " + fname) + + def add_data( + self, + dates, + param=None, + daily=False, + network=None, + download=False, + local=False, + n_procs=1, + meta=False, + ): """Short summary. Parameters @@ -336,7 +386,20 @@ def add_data(self, dates, param=None, daily=False, network=None, download=False, import dask.dataframe as dd if param is None: - params = ['SPEC', 'PM10', 'PM2.5', 'PM2.5_FRM', 'CO', 'OZONE', 'SO2', 'VOC', 'NONOXNOY', 'WIND', 'TEMP', 'RHDP'] + params = [ + "SPEC", + "PM10", + "PM2.5", + "PM2.5_FRM", + "CO", + "OZONE", + "SO2", + "VOC", + "NONOXNOY", + "WIND", + "TEMP", + "RHDP", + ] else: params = param urls, fnames = self.build_urls(params, dates, daily=daily) @@ -375,23 +438,25 @@ def add_data2(self, df, daily=False, network=None): drop_monitor_cols = False if daily: if drop_monitor_cols: - monitor_drop = ['msa_name', 'city_name', u'local_site_name', u'address', u'datum'] + monitor_drop = ["msa_name", "city_name", "local_site_name", "address", "datum"] self.monitor_df.drop(monitor_drop, axis=1, inplace=True) # else: # monitor_drop = [u'datum'] # self.monitor_df.drop(monitor_drop, axis=1, inplace=True) if network is not None: - monitors = self.monitor_df.loc[self.monitor_df.isin([network])].drop_duplicates(subset=['siteid']) + monitors = self.monitor_df.loc[self.monitor_df.isin([network])].drop_duplicates( + subset=["siteid"] + ) else: - monitors = self.monitor_df.drop_duplicates(subset=['siteid']) + monitors = self.monitor_df.drop_duplicates(subset=["siteid"]) # AMC - merging only on siteid was causing latitude_x latitude_y to be # created. - mlist = ['siteid'] - self.df = pd.merge(self.df, monitors, on=mlist, how='left') + mlist = ["siteid"] + self.df = pd.merge(self.df, monitors, on=mlist, how="left") if daily: - self.df['time'] = self.df.time_local - pd.to_timedelta(self.df.gmt_offset, unit='H') - if pd.Series(self.df.columns).isin(['parameter_name']).max(): - self.df.drop('parameter_name', axis=1, inplace=True) + self.df["time"] = self.df.time_local - pd.to_timedelta(self.df.gmt_offset, unit="H") + if pd.Series(self.df.columns).isin(["parameter_name"]).max(): + self.df.drop("parameter_name", axis=1, inplace=True) return self.df # .copy() def get_species(self, df, voc=False): @@ -411,178 +476,178 @@ def get_species(self, df, voc=False): """ pc = df.parameter_code.unique() - df['variable'] = '' + df["variable"] = "" if voc: - df['variable'] = df.parameter_name.str.upper() + df["variable"] = df.parameter_name.str.upper() return df for i in pc: con = df.parameter_code == i if (i == 88101) | (i == 88502): - df.loc[con, 'variable'] = 'PM2.5' + df.loc[con, "variable"] = "PM2.5" if i == 44201: - df.loc[con, 'variable'] = 'OZONE' + df.loc[con, "variable"] = "OZONE" if i == 81102: - df.loc[con, 'variable'] = 'PM10' + df.loc[con, "variable"] = "PM10" if i == 42401: - df.loc[con, 'variable'] = 'SO2' + df.loc[con, "variable"] = "SO2" if i == 42602: - df.loc[con, 'variable'] = 'NO2' + df.loc[con, "variable"] = "NO2" if i == 42101: - df.loc[con, 'variable'] = 'CO' + df.loc[con, "variable"] = "CO" if i == 62101: - df.loc[con, 'variable'] = 'TEMP' + df.loc[con, "variable"] = "TEMP" if i == 88305: - df.loc[con, 'variable'] = 'OC' + df.loc[con, "variable"] = "OC" if i == 88306: - df.loc[con, 'variable'] = 'NO3f' + df.loc[con, "variable"] = "NO3f" if i == 88307: - df.loc[con, 'variable'] = 'ECf' + df.loc[con, "variable"] = "ECf" if i == 88316: - df.loc[con, 'variable'] = 'ECf_optical' + df.loc[con, "variable"] = "ECf_optical" if i == 88403: - df.loc[con, 'variable'] = 'SO4f' + df.loc[con, "variable"] = "SO4f" if i == 88312: - df.loc[con, 'variable'] = 'TCf' + df.loc[con, "variable"] = "TCf" if i == 88104: - df.loc[con, 'variable'] = 'Alf' + df.loc[con, "variable"] = "Alf" if i == 88107: - df.loc[con, 'variable'] = 'Baf' + df.loc[con, "variable"] = "Baf" if i == 88313: - df.loc[con, 'variable'] = 'BCf' + df.loc[con, "variable"] = "BCf" if i == 88109: - df.loc[con, 'variable'] = 'Brf' + df.loc[con, "variable"] = "Brf" if i == 88110: - df.loc[con, 'variable'] = 'Cdf' + df.loc[con, "variable"] = "Cdf" if i == 88111: - df.loc[con, 'variable'] = 'Caf' + df.loc[con, "variable"] = "Caf" if i == 88117: - df.loc[con, 'variable'] = 'Cef' + df.loc[con, "variable"] = "Cef" if i == 88118: - df.loc[con, 'variable'] = 'Csf' + df.loc[con, "variable"] = "Csf" if i == 88203: - df.loc[con, 'variable'] = 'Cl-f' + df.loc[con, "variable"] = "Cl-f" if i == 88115: - df.loc[con, 'variable'] = 'Clf' + df.loc[con, "variable"] = "Clf" if i == 88112: - df.loc[con, 'variable'] = 'Crf' + df.loc[con, "variable"] = "Crf" if i == 88113: - df.loc[con, 'variable'] = 'Cof' + df.loc[con, "variable"] = "Cof" if i == 88114: - df.loc[con, 'variable'] = 'Cuf' + df.loc[con, "variable"] = "Cuf" if i == 88121: - df.loc[con, 'variable'] = 'Euf' + df.loc[con, "variable"] = "Euf" if i == 88143: - df.loc[con, 'variable'] = 'Auf' + df.loc[con, "variable"] = "Auf" if i == 88127: - df.loc[con, 'variable'] = 'Hff' + df.loc[con, "variable"] = "Hff" if i == 88131: - df.loc[con, 'variable'] = 'Inf' + df.loc[con, "variable"] = "Inf" if i == 88126: - df.loc[con, 'variable'] = 'Fef' + df.loc[con, "variable"] = "Fef" if i == 88146: - df.loc[con, 'variable'] = 'Laf' + df.loc[con, "variable"] = "Laf" if i == 88128: - df.loc[con, 'variable'] = 'Pbf' + df.loc[con, "variable"] = "Pbf" if i == 88140: - df.loc[con, 'variable'] = 'Mgf' + df.loc[con, "variable"] = "Mgf" if i == 88132: - df.loc[con, 'variable'] = 'Mnf' + df.loc[con, "variable"] = "Mnf" if i == 88142: - df.loc[con, 'variable'] = 'Hgf' + df.loc[con, "variable"] = "Hgf" if i == 88134: - df.loc[con, 'variable'] = 'Mof' + df.loc[con, "variable"] = "Mof" if i == 88136: - df.loc[con, 'variable'] = 'Nif' + df.loc[con, "variable"] = "Nif" if i == 88147: - df.loc[con, 'variable'] = 'Nbf' + df.loc[con, "variable"] = "Nbf" if i == 88310: - df.loc[con, 'variable'] = 'NO3f' + df.loc[con, "variable"] = "NO3f" if i == 88152: - df.loc[con, 'variable'] = 'Pf' + df.loc[con, "variable"] = "Pf" if i == 88303: - df.loc[con, 'variable'] = 'K+f' + df.loc[con, "variable"] = "K+f" if i == 88176: - df.loc[con, 'variable'] = 'Rbf' + df.loc[con, "variable"] = "Rbf" if i == 88162: - df.loc[con, 'variable'] = 'Smf' + df.loc[con, "variable"] = "Smf" if i == 88163: - df.loc[con, 'variable'] = 'Scf' + df.loc[con, "variable"] = "Scf" if i == 88154: - df.loc[con, 'variable'] = 'Sef' + df.loc[con, "variable"] = "Sef" if i == 88165: - df.loc[con, 'variable'] = 'Sif' + df.loc[con, "variable"] = "Sif" if i == 88166: - df.loc[con, 'variable'] = 'Agf' + df.loc[con, "variable"] = "Agf" if i == 88302: - df.loc[con, 'variable'] = 'Na+f' + df.loc[con, "variable"] = "Na+f" if i == 88184: - df.loc[con, 'variable'] = 'Naf' + df.loc[con, "variable"] = "Naf" if i == 88168: - df.loc[con, 'variable'] = 'Srf' + df.loc[con, "variable"] = "Srf" if i == 88403: - df.loc[con, 'variable'] = 'SO4f' + df.loc[con, "variable"] = "SO4f" if i == 88169: - df.loc[con, 'variable'] = 'Sf' + df.loc[con, "variable"] = "Sf" if i == 88170: - df.loc[con, 'variable'] = 'Taf' + df.loc[con, "variable"] = "Taf" if i == 88172: - df.loc[con, 'variable'] = 'Tbf' + df.loc[con, "variable"] = "Tbf" if i == 88160: - df.loc[con, 'variable'] = 'Snf' + df.loc[con, "variable"] = "Snf" if i == 88161: - df.loc[con, 'variable'] = 'Tif' + df.loc[con, "variable"] = "Tif" if i == 88312: - df.loc[con, 'variable'] = 'TOT_Cf' + df.loc[con, "variable"] = "TOT_Cf" if i == 88310: - df.loc[con, 'variable'] = 'NON-VOLITILE_NO3f' + df.loc[con, "variable"] = "NON-VOLITILE_NO3f" if i == 88309: - df.loc[con, 'variable'] = 'VOLITILE_NO3f' + df.loc[con, "variable"] = "VOLITILE_NO3f" if i == 88186: - df.loc[con, 'variable'] = 'Wf' + df.loc[con, "variable"] = "Wf" if i == 88314: - df.loc[con, 'variable'] = 'C_370nmf' + df.loc[con, "variable"] = "C_370nmf" if i == 88179: - df.loc[con, 'variable'] = 'Uf' + df.loc[con, "variable"] = "Uf" if i == 88164: - df.loc[con, 'variable'] = 'Vf' + df.loc[con, "variable"] = "Vf" if i == 88183: - df.loc[con, 'variable'] = 'Yf' + df.loc[con, "variable"] = "Yf" if i == 88167: - df.loc[con, 'variable'] = 'Znf' + df.loc[con, "variable"] = "Znf" if i == 88185: - df.loc[con, 'variable'] = 'Zrf' + df.loc[con, "variable"] = "Zrf" if i == 88102: - df.loc[con, 'variable'] = 'Sbf' + df.loc[con, "variable"] = "Sbf" if i == 88103: - df.loc[con, 'variable'] = 'Asf' + df.loc[con, "variable"] = "Asf" if i == 88105: - df.loc[con, 'variable'] = 'Bef' + df.loc[con, "variable"] = "Bef" if i == 88124: - df.loc[con, 'variable'] = 'Gaf' + df.loc[con, "variable"] = "Gaf" if i == 88185: - df.loc[con, 'variable'] = 'Irf' + df.loc[con, "variable"] = "Irf" if i == 88180: - df.loc[con, 'variable'] = 'Kf' + df.loc[con, "variable"] = "Kf" if i == 88301: - df.loc[con, 'variable'] = 'NH4+f' + df.loc[con, "variable"] = "NH4+f" if (i == 88320) | (i == 88355): - df.loc[con, 'variable'] = 'OCf' + df.loc[con, "variable"] = "OCf" if (i == 88357) | (i == 88321): - df.loc[con, 'variable'] = 'ECf' + df.loc[con, "variable"] = "ECf" if i == 42600: - df.loc[con, 'variable'] = 'NOY' + df.loc[con, "variable"] = "NOY" if i == 42601: - df.loc[con, 'variable'] = 'NO' + df.loc[con, "variable"] = "NO" if i == 42603: - df.loc[con, 'variable'] = 'NOX' + df.loc[con, "variable"] = "NOX" if (i == 61103) | (i == 61101): - df.loc[con, 'variable'] = 'WS' + df.loc[con, "variable"] = "WS" if (i == 61104) | (i == 61102): - df.loc[con, 'variable'] = 'WD' + df.loc[con, "variable"] = "WD" if i == 62201: - df.loc[con, 'variable'] = 'RH' + df.loc[con, "variable"] = "RH" if i == 62103: - df.loc[con, 'variable'] = 'DP' + df.loc[con, "variable"] = "DP" return df @staticmethod @@ -603,24 +668,24 @@ def change_units(df): units = df.units.unique() for i in units: con = df.units == i - if i.upper() == 'Parts per billion Carbon'.upper(): - df.loc[con, 'units'] = 'ppbC' - if i == 'Parts per billion': - df.loc[con, 'units'] = 'ppb' - if i == 'Parts per million': - df.loc[con, 'units'] = 'ppm' - if i == 'Micrograms/cubic meter (25 C)': - df.loc[con, 'units'] = 'UG/M3'.lower() - if i == 'Degrees Centigrade': - df.loc[con, 'units'] = 'C' - if i == 'Micrograms/cubic meter (LC)': - df.loc[con, 'units'] = 'UG/M3'.lower() - if i == 'Knots': - df.loc[con, 'obs'] *= 0.51444 - df.loc[con, 'units'] = 'M/S'.lower() - if i == 'Degrees Fahrenheit': - df.loc[con, 'obs'] = (df.loc[con, 'obs'] + 459.67) * 5.0 / 9.0 - df.loc[con, 'units'] = 'K' - if i == 'Percent relative humidity': - df.loc[con, 'units'] = '%' + if i.upper() == "Parts per billion Carbon".upper(): + df.loc[con, "units"] = "ppbC" + if i == "Parts per billion": + df.loc[con, "units"] = "ppb" + if i == "Parts per million": + df.loc[con, "units"] = "ppm" + if i == "Micrograms/cubic meter (25 C)": + df.loc[con, "units"] = "UG/M3".lower() + if i == "Degrees Centigrade": + df.loc[con, "units"] = "C" + if i == "Micrograms/cubic meter (LC)": + df.loc[con, "units"] = "UG/M3".lower() + if i == "Knots": + df.loc[con, "obs"] *= 0.51444 + df.loc[con, "units"] = "M/S".lower() + if i == "Degrees Fahrenheit": + df.loc[con, "obs"] = (df.loc[con, "obs"] + 459.67) * 5.0 / 9.0 + df.loc[con, "units"] = "K" + if i == "Percent relative humidity": + df.loc[con, "units"] = "%" return df diff --git a/monetio/obs/cems_api.py b/monetio/obs/cems_api.py index f87d4ce4..8f0552f0 100644 --- a/monetio/obs/cems_api.py +++ b/monetio/obs/cems_api.py @@ -1,17 +1,3 @@ -from __future__ import print_function -import os -import datetime -import sys -import pandas as pd -import numpy as np -import requests -import copy - -# import pytz -import seaborn as sns -from urllib.parse import quote -import monetio.obs.obs_util as obs_util - """ NAME: cems_api.py PGRMMER: Alice Crawford ORG: ARL @@ -49,6 +35,21 @@ """ +import copy +import datetime +import os +import sys +from urllib.parse import quote + +import numpy as np +import pandas as pd +import requests + +# import pytz +import seaborn as sns + +import monetio.obs.obs_util as obs_util + def test_end(endtime, current): # if endtime None return True @@ -436,11 +437,11 @@ def get(self): data = self.get_raw_data() try: self.status_code = data.status_code - except: + except AttributeError: self.status_code = "None" try: jobject = data.json() - except BaseException: + except Exception: return data df = self.unpack(jobject) return df @@ -457,18 +458,20 @@ class that represents data returned by one emissions/hourlydata call to the rest Attributes """ - def __init__(self, oris, mid, year, quarter, fname=None, calltype='CEM', save=True, prompt=False): + def __init__( + self, oris, mid, year, quarter, fname=None, calltype="CEM", save=True, prompt=False + ): self.oris = oris # oris code of facility self.mid = mid # monitoring location id. self.year = str(year) self.quarter = str(quarter) calltype = calltype.upper().strip() - if calltype == 'F23': - calltype = 'AD' + if calltype == "F23": + calltype = "AD" if not fname: fname = "Emissions." + self.year + ".q" + self.quarter - if calltype == 'AD': - fname += '.AD' + if calltype == "AD": + fname += ".AD" fname += "." + str(self.mid) + "." + str(oris) + ".csv" self.dfall = pd.DataFrame() @@ -512,9 +515,9 @@ def load(self): if not df.empty: self.status_code = 200 print("SO2 DATA EXISTS") - temp = df[df['so2_lbs'] > 0] + temp = df[df["so2_lbs"] > 0] if temp.empty: - print('SO2 lbs all zero') + print("SO2 lbs all zero") # check for two date formats. # ----------------------------------------- @@ -529,12 +532,12 @@ def newdate(x): fail = 0 try: rval = datetime.datetime.strptime(rval, datefmt) - except: + except ValueError: fail = 1 if fail == 1: try: rval = datetime.datetime.strptime(rval, datefmt2) - except: + except ValueError: fail = 2 print(self.fname) print("WARNING: Could not parse date " + rval) @@ -557,7 +560,7 @@ def get(self): data = self.get_raw_data() try: self.status_code = data.status_code - except: + except AttributeError: self.status_code = None if data: df = self.unpack(data) @@ -570,14 +573,14 @@ def unpack(self, data): iii = 0 cols = [] tra = [] - print('----UNPACK-----------------') + print("----UNPACK-----------------") for line in data.iter_lines(decode_unicode=True): # if iii < 5: # print('LINE') # print(line) # 1. Process First line - temp = line.split(',') - if temp[-1] and self.calltype == 'LME': + temp = line.split(",") + if temp[-1] and self.calltype == "LME": print(line) if iii == 0: @@ -636,16 +639,16 @@ def unpack(self, data): df = pd.DataFrame(tra, columns=cols) df.apply(pd.to_numeric, errors="ignore") df = self.manage_date(df) - if self.calltype == 'AD': - df['SO2MODC'] = -8 - if self.calltype == 'LME': - df['SO2MODC'] = -9 + if self.calltype == "AD": + df["SO2MODC"] = -8 + if self.calltype == "LME": + df["SO2MODC"] = -9 df = self.convert_cols(df) df = self.manage_so2modc(df) df = get_so2(df) # the LME data sometimes has duplicate rows. # causing emissions to be over-estimated. - if self.calltype == 'LME': + if self.calltype == "LME": df = df.drop_duplicates() return df @@ -685,12 +688,15 @@ def checkmodc(formula, so2modc, so2_lbs): if so2modc != 0 or not formula: return so2modc else: - if 'F-23' in str(formula): + if "F-23" in str(formula): return -7 else: return -10 - df["SO2MODC"] = df.apply(lambda row: checkmodc(row["SO2CEMSO2FormulaCode"], row['SO2MODC'], row['so2_lbs']), axis=1) + df["SO2MODC"] = df.apply( + lambda row: checkmodc(row["SO2CEMSO2FormulaCode"], row["SO2MODC"], row["so2_lbs"]), + axis=1, + ) return df def convert_cols(self, df): @@ -756,7 +762,7 @@ def lme_getmass(cname): # if operating time is zero then map to 0 (it is '' in file) optime = "OperatingTime" cname = self.so2name - if self.calltype == 'LME': + if self.calltype == "LME": df["so2_lbs"] = df.apply(lambda row: lme_getmass(row[cname]), axis=1) else: df["so2_lbs"] = df.apply(lambda row: getmass(row[optime], row[cname]), axis=1) @@ -984,7 +990,7 @@ def to_dict(self, unit=None): try: mhash = df.reset_index().to_dict("records") - except: + except Exception: mhash = None return mhash @@ -992,7 +998,7 @@ def get_stackht(self, unit): # print(self.df) df = self.df[self.df["name"] == unit] # print(df) - stackhts = df['stackht'].unique() + stackhts = df["stackht"].unique() # print('get stackht', stackhts) return stackhts @@ -1007,8 +1013,8 @@ def get_method(self, unit, daterange): return None temp["testdate"] = temp.apply(lambda row: test_end(row["endDateHour"], edate), axis=1) - temp = temp[temp["testdate"] == True] - method = temp['methodCode'].unique() + temp = temp[temp["testdate"]] + method = temp["methodCode"].unique() return method @@ -1024,17 +1030,21 @@ def load(self): def parsedate(x, sfmt): if not x: return pd.NaT - elif x == 'None': + elif x == "None": return pd.NaT else: try: return pd.to_datetime(x, format=sfmt) - except: - print('time value', x) + except Exception: + print("time value", x) return pd.NaT df = pd.read_csv( - self.fname, index_col=[0], converters=chash, parse_dates=['beginDateHour', 'endDateHour'], date_parser=lambda x: parsedate(x, self.dfmt) + self.fname, + index_col=[0], + converters=chash, + parse_dates=["beginDateHour", "endDateHour"], + date_parser=lambda x: parsedate(x, self.dfmt), ) self.dfall = df.copy() @@ -1084,7 +1094,7 @@ def unpack(self, data): # The stackname may contain multiple 'units' stackname = ihash["unitStackName"] - stackhash = {} + # stackhash = {} shash = {} # first go through the unitStackConfigurations @@ -1123,7 +1133,7 @@ def unpack(self, data): dhash = {} name = unithash["name"] - print('NAME ', name) + print("NAME ", name) dhash["name"] = name if name in shash.keys(): dhash["stackunit"] = shash[name] @@ -1139,7 +1149,7 @@ def unpack(self, data): try: dhash["stackht"] = float(att["stackHeight"]) * ft2m - except: + except ValueError: dhash["stackht"] = np.NaN else: dhash["stackht"] = np.NaN @@ -1154,32 +1164,34 @@ def unpack(self, data): iii = 0 for method in unithash["monitoringMethods"]: # print('METHOD LIST', method) - if 'SO2' in method["parameterCode"]: - print('SO2 data') + if "SO2" in method["parameterCode"]: + print("SO2 data") dhash["parameterCode"] = method["parameterCode"] dhash["methodCode"] = method["methodCode"] - dhash["beginDateHour"] = pd.to_datetime(method["beginDateHour"], format=self.dfmt) + dhash["beginDateHour"] = pd.to_datetime( + method["beginDateHour"], format=self.dfmt + ) dhash["endDateHour"] = pd.to_datetime(method["endDateHour"], format=self.dfmt) dhash["oris"] = self.oris dhash["mid"] = self.mid dhash["request_date"] = self.date - print('Monitoring Location ------------------') + print("Monitoring Location ------------------") print(dhash) - print('------------------') + print("------------------") dlist.append(copy.deepcopy(dhash)) iii += 1 # if there is no monitoring method for SO2 if iii == 0: - dhash["parameterCode"] = 'None' - dhash["methodCode"] = 'None' + dhash["parameterCode"] = "None" + dhash["methodCode"] = "None" dhash["beginDateHour"] = pd.NaT dhash["endDateHour"] = pd.NaT dhash["oris"] = self.oris dhash["mid"] = self.mid dhash["request_date"] = self.date - print('Monitoring Location ------------------') + print("Monitoring Location ------------------") print(dhash) - print('------------------') + print("------------------") dlist.append(copy.deepcopy(dhash)) # print(dlist) @@ -1200,15 +1212,17 @@ def find_stackht(name, stackht, shash, nhash): # this handles case when height is specified for the unitId # and not the stackId else: - ahash = dict((y, x) for x, y in shash.items()) + ahash = {y: x for x, y in shash.items()} if name in ahash.keys(): sid = ahash[name] stackht = nhash[sid] return stackht - df["stackht"] = df.apply(lambda row: find_stackht(row["name"], row["stackht"], shash, nhash), axis=1) + df["stackht"] = df.apply( + lambda row: find_stackht(row["name"], row["stackht"], shash, nhash), axis=1 + ) df["stackht_unit"] = "m" - print('DF2 ------------------') + print("DF2 ------------------") print(df) return df @@ -1315,7 +1329,7 @@ def process_unit_time(instr): # there are many None in end time field. try: year = int(instr[0:4]) - except: + except ValueError: return None quarter = int(instr[4]) if quarter == 1: @@ -1360,7 +1374,7 @@ def state_from_oris(self, orislist): Returns list of state abbreviations """ - statelist = [] + # statelist = [] temp = self.df[self.df["oris"].isin(orislist)] return temp["state"].unique() @@ -1397,7 +1411,7 @@ def get_unit_start(self, oris, unit): temp = temp[temp["unit"] == unit] start = temp["begin time"].unique() - end = temp["end time"].unique() + # end = temp["end time"].unique() sdate = [] for sss in start: sdate.append(self.process_unit_time(sss)) @@ -1417,7 +1431,7 @@ def get_unit_request(self, oris, unit, sdate): klist = ["testdate", "begin time", "end time", "unit", "oris", "request_string"] print(temp[klist]) print("--------------------------------------------") - temp = temp[temp["testdate"] == True] + temp = temp[temp["testdate"]] rstr = temp["request_string"].unique() return rstr @@ -1582,12 +1596,11 @@ def get_monitoring_plan(oris, mid, mrequest, date1, dflist): # adds to list of oris, mid, stackht which will later be turned into # a dataframe with that information. status_code = 204 - iii = 0 - mhash = None + # mhash = None for mr in mrequest: print("Get Monitoring Plan " + mr) plan = MonitoringPlan(str(oris), mr, date1) - status_code = plan.status_code + status_code = plan.status_code # noqa: F841 stackht = plan.get_stackht(mid) if len(stackht) == 1: print(len(stackht)) @@ -1608,7 +1621,7 @@ def get_monitoring_plan(oris, mid, mrequest, date1, dflist): # mhash = mhash[0] # stackht = float(mhash["stackht"]) else: - print('Stack height not determined ', stackht) + print("Stack height not determined ", stackht) stackht = None istr = "\n" + "Could not retrieve stack height from monitoring plan \n" istr += "Please enter stack height (in meters) \n" @@ -1616,15 +1629,15 @@ def get_monitoring_plan(oris, mid, mrequest, date1, dflist): test = input(istr) try: stackht = float(test) - except: + except ValueError: stackht = None method = plan.get_method(mid, [date1, date1]) - print('METHODS returned', method, mid, str(oris)) + print("METHODS returned", method, mid, str(oris)) # catchall so do not double count. # currently CEM and CEMF23 result in same EmissionCall request string. if method: - if 'CEM' in method and 'CEMF23' in method: - method = 'CEM' + if "CEM" in method and "CEMF23" in method: + method = "CEM" dflist.append((str(oris), mid, stackht)) return dflist, method @@ -1762,7 +1775,6 @@ def add_data(self, rdate, alist, area=True, verbose=True): # find first valid monitoringplan by date. mrequest = None - iii = 0 for udate in datelist: mrequest = fac.get_unit_request(oris, mid, udate) if mrequest: @@ -1789,7 +1801,7 @@ def add_data(self, rdate, alist, area=True, verbose=True): method = [] # add emissions for each quarter list. for meth in method: - rvalue = self.add_emissions(oris, mid, datelist, meth) + _ = self.add_emissions(oris, mid, datelist, meth) # print(dflist) # create dataframe from dflist. @@ -1823,6 +1835,7 @@ def add_data(self, rdate, alist, area=True, verbose=True): if emitdf.empty: return emitdf emitdf = emitdf.dropna(axis=0, subset=["so2_lbs"]) + # The LME data sometimes has duplicate rows. # causing emissions to be over-estimated. # emitdf = emitdf.drop_duplicates() @@ -1866,8 +1879,8 @@ def badrow(rrr): try: tempdf = emitdf.dropna(axis=0, subset=[ccc]) print(ccc + " na dropped", tempdf.shape) - except: - print(ccc + " cannot drop error") + except Exception: + print(ccc + " cannot drop, error") # print('stackht is na ---------------------------------') # tempdf = emitdf[emitdf['stackht'].isnull()] # print(tempdf[['oris','so2_lbs','unit','time local','stackht']]) @@ -1954,6 +1967,6 @@ def match_column(df, varname): def latlon2str(lat, lon): - latstr = "{:.4}".format(lat) - lonstr = "{:.4}".format(lon) + latstr = f"{lat:.4}" + lonstr = f"{lon:.4}" return (latstr, lonstr) diff --git a/monetio/obs/cems_mod.py b/monetio/obs/cems_mod.py index 4f7412b0..c3d2e9fa 100644 --- a/monetio/obs/cems_mod.py +++ b/monetio/obs/cems_mod.py @@ -1,10 +1,3 @@ -import datetime -import os - -import numpy as np -import pandas as pd - - """ NAME: cems_mod.py PGRMMER: Alice Crawford ORG: ARL @@ -12,6 +5,11 @@ Python 3 ################################################################# """ +import datetime +import os + +import numpy as np +import pandas as pd def getdegrees(degrees, minutes, seconds): @@ -52,10 +50,10 @@ def get_date_fmt(date, verbose=False): string. """ if verbose: - print('Determining date format') + print("Determining date format") if verbose: print(date) - temp = date.split('-') + temp = date.split("-") if len(temp[0]) == 4: fmt = "%Y-%m-%d %H" else: @@ -63,7 +61,7 @@ def get_date_fmt(date, verbose=False): return fmt -class CEMS(object): +class CEMS: """ Class for data from continuous emission monitoring systems (CEMS). Data from power plants can be downloaded from @@ -98,7 +96,7 @@ def __init__(self): self.url = "ftp://newftp.epa.gov/DmDnLoad/emissions/" self.lb2kg = 0.453592 # number of kilograms per pound. self.info = "Data from continuous emission monitoring systems (CEMS)\n" - self.info += self.url + '\n' + self.info += self.url + "\n" self.df = pd.DataFrame() self.namehash = {} # if columns are renamed keeps track of original names. # Each facility may have more than one unit which is specified by the @@ -107,7 +105,7 @@ def __init__(self): def __str__(self): return self.info - def add_data(self, rdate, states=['md'], download=False, verbose=True): + def add_data(self, rdate, states=["md"], download=False, verbose=True): """ gets the ftp url from the retrieve method and then loads the data from the ftp site using the load method. @@ -153,7 +151,7 @@ def add_data(self, rdate, states=['md'], download=False, verbose=True): else: rdatelist = [rdate] for rd in rdatelist: - print('getting data') + print("getting data") print(rd) for st in states: url = self.retrieve(rd, st, download=download, verbose=verbose) @@ -204,19 +202,27 @@ def cemspivot(self, varname, daterange=None, unitid=False, verbose=True): temp = self.df.copy() if daterange: temp = timefilter(temp, daterange) - if 'unit_id' in temp.columns.values and unitid: - if temp['unit_id'].unique(): + if "unit_id" in temp.columns.values and unitid: + if temp["unit_id"].unique(): if verbose: - print('UNIT IDs ', temp['unit_id'].unique()) + print("UNIT IDs ", temp["unit_id"].unique()) # create pandas frame with index datetime and columns for value for # each unit_id,orispl - pivot = pd.pivot_table(temp, values=varname, index=['time'], columns=['orispl_code', 'unit_id'], aggfunc=np.sum) + pivot = pd.pivot_table( + temp, + values=varname, + index=["time"], + columns=["orispl_code", "unit_id"], + aggfunc=np.sum, + ) else: if verbose: - print('NO UNIT ID') + print("NO UNIT ID") # returns data frame where rows are date and columns are the values # of cmatch for orispl - pivot = pd.pivot_table(temp, values=varname, index=['time'], columns=['orispl_code'], aggfunc=np.sum) + pivot = pd.pivot_table( + temp, values=varname, index=["time"], columns=["orispl_code"], aggfunc=np.sum + ) return pivot def get_var(self, varname, orisp=None, daterange=None, unitid=-99, verbose=True): @@ -279,15 +285,15 @@ def retrieve(self, rdate, state, download=True, verbose=False): """ # import requests # TO DO: requests does not support ftp sites. - efile = 'empty' + efile = "empty" ftpsite = self.url - ftpsite += 'hourly/' - ftpsite += 'monthly/' - ftpsite += rdate.strftime("%Y") + '/' + ftpsite += "hourly/" + ftpsite += "monthly/" + ftpsite += rdate.strftime("%Y") + "/" print(ftpsite) print(rdate) print(state) - fname = rdate.strftime("%Y") + state + rdate.strftime("%m") + '.zip' + fname = rdate.strftime("%Y") + state + rdate.strftime("%m") + ".zip" if not download: efile = ftpsite + fname if not os.path.isfile(fname): @@ -296,13 +302,13 @@ def retrieve(self, rdate, state, download=True, verbose=False): # open(efile, 'wb').write(r.content) # print('retrieved ' + ftpsite + fname) efile = ftpsite + fname - print('WARNING: Downloading file not supported at this time') - print('you may download manually using the following address') + print("WARNING: Downloading file not supported at this time") + print("you may download manually using the following address") print(efile) else: - print('file exists ' + fname) + print("file exists " + fname) efile = fname - self.info += 'File retrieved :' + efile + '\n' + self.info += "File retrieved :" + efile + "\n" return efile def create_location_dictionary(self, verbose=False): @@ -310,9 +316,9 @@ def create_location_dictionary(self, verbose=False): returns dictionary withe key orispl_code and value (latitude, longitude) tuple """ - if 'latitude' in list(self.df.columns.values): + if "latitude" in list(self.df.columns.values): dftemp = self.df.copy() - pairs = zip(dftemp['orispl_code'], zip(dftemp['latitude'], dftemp['longitude'])) + pairs = zip(dftemp["orispl_code"], zip(dftemp["latitude"], dftemp["longitude"])) pairs = list(set(pairs)) lhash = dict(pairs) # key is facility id and value is name. if verbose: @@ -325,9 +331,9 @@ def create_name_dictionary(self, verbose=False): """ returns dictionary withe key orispl_code and value facility name """ - if 'latitude' in list(self.df.columns.values): + if "latitude" in list(self.df.columns.values): dftemp = self.df.copy() - pairs = zip(dftemp['orispl_code'], dftemp['facility_name']) + pairs = zip(dftemp["orispl_code"], dftemp["facility_name"]) pairs = list(set(pairs)) lhash = dict(pairs) # key is facility id and value is name. if verbose: @@ -349,28 +355,36 @@ def columns_rename(self, columns, verbose=False): """ rcolumn = [] for ccc in columns: - if 'facility' in ccc.lower() and 'name' in ccc.lower(): - rcolumn = self.rename(ccc, 'facility_name', rcolumn, verbose) - elif 'orispl' in ccc.lower(): - rcolumn = self.rename(ccc, 'orispl_code', rcolumn, verbose) - elif 'facility' in ccc.lower() and 'id' in ccc.lower(): - rcolumn = self.rename(ccc, 'fac_id', rcolumn, verbose) - elif 'so2' in ccc.lower() and ('lbs' in ccc.lower() or 'pounds' in ccc.lower()) and ('rate' not in ccc.lower()): - rcolumn = self.rename(ccc, 'so2_lbs', rcolumn, verbose) - elif 'nox' in ccc.lower() and ('lbs' in ccc.lower() or 'pounds' in ccc.lower()) and ('rate' not in ccc.lower()): - rcolumn = self.rename(ccc, 'nox_lbs', rcolumn, verbose) - elif 'co2' in ccc.lower() and ('short' in ccc.lower() and 'tons' in ccc.lower()): - rcolumn = self.rename(ccc, 'co2_short_tons', rcolumn, verbose) - elif 'date' in ccc.lower(): - rcolumn = self.rename(ccc, 'date', rcolumn, verbose) - elif 'hour' in ccc.lower(): - rcolumn = self.rename(ccc, 'hour', rcolumn, verbose) - elif 'lat' in ccc.lower(): - rcolumn = self.rename(ccc, 'latitude', rcolumn, verbose) - elif 'lon' in ccc.lower(): - rcolumn = self.rename(ccc, 'longitude', rcolumn, verbose) - elif 'state' in ccc.lower(): - rcolumn = self.rename(ccc, 'state_name', rcolumn, verbose) + if "facility" in ccc.lower() and "name" in ccc.lower(): + rcolumn = self.rename(ccc, "facility_name", rcolumn, verbose) + elif "orispl" in ccc.lower(): + rcolumn = self.rename(ccc, "orispl_code", rcolumn, verbose) + elif "facility" in ccc.lower() and "id" in ccc.lower(): + rcolumn = self.rename(ccc, "fac_id", rcolumn, verbose) + elif ( + "so2" in ccc.lower() + and ("lbs" in ccc.lower() or "pounds" in ccc.lower()) + and ("rate" not in ccc.lower()) + ): + rcolumn = self.rename(ccc, "so2_lbs", rcolumn, verbose) + elif ( + "nox" in ccc.lower() + and ("lbs" in ccc.lower() or "pounds" in ccc.lower()) + and ("rate" not in ccc.lower()) + ): + rcolumn = self.rename(ccc, "nox_lbs", rcolumn, verbose) + elif "co2" in ccc.lower() and ("short" in ccc.lower() and "tons" in ccc.lower()): + rcolumn = self.rename(ccc, "co2_short_tons", rcolumn, verbose) + elif "date" in ccc.lower(): + rcolumn = self.rename(ccc, "date", rcolumn, verbose) + elif "hour" in ccc.lower(): + rcolumn = self.rename(ccc, "hour", rcolumn, verbose) + elif "lat" in ccc.lower(): + rcolumn = self.rename(ccc, "latitude", rcolumn, verbose) + elif "lon" in ccc.lower(): + rcolumn = self.rename(ccc, "longitude", rcolumn, verbose) + elif "state" in ccc.lower(): + rcolumn = self.rename(ccc, "state_name", rcolumn, verbose) else: rcolumn.append(ccc.strip().lower()) return rcolumn @@ -392,7 +406,7 @@ def rename(self, ccc, newname, rcolumn, verbose): self.namehash[newname] = ccc rcolumn.append(newname) if verbose: - print(ccc + ' to ' + newname) + print(ccc + " to " + newname) return rcolumn def add_info(self, dftemp): @@ -415,48 +429,54 @@ def add_info(self, dftemp): dftemp: pandas dataframe """ basedir = os.path.abspath(os.path.dirname(__file__))[:-3] - iname = os.path.join(basedir, 'data', 'cemsinfo.csv') + iname = os.path.join(basedir, "data", "cemsinfo.csv") # iname = os.path.join(basedir, 'data', 'cem_facility_loc.csv') method = 1 # TO DO: Having trouble with pytest throwing an error when using the # apply on the dataframe. # runs ok, but pytest fails. Tried several differnt methods. if os.path.isfile(iname): - sinfo = pd.read_csv(iname, sep=',', header=0) + sinfo = pd.read_csv(iname, sep=",", header=0) try: - dftemp.drop(['latitude', 'longitude'], axis=1, inplace=True) + dftemp.drop(["latitude", "longitude"], axis=1, inplace=True) except Exception: pass - dfnew = pd.merge(dftemp, sinfo, how='left', left_on=['orispl_code'], right_on=['orispl_code']) + dfnew = pd.merge( + dftemp, sinfo, how="left", left_on=["orispl_code"], right_on=["orispl_code"] + ) # print('---------z-----------') # print(dfnew.columns.values) # remove stations which do not have a time offset. - dfnew.dropna(axis=0, subset=['time_offset'], inplace=True) + dfnew.dropna(axis=0, subset=["time_offset"], inplace=True) if method == 1: # this runs ok but fails pytest def i2o(x): - return datetime.timedelta(hours=x['time_offset']) + return datetime.timedelta(hours=x["time_offset"]) - dfnew['time_offset'] = dfnew.apply(i2o, axis=1) - dfnew['time'] = dfnew['time local'] + dfnew['time_offset'] + dfnew["time_offset"] = dfnew.apply(i2o, axis=1) + dfnew["time"] = dfnew["time local"] + dfnew["time_offset"] elif method == 2: # this runs ok but fails pytest def utc(x): - return pd.Timestamp(x['time local']) + datetime.timedelta(hours=x['time_offset']) + return pd.Timestamp(x["time local"]) + datetime.timedelta( + hours=x["time_offset"] + ) - dfnew['time'] = dfnew.apply(utc, axis=1) + dfnew["time"] = dfnew.apply(utc, axis=1) elif method == 3: # this runs ok but fails pytest def utc(x, y): return x + datetime.timedelta(hours=y) - dfnew['time'] = dfnew.apply(lambda row: utc(row['time local'], row['time_offset']), axis=1) + dfnew["time"] = dfnew.apply( + lambda row: utc(row["time local"], row["time_offset"]), axis=1 + ) # remove the time_offset column. - dfnew.drop(['time_offset'], axis=1, inplace=True) + dfnew.drop(["time_offset"], axis=1, inplace=True) mlist = dftemp.columns.values.tolist() # merge the dataframes back together to include rows with no info # in the cemsinfo.csv - dftemp = pd.merge(dftemp, dfnew, how='left', left_on=mlist, right_on=mlist) + dftemp = pd.merge(dftemp, dfnew, how="left", left_on=mlist, right_on=mlist) return dftemp # return dfnew @@ -472,35 +492,37 @@ def load(self, efile, verbose=True): """ # pandas read_csv can read either from a file or url. - dftemp = pd.read_csv(efile, sep=',', index_col=False, header=0) + dftemp = pd.read_csv(efile, sep=",", index_col=False, header=0) columns = list(dftemp.columns.values) columns = self.columns_rename(columns, verbose) dftemp.columns = columns if verbose: print(columns) - dfmt = get_date_fmt(dftemp['date'][0], verbose=verbose) + dfmt = get_date_fmt(dftemp["date"][0], verbose=verbose) # create column with datetime information # from column with month-day-year and column with hour. - dftime = dftemp.apply(lambda x: pd.datetime.strptime("{0} {1}".format(x['date'], x['hour']), dfmt), axis=1) + dftime = dftemp.apply( + lambda x: pd.datetime.strptime("{} {}".format(x["date"], x["hour"]), dfmt), axis=1 + ) dftemp = pd.concat([dftime, dftemp], axis=1) - dftemp.rename(columns={0: 'time local'}, inplace=True) - dftemp.drop(['date', 'hour'], axis=1, inplace=True) + dftemp.rename(columns={0: "time local"}, inplace=True) + dftemp.drop(["date", "hour"], axis=1, inplace=True) # -------------Load supplmental data----------------------- # contains info on facility id, lat, lon, time offset from UTC. # allows transformation from local time to UTC. dftemp = self.add_info(dftemp) - if ['year'] in columns: - dftemp.drop(['year'], axis=1, inplace=True) + if ["year"] in columns: + dftemp.drop(["year"], axis=1, inplace=True) if self.df.empty: self.df = dftemp if verbose: - print('Initializing pandas dataframe. Loading ' + efile) + print("Initializing pandas dataframe. Loading " + efile) else: self.df = self.df.append(dftemp) if verbose: - print('Appending to pandas dataframe. Loading ' + efile) + print("Appending to pandas dataframe. Loading " + efile) # if verbose: print(dftemp[0:10]) return dftemp diff --git a/monetio/obs/crn.py b/monetio/obs/crn.py index fbc9fe9c..999aa388 100644 --- a/monetio/obs/crn.py +++ b/monetio/obs/crn.py @@ -113,7 +113,6 @@ import inspect import os -from builtins import object, zip import pandas as pd from numpy import array @@ -121,146 +120,162 @@ def add_data(dates, param=None, daily=False, sub_hourly=False, download=False, latlonbox=None): a = CRN() - df = a.add_data(dates, daily=daily, sub_hourly=sub_hourly, download=download, latlonbox=latlonbox) + df = a.add_data( + dates, daily=daily, sub_hourly=sub_hourly, download=download, latlonbox=latlonbox + ) return df -class CRN(object): +class CRN: def __init__(self): self.dates = None self.daily = False self.ftp = None self.df = pd.DataFrame() - self.se_states = array(['AL', 'FL', 'GA', 'MS', 'NC', 'SC', 'TN', 'VA', 'WV'], dtype='|S14') - self.ne_states = array(['CT', 'DE', 'DC', 'ME', 'MD', 'MA', 'NH', 'NJ', 'NY', 'PA', 'RI', 'VT'], dtype='|S20') - self.nc_states = array(['IL', 'IN', 'IA', 'KY', 'MI', 'MN', 'MO', 'OH', 'WI'], dtype='|S9') - self.sc_states = array(['AR', 'LA', 'OK', 'TX'], dtype='|S9') - self.r_states = array(['AZ', 'CO', 'ID', 'KS', 'MT', 'NE', 'NV', 'NM', 'ND', 'SD', 'UT', 'WY'], dtype='|S12') - self.p_states = array(['CA', 'OR', 'WA'], dtype='|S10') - self.objtype = 'CRN' - self.monitor_file = inspect.getfile(self.__class__)[:-18] + 'data/stations.tsv' + self.se_states = array(["AL", "FL", "GA", "MS", "NC", "SC", "TN", "VA", "WV"], dtype="|S14") + self.ne_states = array( + ["CT", "DE", "DC", "ME", "MD", "MA", "NH", "NJ", "NY", "PA", "RI", "VT"], dtype="|S20" + ) + self.nc_states = array(["IL", "IN", "IA", "KY", "MI", "MN", "MO", "OH", "WI"], dtype="|S9") + self.sc_states = array(["AR", "LA", "OK", "TX"], dtype="|S9") + self.r_states = array( + ["AZ", "CO", "ID", "KS", "MT", "NE", "NV", "NM", "ND", "SD", "UT", "WY"], dtype="|S12" + ) + self.p_states = array(["CA", "OR", "WA"], dtype="|S10") + self.objtype = "CRN" + self.monitor_file = inspect.getfile(self.__class__)[:-18] + "data/stations.tsv" self.monitor_df = None - self.baseurl = 'https://www1.ncdc.noaa.gov/pub/data/uscrn/products/' + self.baseurl = "https://www1.ncdc.noaa.gov/pub/data/uscrn/products/" self.hcols = [ - 'WBANNO', - 'UTC_DATE', - 'UTC_TIME', - 'LST_DATE', - 'LST_TIME', - 'CRX_VN', - 'LONGITUDE', - 'LATITUDE', - 'T_CALC', - 'T_AVG', - 'T_MAX', - 'T_MIN', - 'P_CALC', - 'SOLARAD', - 'SOLARAD_FLAG', - 'SOLARAD_MAX', - 'SOLARAD_MAX_FLAG', - 'SOLARAD_MIN', - 'SOLARAD_MIN_FLAG', - 'SUR_TEMP_TYPE', - 'SUR_TEMP', - 'SUR_TEMP_FLAG', - 'SUR_TEMP_MAX', - 'SUR_TEMP_MAX_FLAG', - 'SUR_TEMP_MIN', - 'SUR_TEMP_MIN_FLAG', - 'RH_AVG', - 'RH_AVG_FLAG', - 'SOIL_MOISTURE_5', - 'SOIL_MOISTURE_10', - 'SOIL_MOISTURE_20', - 'SOIL_MOISTURE_50', - 'SOIL_MOISTURE_100', - 'SOIL_TEMP_5', - 'SOIL_TEMP_10', - 'SOIL_TEMP_20', - 'SOIL_TEMP_50', - 'SOIL_TEMP_100', + "WBANNO", + "UTC_DATE", + "UTC_TIME", + "LST_DATE", + "LST_TIME", + "CRX_VN", + "LONGITUDE", + "LATITUDE", + "T_CALC", + "T_AVG", + "T_MAX", + "T_MIN", + "P_CALC", + "SOLARAD", + "SOLARAD_FLAG", + "SOLARAD_MAX", + "SOLARAD_MAX_FLAG", + "SOLARAD_MIN", + "SOLARAD_MIN_FLAG", + "SUR_TEMP_TYPE", + "SUR_TEMP", + "SUR_TEMP_FLAG", + "SUR_TEMP_MAX", + "SUR_TEMP_MAX_FLAG", + "SUR_TEMP_MIN", + "SUR_TEMP_MIN_FLAG", + "RH_AVG", + "RH_AVG_FLAG", + "SOIL_MOISTURE_5", + "SOIL_MOISTURE_10", + "SOIL_MOISTURE_20", + "SOIL_MOISTURE_50", + "SOIL_MOISTURE_100", + "SOIL_TEMP_5", + "SOIL_TEMP_10", + "SOIL_TEMP_20", + "SOIL_TEMP_50", + "SOIL_TEMP_100", ] self.dcols = [ - 'WBANNO', - 'LST_DATE', - 'CRX_VN', - 'LONGITUDE', - 'LATITUDE', - 'T_MAX', - 'T_MIN', - 'T_MEAN', - 'T_AVG', - 'P_CALC', - 'SOLARAD', - 'SUR_TEMP_TYPE', - 'SUR_TEMP_MAX', - 'SUR_TEMP_MAX', - 'SUR_TEMP_MIN', - 'SUR_TEMP_AVG', - 'RH_MAX', - 'RH_MIN', - 'RH_AVG', - 'SOIL_MOISTURE_5', - 'SOIL_MOISTURE_10', - 'SOIL_MOISTURE_20', - 'SOIL_MOISTURE_50', - 'SOIL_MOISTURE_100', - 'SOIL_TEMP_5', - 'SOIL_TEMP_10', - 'SOIL_TEMP_20', - 'SOIL_TEMP_50', - 'SOIL_TEMP_100', + "WBANNO", + "LST_DATE", + "CRX_VN", + "LONGITUDE", + "LATITUDE", + "T_MAX", + "T_MIN", + "T_MEAN", + "T_AVG", + "P_CALC", + "SOLARAD", + "SUR_TEMP_TYPE", + "SUR_TEMP_MAX", + "SUR_TEMP_MAX", + "SUR_TEMP_MIN", + "SUR_TEMP_AVG", + "RH_MAX", + "RH_MIN", + "RH_AVG", + "SOIL_MOISTURE_5", + "SOIL_MOISTURE_10", + "SOIL_MOISTURE_20", + "SOIL_MOISTURE_50", + "SOIL_MOISTURE_100", + "SOIL_TEMP_5", + "SOIL_TEMP_10", + "SOIL_TEMP_20", + "SOIL_TEMP_50", + "SOIL_TEMP_100", ] self.shcols = [ - 'WBANNO', - 'UTC_DATE', - 'UTC_TIME', - 'LST_DATE', - 'LST_TIME', - 'CRX_VN', - 'LONGITUDE', - 'LATITUDE', - 'T_MEAN', - 'P_CALC', - 'SOLARAD', - 'SOLARAD_FLAG', - 'SUR_TEMP_AVG', - 'SUR_TEMP_TYPE', - 'SUR_TEMP_FLAG', - 'RH_AVG', - 'RH_FLAG', - 'SOIL_MOISTURE_5', - 'SOIL_TEMP_5', - 'WETNESS', - 'WET_FLAG', - 'WIND', - 'WIND_FLAG', + "WBANNO", + "UTC_DATE", + "UTC_TIME", + "LST_DATE", + "LST_TIME", + "CRX_VN", + "LONGITUDE", + "LATITUDE", + "T_MEAN", + "P_CALC", + "SOLARAD", + "SOLARAD_FLAG", + "SUR_TEMP_AVG", + "SUR_TEMP_TYPE", + "SUR_TEMP_FLAG", + "RH_AVG", + "RH_FLAG", + "SOIL_MOISTURE_5", + "SOIL_TEMP_5", + "WETNESS", + "WET_FLAG", + "WIND", + "WIND_FLAG", ] - self.citiation = 'Diamond, H. J., T. R. Karl, M. A. Palecki, C. B. Baker, J. E. Bell, R. D. Leeper, D. R. Easterling, J. H. ' - ' Lawrimore, T. P. Meyers, M. R. Helfert, G. Goodge, and P. W. Thorne,' - ' 2013: U.S. Climate Reference Network after one decade of operations:' - ' status and assessment. Bull. Amer. Meteor. Soc., 94, 489-498. ' - 'doi: 10.1175/BAMS-D-12-00170.1' - self.citation2 = 'Bell, J. E., M. A. Palecki, C. B. Baker, W. G. ' - 'Collins, J. H. Lawrimore, R. D. Leeper, M. E. Hall, J. Kochendorfer, ' - 'T. P. Meyers, T. Wilson, and H. J. Diamond. 2013: U.S. Climate ' - 'Reference Network soil moisture and temperature observations. J. ' - 'Hydrometeorol., 14, 977-988. doi: 10.1175/JHM-D-12-0146.1' + self.citiation = "Diamond, H. J., T. R. Karl, M. A. Palecki, C. B. Baker, J. E. Bell, R. D. Leeper, D. R. Easterling, J. H. " + " Lawrimore, T. P. Meyers, M. R. Helfert, G. Goodge, and P. W. Thorne," + " 2013: U.S. Climate Reference Network after one decade of operations:" + " status and assessment. Bull. Amer. Meteor. Soc., 94, 489-498. " + "doi: 10.1175/BAMS-D-12-00170.1" + self.citation2 = "Bell, J. E., M. A. Palecki, C. B. Baker, W. G. " + "Collins, J. H. Lawrimore, R. D. Leeper, M. E. Hall, J. Kochendorfer, " + "T. P. Meyers, T. Wilson, and H. J. Diamond. 2013: U.S. Climate " + "Reference Network soil moisture and temperature observations. J. " + "Hydrometeorol., 14, 977-988. doi: 10.1175/JHM-D-12-0146.1" def load_file(self, url): nanvals = [-99999, -9999.0] - if 'CRND0103' in url: + if "CRND0103" in url: cols = self.dcols - df = pd.read_csv(url, delim_whitespace=True, names=cols, parse_dates={'time_local': [1]}, infer_datetime_format=True, na_values=nanvals) + df = pd.read_csv( + url, + delim_whitespace=True, + names=cols, + parse_dates={"time_local": [1]}, + infer_datetime_format=True, + na_values=nanvals, + ) self.daily = True - elif 'CRNS0101' in url: + elif "CRNS0101" in url: cols = self.shcols df = pd.read_csv( url, delim_whitespace=True, names=cols, - parse_dates={'time': ['UTC_DATE', 'UTC_TIME'], 'time_local': ['LST_DATE', 'LST_TIME']}, + parse_dates={ + "time": ["UTC_DATE", "UTC_TIME"], + "time_local": ["LST_DATE", "LST_TIME"], + }, infer_datetime_format=True, na_values=nanvals, ) @@ -270,7 +285,10 @@ def load_file(self, url): url, delim_whitespace=True, names=cols, - parse_dates={'time': ['UTC_DATE', 'UTC_TIME'], 'time_local': ['LST_DATE', 'LST_TIME']}, + parse_dates={ + "time": ["UTC_DATE", "UTC_TIME"], + "time_local": ["LST_DATE", "LST_TIME"], + }, infer_datetime_format=True, na_values=nanvals, ) @@ -278,15 +296,15 @@ def load_file(self, url): def build_url(self, year, state, site, vector, daily=False, sub_hourly=False): if daily: - beginning = self.baseurl + 'daily01/' + year + '/' - fname = 'CRND0103-' + beginning = self.baseurl + "daily01/" + year + "/" + fname = "CRND0103-" elif sub_hourly: - beginning = self.baseurl + 'subhourly01/' + year + '/' - fname = 'CRNS0101-05-' + beginning = self.baseurl + "subhourly01/" + year + "/" + fname = "CRNS0101-05-" else: - beginning = self.baseurl + 'hourly02/' + year + '/' - fname = 'CRNH0203-' - rest = year + '-' + state + '_' + site + '_' + vector + '.txt' + beginning = self.baseurl + "hourly02/" + year + "/" + fname = "CRNH0203-" + rest = year + "-" + state + "_" + site + "_" + vector + ".txt" url = beginning + fname + rest fname = fname + rest return url, fname @@ -333,16 +351,18 @@ def build_urls(self, monitors, dates, daily=False, sub_hourly=False): Description of returned object. """ - print('Building and checking urls...') + print("Building and checking urls...") years = pd.DatetimeIndex(dates).year.unique().astype(str) urls = [] fnames = [] for i in monitors.index: for y in years: state = monitors.iloc[i].STATE - site = monitors.iloc[i].LOCATION.replace(' ', '_') - vector = monitors.iloc[i].VECTOR.replace(' ', '_') - url, fname = self.build_url(y, state, site, vector, daily=daily, sub_hourly=sub_hourly) + site = monitors.iloc[i].LOCATION.replace(" ", "_") + vector = monitors.iloc[i].VECTOR.replace(" ", "_") + url, fname = self.build_url( + y, state, site, vector, daily=daily, sub_hourly=sub_hourly + ) if self.check_url(url): urls.append(url) fnames.append(fname) @@ -372,12 +392,12 @@ def retrieve(self, url, fname): import wget if not os.path.isfile(fname): - print('Retrieving: ' + fname) + print("Retrieving: " + fname) print(url) - print('\n') + print("\n") wget.download(url) else: - print('File Exists: ' + fname) + print("File Exists: " + fname) def add_data(self, dates, daily=False, sub_hourly=False, download=False, latlonbox=None): """Short summary. @@ -408,7 +428,12 @@ def add_data(self, dates, daily=False, sub_hourly=False, download=False, latlonb self.get_monitor_df() if latlonbox is not None: # get them all[latmin,lonmin,latmax,lonmax] mdf = self.monitor_df - con = (mdf.LATITUDE >= latlonbox[0]) & (mdf.LATITUDE <= latlonbox[2]) & (mdf.LONGITUDE >= latlonbox[1]) & (mdf.LONGITUDE <= latlonbox[3]) + con = ( + (mdf.LATITUDE >= latlonbox[0]) + & (mdf.LATITUDE <= latlonbox[2]) + & (mdf.LONGITUDE >= latlonbox[1]) + & (mdf.LONGITUDE <= latlonbox[3]) + ) monitors = mdf.loc[con].copy() else: monitors = self.monitor_df.copy() @@ -421,13 +446,15 @@ def add_data(self, dates, daily=False, sub_hourly=False, download=False, latlonb dfs = [dask.delayed(self.load_file)(i) for i in urls] dff = dd.from_delayed(dfs) self.df = dff.compute() - self.df = pd.merge(self.df, monitors, how='left', on=['WBANNO', 'LATITUDE', 'LONGITUDE']) - if ~self.df.columns.isin(['time']).max(): - self.df['time'] = self.df.time_local + pd.to_timedelta(self.df.GMT_OFFSET, unit='H') - id_vars = self.monitor_df.columns.append(pd.Index(['time', 'time_local'])) + self.df = pd.merge(self.df, monitors, how="left", on=["WBANNO", "LATITUDE", "LONGITUDE"]) + if ~self.df.columns.isin(["time"]).max(): + self.df["time"] = self.df.time_local + pd.to_timedelta(self.df.GMT_OFFSET, unit="H") + id_vars = self.monitor_df.columns.append(pd.Index(["time", "time_local"])) keys = self.df.columns[self.df.columns.isin(id_vars)] - self.df = pd.melt(self.df, id_vars=keys, var_name='variable', value_name='obs') # this stacks columns to be inline with MONET - self.df.rename(columns={'WBANNO': 'siteid'}, inplace=True) + self.df = pd.melt( + self.df, id_vars=keys, var_name="variable", value_name="obs" + ) # this stacks columns to be inline with MONET + self.df.rename(columns={"WBANNO": "siteid"}, inplace=True) self.change_units() self.df.columns = [i.lower() for i in self.df.columns] @@ -449,26 +476,26 @@ def change_units(self): Description of returned object. """ - self.df['units'] = '' - for i in self.df.variable.unique(): - if self.daily and i is 'SOLARAD': - self.df.loc[self.df.variable == i, 'units'] = 'MJ/m^2' - elif 'T_' in i: - self.df.loc[self.df.variable == i, 'units'] = 'K' - self.df.loc[self.df.variable == i, 'obs'] += 273.15 - elif 'FLAG' in i or 'TYPE' in i: + self.df["units"] = "" + for v in self.df.variable.unique(): + if self.daily and v == "SOLARAD": + self.df.loc[self.df.variable == v, "units"] = "MJ/m^2" + elif "T_" in v: + self.df.loc[self.df.variable == v, "units"] = "K" + self.df.loc[self.df.variable == v, "obs"] += 273.15 + elif "FLAG" in v or "TYPE" in v: pass - elif 'TEMP' in i: - self.df.loc[self.df.variable == i, 'units'] = 'K' - self.df.loc[self.df.variable == i, 'obs'] += 273.15 - elif 'MOISTURE' in i: - self.df.loc[self.df.variable == i, 'units'] = 'm^3/m^3' - elif 'RH' in i: - self.df.loc[self.df.variable == i, 'units'] = '%' - elif 'P_CALC' is i: - self.df.loc[self.df.variable == i, 'units'] = 'mm' - - def set_daterange(self, begin='', end=''): + elif "TEMP" in v: + self.df.loc[self.df.variable == v, "units"] = "K" + self.df.loc[self.df.variable == v, "obs"] += 273.15 + elif "MOISTURE" in v: + self.df.loc[self.df.variable == v, "units"] = "m^3/m^3" + elif "RH" in v: + self.df.loc[self.df.variable == v, "units"] = "%" + elif v == "P_CALC": + self.df.loc[self.df.variable == v, "units"] = "mm" + + def set_daterange(self, begin="", end=""): """Short summary. Parameters @@ -484,7 +511,7 @@ def set_daterange(self, begin='', end=''): Description of returned object. """ - dates = pd.date_range(start=begin, end=end, freq='H').values.astype('M8[s]').astype('O') + dates = pd.date_range(start=begin, end=end, freq="H").values.astype("M8[s]").astype("O") self.dates = dates def get_monitor_df(self): @@ -496,4 +523,4 @@ def get_monitor_df(self): Description of returned object. """ - self.monitor_df = pd.read_csv(self.monitor_file, delimiter='\t') + self.monitor_df = pd.read_csv(self.monitor_file, delimiter="\t") diff --git a/monetio/obs/epa_util.py b/monetio/obs/epa_util.py index a0497625..bd56e182 100644 --- a/monetio/obs/epa_util.py +++ b/monetio/obs/epa_util.py @@ -1,7 +1,4 @@ -import pandas as pd - - -def convert_epa_unit(df, obscolumn='SO2', unit='UG/M3'): +def convert_epa_unit(df, obscolumn="SO2", unit="UG/M3"): """ converts ppb to ug/m3 for SO2 in aqs and airnow datasets See 40 CFR Part 50.5, Appendix A-1 to part 50, appendix A=2 to Part 50. @@ -28,19 +25,19 @@ def convert_epa_unit(df, obscolumn='SO2', unit='UG/M3'): unit. """ factor = 2.6178 - ppb = 'ppb' - ugm3 = 'ug/m3' + ppb = "ppb" + ugm3 = "ug/m3" if unit.lower() == ugm3: - df = df[df['units'] == ppb] # find columns with units of 'ppb' - df['units'] = unit.upper() + df = df[df["units"] == ppb] # find columns with units of 'ppb' + df["units"] = unit.upper() df[obscolumn] = df[obscolumn] * factor elif unit.lower() == ppb: - df = df[df['units'] == ugm3] # find columns with units of 'ppb' + df = df[df["units"] == ugm3] # find columns with units of 'ppb' df[obscolumn] = df[obscolumn] / factor return df -def check_cmaq_units(df, param='O3', aqs_param='OZONE'): +def check_cmaq_units(df, param="O3", aqs_param="OZONE"): """Short summary. Parameters @@ -60,19 +57,19 @@ def check_cmaq_units(df, param='O3', aqs_param='OZONE'): """ aunit = df[df.variable == aqs_param].Units.unique()[0] - if aunit == 'UG/M3': + if aunit == "UG/M3": fac = 1.0 - elif aunit == 'PPB': + elif aunit == "PPB": fac = 1000.0 - elif aunit == 'ppbC': + elif aunit == "ppbC": fac = 1000.0 - if aqs_param == 'ISOPRENE': + if aqs_param == "ISOPRENE": fac *= 5.0 - elif aqs_param == 'BENZENE': + elif aqs_param == "BENZENE": fac *= 6.0 - elif aqs_param == 'TOLUENE': + elif aqs_param == "TOLUENE": fac *= 7.0 - elif aqs_param == 'O-XYLENE': + elif aqs_param == "O-XYLENE": fac *= 8.0 else: fac = 1.0 @@ -97,7 +94,12 @@ def ensure_values_indomain(df, lon, lat): Description of returned object. """ - con = (df.Latitude.values > lat.min()) & (df.Latitude.values < lat.max()) & (df.Longitude.values > lon.min()) & (df.Longitude.values < lon.max()) + con = ( + (df.Latitude.values > lat.min()) + & (df.Latitude.values < lat.max()) + & (df.Longitude.values > lon.min()) + & (df.Longitude.values < lon.max()) + ) df = df[con].copy() return df @@ -120,25 +122,25 @@ def get_region(df): from numpy import array, concatenate from pandas import DataFrame, merge - se = array(['AL', 'FL', 'GA', 'MS', 'NC', 'SC', 'TN', 'VA', 'WV']) - ne = array(['CT', 'DE', 'DC', 'ME', 'MD', 'MA', 'NH', 'NJ', 'NY', 'PA', 'RI', 'VT']) - nc = array(['IL', 'IN', 'IA', 'KY', 'MI', 'MN', 'MO', 'OH', 'WI']) - sc = array(['AR', 'LA', 'OK', 'TX']) - r = array(['AZ', 'CO', 'ID', 'KS', 'MT', 'NE', 'NV', 'NM', 'ND', 'SD', 'UT', 'WY']) - p = array(['CA', 'OR', 'WA']) - ner = array(['Northeast' for i in ne]) - ser = array(['Southeast' for i in se]) - ncr = array(['North_Central' for i in nc]) - scr = array(['South_Central' for i in sc]) - rr = array(['Rockies' for i in r]) - pr = array(['Pacific' for i in p]) + se = array(["AL", "FL", "GA", "MS", "NC", "SC", "TN", "VA", "WV"]) + ne = array(["CT", "DE", "DC", "ME", "MD", "MA", "NH", "NJ", "NY", "PA", "RI", "VT"]) + nc = array(["IL", "IN", "IA", "KY", "MI", "MN", "MO", "OH", "WI"]) + sc = array(["AR", "LA", "OK", "TX"]) + r = array(["AZ", "CO", "ID", "KS", "MT", "NE", "NV", "NM", "ND", "SD", "UT", "WY"]) + p = array(["CA", "OR", "WA"]) + ner = array(["Northeast" for i in ne]) + ser = array(["Southeast" for i in se]) + ncr = array(["North_Central" for i in nc]) + scr = array(["South_Central" for i in sc]) + rr = array(["Rockies" for i in r]) + pr = array(["Pacific" for i in p]) states = concatenate([se, ne, nc, sc, r, p]) region = concatenate([ser, ner, ncr, scr, rr, pr]) - dd = DataFrame({'state_name': states, 'region': region}) - return merge(df, dd, how='left', on='state_name') + dd = DataFrame({"state_name": states, "region": region}) + return merge(df, dd, how="left", on="state_name") -def get_epa_location_df(df, param, site='', city='', region='', epa_region='', state=''): +def get_epa_location_df(df, param, site="", city="", region="", epa_region="", state=""): """Short summary. Parameters @@ -164,58 +166,64 @@ def get_epa_location_df(df, param, site='', city='', region='', epa_region='', s Description of returned object. """ - new = df.groupby('variable').get_group(param) - if site != '': + new = df.groupby("variable").get_group(param) + if site != "": if site in new.siteid.unique(): df2 = new.loc[new.siteid == site] - title = df2.siteid.unique().astype('str')[0].zfill(9) - elif city != '': + title = df2.siteid.unique().astype("str")[0].zfill(9) + elif city != "": names = df.msa_name.dropna().unique() for i in names: if i.upper().find(city.upper()) != -1: name = i print(name) - df2 = new[new['msa_name'] == name].copy().drop_duplicates() + df2 = new[new["msa_name"] == name].copy().drop_duplicates() title = name - elif state != '': - df2 = new[new['state_name'].str.upper() == state.upper()].copy().drop_duplicates() - title = 'STATE: ' + state.upper() - elif region != '': - df2 = new[new['Region'].str.upper() == region.upper()].copy().drop_duplicates() - title = 'REGION: ' + region.upper() - elif epa_region != '': - df2 = new[new['EPA_region'].str.upper() == epa_region.upper()].copy().drop_duplicates() - title = 'EPA_REGION: ' + epa_region.upper() + elif state != "": + df2 = new[new["state_name"].str.upper() == state.upper()].copy().drop_duplicates() + title = "STATE: " + state.upper() + elif region != "": + df2 = new[new["Region"].str.upper() == region.upper()].copy().drop_duplicates() + title = "REGION: " + region.upper() + elif epa_region != "": + df2 = new[new["EPA_region"].str.upper() == epa_region.upper()].copy().drop_duplicates() + title = "EPA_REGION: " + epa_region.upper() else: df2 = new - title = 'Domain' + title = "Domain" return df2, title -def regulatory_resample(df, col='model', pollutant_standard=None): - from pandas import to_timedelta, concat +def regulatory_resample(df, col="model", pollutant_standard=None): + from pandas import concat, to_timedelta - df['time_local'] = df.time + to_timedelta(df.gmt_offset, unit='H') - if df.variable.unique()[0] == 'CO': + df["time_local"] = df.time + to_timedelta(df.gmt_offset, unit="H") + if df.variable.unique()[0] == "CO": df1 = calc_daily_max(df, rolling_frequency=1) - df1['pollutant_standard'] = 'CO 1-hour 1971' + df1["pollutant_standard"] = "CO 1-hour 1971" df2 = calc_daily_max(df, rolling_frequency=8) - df2['pollutant_standard'] = 'CO 8-hour 1971' + df2["pollutant_standard"] = "CO 8-hour 1971" dfreturn = concat([df1, df2], ignore_index=True) - elif df.variable.unique()[0] == 'OZONE': + elif df.variable.unique()[0] == "OZONE": dfreturn = calc_daily_max(df, rolling_frequency=8) - elif df.variable.unique()[0] == 'SO2': + elif df.variable.unique()[0] == "SO2": df1 = calc_daily_max(df, rolling_frequency=1) - df1['pollutant_standard'] = 'SO2 1-hour 1971' + df1["pollutant_standard"] = "SO2 1-hour 1971" df2 = calc_daily_max(df, rolling_frequency=3) - df2['pollutant_standard'] = 'SO2 8-hour 1971' + df2["pollutant_standard"] = "SO2 8-hour 1971" dfreturn = concat([df1, df2], ignore_index=True) - elif df.variable.unique()[0] == 'NO2': + elif df.variable.unique()[0] == "NO2": dfreturn = calc_daily_max(df, rolling_frequency=1) else: # do daily average - dfn = df.drop_duplicates(subset=['siteid']) - df = df.groupby('siteid')[col].resample('D').mean().reset_index().rename(columns={'level_1': 'time_local'}) - dfreturn = df.merge(dfn, how='left', on='siteid') + dfn = df.drop_duplicates(subset=["siteid"]) + df = ( + df.groupby("siteid")[col] + .resample("D") + .mean() + .reset_index() + .rename(columns={"level_1": "time_local"}) + ) + dfreturn = df.merge(dfn, how="left", on="siteid") return dfreturn @@ -225,229 +233,269 @@ def calc_daily_max(df, param=None, rolling_frequency=8): if param is None: temp = df.copy() else: - temp = df.groupby('variable').get_group(param) + temp = df.groupby("variable").get_group(param) temp.index = temp.time_local if rolling_frequency > 1: - g = temp.groupby('siteid')['model', 'gmt_offset'].rolling(rolling_frequency, center=True, win_type='boxcar').mean() + g = ( + temp.groupby("siteid")["model", "gmt_offset"] + .rolling(rolling_frequency, center=True, win_type="boxcar") + .mean() + ) q = g.reset_index(level=0) - k = q.groupby('siteid').resample('D').max().reset_index(level=1).reset_index(drop='siteid').dropna() + k = ( + q.groupby("siteid") + .resample("D") + .max() + .reset_index(level=1) + .reset_index(drop="siteid") + .dropna() + ) else: - k = temp.groupby('siteid')['model', 'gmt_offset'].resample('D').max().reset_index().rename({'level_1': 'time_local'}) - columnstomerge = temp.columns[~temp.columns.isin(k.columns) * (temp.columns != 'time')].append(Index(['siteid'])) + k = ( + temp.groupby("siteid")["model", "gmt_offset"] + .resample("D") + .max() + .reset_index() + .rename({"level_1": "time_local"}) + ) + columnstomerge = temp.columns[~temp.columns.isin(k.columns) * (temp.columns != "time")].append( + Index(["siteid"]) + ) if param is None: - dff = k.merge(df[columnstomerge], on='siteid', how='left').drop_duplicates(subset=['siteid', 'time_local']) - else: - dff = k.merge(df.groupby('variable').get_group(param)[columnstomerge], on='siteid', how='left').drop_duplicates( - subset=['siteid', 'time_local'] + dff = k.merge(df[columnstomerge], on="siteid", how="left").drop_duplicates( + subset=["siteid", "time_local"] ) - dff['time'] = dff.time_local - to_timedelta(dff.gmt_offset, unit='H') + else: + dff = k.merge( + df.groupby("variable").get_group(param)[columnstomerge], on="siteid", how="left" + ).drop_duplicates(subset=["siteid", "time_local"]) + dff["time"] = dff.time_local - to_timedelta(dff.gmt_offset, unit="H") return dff def convert_statenames_to_abv(df): d = { - 'Alabama': 'AL', - 'Alaska': 'AK', - 'Arizona': 'AZ', - 'Arkansas': 'AR', - 'California': 'CA', - 'Colorado': 'CO', - 'Connecticut': 'CT', - 'Delaware': 'DE', - 'Florida': 'FL', - 'Georgia': 'GA', - 'Hawaii': 'HI', - 'Idaho': 'ID', - 'Illinois': 'IL', - 'Indiana': 'IN', - 'Iowa': 'IA', - 'Kansas': 'KS', - 'Kentucky': 'KY', - 'Louisiana': 'LA', - 'Maine': 'ME', - 'Maryland': 'MD', - 'Massachusetts': 'MA', - 'Michigan': 'MI', - 'Minnesota': 'MN', - 'Mississippi': 'MS', - 'Missouri': 'MO', - 'Montana': 'MT', - 'Nebraska': 'NE', - 'Nevada': 'NV', - 'New Hampshire': 'NH', - 'New Jersey': 'NJ', - 'New Mexico': 'NM', - 'New York': 'NY', - 'North Carolina': 'NC', - 'North Dakota': 'ND', - 'Ohio': 'OH', - 'Oklahoma': 'OK', - 'Oregon': 'OR', - 'Pennsylvania': 'PA', - 'Rhode Island': 'RI', - 'South Carolina': 'SC', - 'South Dakota': 'SD', - 'state': 'Postal', - 'Tennessee': 'TN', - 'Texas': 'TX', - 'Utah': 'UT', - 'Vermont': 'VT', - 'Virginia': 'VA', - 'Washington': 'WA', - 'West Virginia': 'WV', - 'Wisconsin': 'WI', - 'Wyoming': 'WY', + "Alabama": "AL", + "Alaska": "AK", + "Arizona": "AZ", + "Arkansas": "AR", + "California": "CA", + "Colorado": "CO", + "Connecticut": "CT", + "Delaware": "DE", + "Florida": "FL", + "Georgia": "GA", + "Hawaii": "HI", + "Idaho": "ID", + "Illinois": "IL", + "Indiana": "IN", + "Iowa": "IA", + "Kansas": "KS", + "Kentucky": "KY", + "Louisiana": "LA", + "Maine": "ME", + "Maryland": "MD", + "Massachusetts": "MA", + "Michigan": "MI", + "Minnesota": "MN", + "Mississippi": "MS", + "Missouri": "MO", + "Montana": "MT", + "Nebraska": "NE", + "Nevada": "NV", + "New Hampshire": "NH", + "New Jersey": "NJ", + "New Mexico": "NM", + "New York": "NY", + "North Carolina": "NC", + "North Dakota": "ND", + "Ohio": "OH", + "Oklahoma": "OK", + "Oregon": "OR", + "Pennsylvania": "PA", + "Rhode Island": "RI", + "South Carolina": "SC", + "South Dakota": "SD", + "state": "Postal", + "Tennessee": "TN", + "Texas": "TX", + "Utah": "UT", + "Vermont": "VT", + "Virginia": "VA", + "Washington": "WA", + "West Virginia": "WV", + "Wisconsin": "WI", + "Wyoming": "WY", } for i in d: - df['state_name'].loc[df.state_name.isin([i])] = d[i] - df['state_name'].loc[df.state_name.isin(['Canada'])] = 'CC' - df['state_name'].loc[df.state_name.isin(['Mexico'])] = 'MM' + df["state_name"].loc[df.state_name.isin([i])] = d[i] + df["state_name"].loc[df.state_name.isin(["Canada"])] = "CC" + df["state_name"].loc[df.state_name.isin(["Mexico"])] = "MM" return df def read_monitor_file(network=None, airnow=False, drop_latlon=True): - import pandas as pd import os + import pandas as pd + if airnow: - monitor_airnow_url = 'https://s3-us-west-1.amazonaws.com//files.airnowtech.org/airnow/today/monitoring_site_locations.dat' + monitor_airnow_url = "https://s3-us-west-1.amazonaws.com//files.airnowtech.org/airnow/today/monitoring_site_locations.dat" colsinuse = [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] - airnow = pd.read_csv(monitor_airnow_url, delimiter='|', header=None, usecols=colsinuse, dtype={0: str}, encoding="ISO-8859-1") + airnow = pd.read_csv( + monitor_airnow_url, + delimiter="|", + header=None, + usecols=colsinuse, + dtype={0: str}, + encoding="ISO-8859-1", + ) airnow.columns = [ - 'siteid', - 'Site_Code', - 'Site_Name', - 'Status', - 'Agency', - 'Agency_Name', - 'EPA_region', - 'latitude', - 'longitude', - 'Elevation', - 'GMT_Offset', - 'Country_Code', - 'CMSA_Code', - 'CMSA_Name', - 'MSA_Code', - 'MSA_Name', - 'state_Code', - 'state_Name', - 'County_Code', - 'County_Name', - 'City_Code', + "siteid", + "Site_Code", + "Site_Name", + "Status", + "Agency", + "Agency_Name", + "EPA_region", + "latitude", + "longitude", + "Elevation", + "GMT_Offset", + "Country_Code", + "CMSA_Code", + "CMSA_Name", + "MSA_Code", + "MSA_Name", + "state_Code", + "state_Name", + "County_Code", + "County_Name", + "City_Code", ] - airnow['airnow_flag'] = 'AIRNOW' + airnow["airnow_flag"] = "AIRNOW" airnow.columns = [i.lower() for i in airnow.columns] return airnow else: try: basedir = os.path.abspath(os.path.dirname(__file__))[:-3] - fname = os.path.join(basedir, 'data', 'monitoring_site_locations.hdf') + fname = os.path.join(basedir, "data", "monitoring_site_locations.hdf") if os.path.isfile(fname): - print('Monitor File Path: ' + fname) + print("Monitor File Path: " + fname) sss = pd.read_hdf(fname) # monitor_drop = ['state_code', u'county_code'] # s.drop(monitor_drop, axis=1, inplace=True) except Exception: - print('Monitor File Not Found... Reprocessing') - baseurl = 'https://aqs.epa.gov/aqsweb/airdata/' - site_url = baseurl + 'aqs_sites.zip' + print("Monitor File Not Found... Reprocessing") + baseurl = "https://aqs.epa.gov/aqsweb/airdata/" + site_url = baseurl + "aqs_sites.zip" # has network info (CSN IMPROVE etc....) - monitor_url = baseurl + 'aqs_monitors.zip' + monitor_url = baseurl + "aqs_monitors.zip" # Airnow monitor file - monitor_airnow_url = 'https://s3-us-west-1.amazonaws.com//files.airnowtech.org/airnow/today/monitoring_site_locations.dat' + monitor_airnow_url = "https://s3-us-west-1.amazonaws.com//files.airnowtech.org/airnow/today/monitoring_site_locations.dat" colsinuse = [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] - airnow = pd.read_csv(monitor_airnow_url, delimiter='|', header=None, usecols=colsinuse, dtype={0: str}, encoding="ISO-8859-1") + airnow = pd.read_csv( + monitor_airnow_url, + delimiter="|", + header=None, + usecols=colsinuse, + dtype={0: str}, + encoding="ISO-8859-1", + ) airnow.columns = [ - 'siteid', - 'Site_Code', - 'Site_Name', - 'Status', - 'Agency', - 'Agency_Name', - 'EPA_region', - 'latitude', - 'longitude', - 'Elevation', - 'GMT_Offset', - 'Country_Code', - 'CMSA_Code', - 'CMSA_Name', - 'MSA_Code', - 'MSA_Name', - 'state_Code', - 'state_Name', - 'County_Code', - 'County_Name', - 'City_Code', + "siteid", + "Site_Code", + "Site_Name", + "Status", + "Agency", + "Agency_Name", + "EPA_region", + "latitude", + "longitude", + "Elevation", + "GMT_Offset", + "Country_Code", + "CMSA_Code", + "CMSA_Name", + "MSA_Code", + "MSA_Name", + "state_Code", + "state_Name", + "County_Code", + "County_Name", + "City_Code", ] - airnow['airnow_flag'] = 'AIRNOW' + airnow["airnow_flag"] = "AIRNOW" airnow.columns = [i.lower() for i in airnow.columns] # Read EPA Site file - site = pd.read_csv(site_url, encoding='ISO-8859-1') + site = pd.read_csv(site_url, encoding="ISO-8859-1") # read epa monitor file - monitor = pd.read_csv(monitor_url, encoding='ISO-8859-1') + monitor = pd.read_csv(monitor_url, encoding="ISO-8859-1") # make siteid column - site['siteid'] = ( - site['State Code'].astype(str).str.zfill(2) - + site['County Code'].astype(str).str.zfill(3) - + site['Site Number'].astype(str).str.zfill(4) + site["siteid"] = ( + site["State Code"].astype(str).str.zfill(2) + + site["County Code"].astype(str).str.zfill(3) + + site["Site Number"].astype(str).str.zfill(4) + ) + monitor["siteid"] = ( + monitor["State Code"].astype(str).str.zfill(2) + + monitor["County Code"].astype(str).str.zfill(3) + + monitor["Site Number"].astype(str).str.zfill(4) ) - monitor['siteid'] = ( - monitor['State Code'].astype(str).str.zfill(2) - + monitor['County Code'].astype(str).str.zfill(3) - + monitor['Site Number'].astype(str).str.zfill(4) + site.columns = [i.replace(" ", "_") for i in site.columns] + s = monitor.merge( + site[["siteid", "Land_Use", "Location_Setting", "GMT_Offset"]], + on=["siteid"], + how="left", ) - site.columns = [i.replace(' ', '_') for i in site.columns] - s = monitor.merge(site[['siteid', 'Land_Use', 'Location_Setting', 'GMT_Offset']], on=['siteid'], how='left') - s.columns = [i.replace(' ', '_').lower() for i in s.columns] + s.columns = [i.replace(" ", "_").lower() for i in s.columns] monitor_drop = [ - 'state_code', - u'county_code', - u'site_number', - 'extraction_date', - 'parameter_code', - 'parameter_name', - 'poc', - 'last_sample_date', - 'pqao', - 'reporting_agency', - 'exclusions', - u'monitoring_objective', - 'last_method_code', - 'last_method', - u'naaqs_primary_monitor', - u'qa_primary_monitor', + "state_code", + "county_code", + "site_number", + "extraction_date", + "parameter_code", + "parameter_name", + "poc", + "last_sample_date", + "pqao", + "reporting_agency", + "exclusions", + "monitoring_objective", + "last_method_code", + "last_method", + "naaqs_primary_monitor", + "qa_primary_monitor", ] s.drop(monitor_drop, axis=1, inplace=True) # drop airnow keys for merge airnow_drop = [ - u'site_Code', - u'site_Name', - u'status', - u'agency', - 'agency_name', - 'country_code', - u'cmsa_code', - 'state_code', - u'county_code', - u'city_code', - u'latitude', - u'longitude', - 'gmt_offset', - 'state_name', - 'county_name', + "site_Code", + "site_Name", + "status", + "agency", + "agency_name", + "country_code", + "cmsa_code", + "state_code", + "county_code", + "city_code", + "latitude", + "longitude", + "gmt_offset", + "state_name", + "county_name", ] airnow_drop = [i.lower() for i in airnow_drop] airnow.drop(airnow_drop, axis=1, inplace=True) ss = pd.concat([s, airnow], ignore_index=True, sort=True) - sss = convert_statenames_to_abv(ss).dropna(subset=['latitude', 'longitude']) + sss = convert_statenames_to_abv(ss).dropna(subset=["latitude", "longitude"]) if network is not None: - sss = sss.loc[sss.networks.isin([network])].drop_duplicates(subset=['siteid']) + sss = sss.loc[sss.networks.isin([network])].drop_duplicates(subset=["siteid"]) # Getting error that 'latitude' 'longitude' not contained in axis drop_latlon = False if drop_latlon: - if pd.Series(sss.keys()).isin(['latitude', 'longitude']): - return sss.drop(['latitude', 'longitude'], axis=1).drop_duplicates() + if pd.Series(sss.keys()).isin(["latitude", "longitude"]): + return sss.drop(["latitude", "longitude"], axis=1).drop_duplicates() else: return sss.drop_duplicates() diff --git a/monetio/obs/improve_mod.py b/monetio/obs/improve_mod.py index b3801e47..de9c2e04 100644 --- a/monetio/obs/improve_mod.py +++ b/monetio/obs/improve_mod.py @@ -1,10 +1,8 @@ -from builtins import object - import pandas as pd from numpy import NaN -class IMPROVE(object): +class IMPROVE: """Short summary. Attributes @@ -35,7 +33,7 @@ def __init__(self): self.df = None self.daily = True - def add_data(self, fname, add_meta=False, delimiter='\t'): + def add_data(self, fname, add_meta=False, delimiter="\t"): """This assumes that you have downloaded the data from http://views.cira.colostate.edu/fed/DataWizard/Default.aspx The data is the IMPROVE Aerosol dataset @@ -60,38 +58,51 @@ def add_data(self, fname, add_meta=False, delimiter='\t'): """ from .epa_util import read_monitor_file - f = open(fname, 'r') + f = open(fname) lines = f.readlines() skiprows = 0 skip = False for i, line in enumerate(lines): - if line == 'Data\n': + if line == "Data\n": skip = True skiprows = i + 1 break # if meta data is inlcuded if skip: - df = pd.read_csv(fname, delimiter=delimiter, parse_dates=[2], infer_datetime_format=True, dtype={'EPACode': str}, skiprows=skiprows) + df = pd.read_csv( + fname, + delimiter=delimiter, + parse_dates=[2], + infer_datetime_format=True, + dtype={"EPACode": str}, + skiprows=skiprows, + ) else: - df = pd.read_csv(fname, delimiter=delimiter, parse_dates=[2], infer_datetime_format=True, dtype={'EPACode': str}) - df.rename(columns={'EPACode': 'epaid'}, inplace=True) - df.rename(columns={'Val': 'Obs'}, inplace=True) - df.rename(columns={'State': 'state_name'}, inplace=True) - df.rename(columns={'ParamCode': 'variable'}, inplace=True) - df.rename(columns={'SiteCode': 'siteid'}, inplace=True) - df.rename(columns={'Unit': 'Units'}, inplace=True) - df.rename(columns={'Date': 'time'}, inplace=True) - df.drop('Dataset', axis=1, inplace=True) - df['time'] = pd.to_datetime(df.time, format='%Y%m%d') + df = pd.read_csv( + fname, + delimiter=delimiter, + parse_dates=[2], + infer_datetime_format=True, + dtype={"EPACode": str}, + ) + df.rename(columns={"EPACode": "epaid"}, inplace=True) + df.rename(columns={"Val": "Obs"}, inplace=True) + df.rename(columns={"State": "state_name"}, inplace=True) + df.rename(columns={"ParamCode": "variable"}, inplace=True) + df.rename(columns={"SiteCode": "siteid"}, inplace=True) + df.rename(columns={"Unit": "Units"}, inplace=True) + df.rename(columns={"Date": "time"}, inplace=True) + df.drop("Dataset", axis=1, inplace=True) + df["time"] = pd.to_datetime(df.time, format="%Y%m%d") df.columns = [i.lower() for i in df.columns] - if pd.Series(df.keys()).isin(['epaid']).max(): - df['epaid'] = df.epaid.astype(str).str.zfill(9) + if pd.Series(df.keys()).isin(["epaid"]).max(): + df["epaid"] = df.epaid.astype(str).str.zfill(9) if add_meta: - monitor_df = read_monitor_file(network='IMPROVE') # .drop( + monitor_df = read_monitor_file(network="IMPROVE") # .drop( # dropkeys, axis=1) - df = df.merge(monitor_df, how='left', left_on='epaid', right_on='siteid') - df.drop(['siteid_y', 'state_name_y'], inplace=True, axis=1) - df.rename(columns={'siteid_x': 'siteid', 'state_name_x': 'state_name'}, inplace=True) + df = df.merge(monitor_df, how="left", left_on="epaid", right_on="siteid") + df.drop(["siteid_y", "state_name_y"], inplace=True, axis=1) + df.rename(columns={"siteid_x": "siteid", "state_name_x": "state_name"}, inplace=True) try: df.obs.loc[df.obs < df.mdl] = NaN @@ -137,7 +148,7 @@ def get_date_range(self, dates): con = (self.df.time >= dates[0]) & (self.df.time <= dates[-1]) self.df = self.df.loc[con] - def set_daterange(self, begin='', end=''): + def set_daterange(self, begin="", end=""): """Short summary. Parameters @@ -153,5 +164,5 @@ def set_daterange(self, begin='', end=''): Description of returned object. """ - dates = pd.date_range(start=begin, end=end, freq='H').values.astype('M8[s]').astype('O') + dates = pd.date_range(start=begin, end=end, freq="H").values.astype("M8[s]").astype("O") self.dates = dates diff --git a/monetio/obs/ish.py b/monetio/obs/ish.py index a2af1a11..5a200b9e 100644 --- a/monetio/obs/ish.py +++ b/monetio/obs/ish.py @@ -1,6 +1,5 @@ """Python module for reading NOAA ISH files""" -from builtins import object, zip import dask import dask.dataframe as dd @@ -11,7 +10,7 @@ ProgressBar().register() -def add_data(self, dates, box=None, country=None, state=None, site=None, resample=True, window='H'): +def add_data(self, dates, box=None, country=None, state=None, site=None, resample=True, window="H"): """Add data from integrated surface database. Parameters @@ -38,11 +37,13 @@ def add_data(self, dates, box=None, country=None, state=None, site=None, resampl """ ish = ISH() - df = ish.add_data(dates, box=None, country=None, state=None, site=None, resample=True, window='H') + df = ish.add_data( + dates, box=None, country=None, state=None, site=None, resample=True, window="H" + ) return df -class ISH(object): +class ISH: """Integrated Surface Hourly (also known as ISD, Integrated Surface Data) Attributes @@ -63,45 +64,76 @@ class ISH(object): """ def __init__(self): - self.WIDTHS = [4, 11, 8, 4, 1, 6, 7, 5, 5, 5, 4, 3, 1, 1, 4, 1, 5, 1, 1, 1, 6, 1, 1, 1, 5, 1, 5, 1, 5, 1] + self.WIDTHS = [ + 4, + 11, + 8, + 4, + 1, + 6, + 7, + 5, + 5, + 5, + 4, + 3, + 1, + 1, + 4, + 1, + 5, + 1, + 1, + 1, + 6, + 1, + 1, + 1, + 5, + 1, + 5, + 1, + 5, + 1, + ] self.DTYPES = [ - ('varlength', 'i2'), - ('station_id', 'S11'), - ('date', 'i4'), - ('htime', 'i2'), - ('source_flag', 'S1'), - ('latitude', 'float'), - ('longitude', 'float'), - ('code', 'S5'), - ('elev', 'i2'), - ('call_letters', 'S5'), - ('qc_process', 'S4'), - ('wdir', 'i2'), - ('wdir_quality', 'S1'), - ('wdir_type', 'S1'), - ('ws', 'i2'), - ('ws_quality', 'S1'), - ('ceiling', 'i4'), - ('ceiling_quality', 'S1'), - ('ceiling_code', 'S1'), - ('ceiling_cavok', 'S1'), - ('vsb', 'i4'), - ('vsb_quality', 'S1'), - ('vsb_variability', 'S1'), - ('vsb_variability_quality', 'S1'), - ('t', 'i2'), - ('t_quality', 'S1'), - ('dpt', 'i2'), - ('dpt_quality', 'S1'), - ('p', 'i4'), - ('p_quality', 'S1'), + ("varlength", "i2"), + ("station_id", "S11"), + ("date", "i4"), + ("htime", "i2"), + ("source_flag", "S1"), + ("latitude", "float"), + ("longitude", "float"), + ("code", "S5"), + ("elev", "i2"), + ("call_letters", "S5"), + ("qc_process", "S4"), + ("wdir", "i2"), + ("wdir_quality", "S1"), + ("wdir_type", "S1"), + ("ws", "i2"), + ("ws_quality", "S1"), + ("ceiling", "i4"), + ("ceiling_quality", "S1"), + ("ceiling_code", "S1"), + ("ceiling_cavok", "S1"), + ("vsb", "i4"), + ("vsb_quality", "S1"), + ("vsb_variability", "S1"), + ("vsb_variability_quality", "S1"), + ("t", "i2"), + ("t_quality", "S1"), + ("dpt", "i2"), + ("dpt_quality", "S1"), + ("p", "i4"), + ("p_quality", "S1"), ] self.NAMES, _ = list(zip(*self.DTYPES)) - self.history_file = 'https://www1.ncdc.noaa.gov/pub/data/noaa/isd-history.csv' + self.history_file = "https://www1.ncdc.noaa.gov/pub/data/noaa/isd-history.csv" self.history = None self.daily = False - def delimit(self, file_object, delimiter=','): + def delimit(self, file_object, delimiter=","): """Iterate over the lines in a file yielding comma delimited versions. Parameters @@ -131,7 +163,7 @@ def delimit(self, file_object, delimiter=','): for w in self.WIDTHS: items.append(line[index : index + w]) index = index + w - yield ','.join(items) + yield ",".join(items) def _clean_column(self, series, missing=9999, multiplier=1): series = series.apply(float) @@ -146,17 +178,20 @@ def _clean(self, frame): """Clean up the data frame""" # index by time - frame['time'] = [pd.Timestamp('{:08}{:04}'.format(date, htime)) for date, htime in zip(frame['date'], frame['htime'])] + frame["time"] = [ + pd.Timestamp(f"{date:08}{htime:04}") + for date, htime in zip(frame["date"], frame["htime"]) + ] # these fields were combined into 'time' - frame.drop(['date', 'htime'], axis=1, inplace=True) - frame.set_index('time', drop=True, inplace=True) - frame = self._clean_column_by_name(frame, 'wdir', missing=999) - frame = self._clean_column_by_name(frame, 'ws', multiplier=10) - frame = self._clean_column_by_name(frame, 'ceiling', missing=99999) - frame = self._clean_column_by_name(frame, 'vsb', missing=999999) - frame = self._clean_column_by_name(frame, 't', multiplier=10) - frame = self._clean_column_by_name(frame, 'dpt', multiplier=10) - frame = self._clean_column_by_name(frame, 'p', multiplier=10, missing=99999) + frame.drop(["date", "htime"], axis=1, inplace=True) + frame.set_index("time", drop=True, inplace=True) + frame = self._clean_column_by_name(frame, "wdir", missing=999) + frame = self._clean_column_by_name(frame, "ws", multiplier=10) + frame = self._clean_column_by_name(frame, "ceiling", missing=99999) + frame = self._clean_column_by_name(frame, "vsb", missing=999999) + frame = self._clean_column_by_name(frame, "t", multiplier=10) + frame = self._clean_column_by_name(frame, "dpt", multiplier=10) + frame = self._clean_column_by_name(frame, "p", multiplier=10, missing=99999) return frame def read_data_frame(self, file_object): @@ -176,7 +211,7 @@ def read_data_frame(self, file_object): frame_as_array = np.genfromtxt(file_object, delimiter=self.WIDTHS, dtype=self.DTYPES) frame = pd.DataFrame.from_records(frame_as_array) df = self._clean(frame) - df.drop(['latitude', 'longitude'], axis=1, inplace=True) + df.drop(["latitude", "longitude"], axis=1, inplace=True) # df.latitude = self.history.groupby('station_id').get_group( # df.station_id[0]).LAT.values[0] # df.longitude = self.history.groupby('station_id').get_group( @@ -197,28 +232,30 @@ def read_ish_history(self): """ fname = self.history_file - self.history = pd.read_csv(fname, parse_dates=['BEGIN', 'END'], infer_datetime_format=True) + self.history = pd.read_csv(fname, parse_dates=["BEGIN", "END"], infer_datetime_format=True) self.history.columns = [i.lower() for i in self.history.columns] index1 = (self.history.end >= self.dates.min()) & (self.history.begin <= self.dates.max()) - self.history = self.history.loc[index1, :].dropna(subset=['lat', 'lon']) + self.history = self.history.loc[index1, :].dropna(subset=["lat", "lon"]) - self.history.loc[:, 'usaf'] = self.history.usaf.astype('str').str.zfill(6) - self.history.loc[:, 'wban'] = self.history.wban.astype('str').str.zfill(5) - self.history['station_id'] = self.history.usaf + self.history.wban - self.history.rename(columns={'lat': 'latitude', 'lon': 'longitude'}, inplace=True) + self.history.loc[:, "usaf"] = self.history.usaf.astype("str").str.zfill(6) + self.history.loc[:, "wban"] = self.history.wban.astype("str").str.zfill(5) + self.history["station_id"] = self.history.usaf + self.history.wban + self.history.rename(columns={"lat": "latitude", "lon": "longitude"}, inplace=True) def subset_sites(self, latmin=32.65, lonmin=-113.3, latmax=34.5, lonmax=-110.4): - """ find sites within designated region""" + """find sites within designated region""" latindex = (self.history.latitude >= latmin) & (self.history.latitude <= latmax) lonindex = (self.history.longitude >= lonmin) & (self.history.longitude <= lonmax) dfloc = self.history.loc[latindex & lonindex, :] - print('SUBSET') + print("SUBSET") print(dfloc.latitude.unique()) print(dfloc.longitude.unique()) return dfloc - def add_data(self, dates, box=None, country=None, state=None, site=None, resample=True, window='H'): + def add_data( + self, dates, box=None, country=None, state=None, site=None, resample=True, window="H" + ): """Short summary. Parameters @@ -247,24 +284,26 @@ def add_data(self, dates, box=None, country=None, state=None, site=None, resampl self.dates = pd.to_datetime(dates) idate = dates[0] - year = idate.strftime('%Y') - url = 'https://www1.ncdc.noaa.gov/pub/data/noaa/' + year + '/' + year = idate.strftime("%Y") + url = "https://www1.ncdc.noaa.gov/pub/data/noaa/" + year + "/" if self.history is None: self.read_ish_history() - self.history['fname'] = url + self.history.usaf + '-' + self.history.wban + '-' + year + '.gz' + self.history["fname"] = ( + url + self.history.usaf + "-" + self.history.wban + "-" + year + ".gz" + ) dfloc = self.history.copy() # if isinstance(box, None): # type(box) is not type(None): if box is not None: # type(box) is not type(None): - print('Retrieving Sites in: ' + ' '.join(map(str, box))) + print("Retrieving Sites in: " + " ".join(map(str, box))) dfloc = self.subset_sites(latmin=box[0], lonmin=box[1], latmax=box[2], lonmax=box[3]) elif country is not None: - print('Retrieving Country: ' + country) + print("Retrieving Country: " + country) dfloc = self.history.loc[self.history.ctry == country, :] elif state is not None: - print('Retrieving State: ' + state) + print("Retrieving State: " + state) dfloc = self.history.loc[self.history.STATE == state, :] elif site is not None: - print('Retrieving Site: ' + site) + print("Retrieving Site: " + site) dfloc = self.history.loc[self.history.station_id == site, :] print(dfloc.fname.unique()) objs = self.get_url_file_objs(dfloc.fname.unique()) @@ -276,22 +315,26 @@ def add_data(self, dates, box=None, country=None, state=None, site=None, resampl # except: # pass - print(' Reading ISH into pandas DataFrame...') + print(" Reading ISH into pandas DataFrame...") dfs = [dask.delayed(self.read_data_frame)(f) for f in objs] dff = dd.from_delayed(dfs) self.df = dff.compute() - self.df.loc[self.df.vsb == 99999, 'vsb'] = NaN + self.df.loc[self.df.vsb == 99999, "vsb"] = NaN if resample: - print(' Resampling to every ' + window) + print(" Resampling to every " + window) self.df.index = self.df.time - self.df = self.df.groupby('station_id').resample('H').mean().reset_index() + self.df = self.df.groupby("station_id").resample("H").mean().reset_index() # this was encoded as byte literal but in dfloc it is a string so could # not merge on station_id correctly. try: - self.df['station_id'] = self.df['station_id'].str.decode("utf-8") + self.df["station_id"] = self.df["station_id"].str.decode("utf-8") except RuntimeError: pass - self.df = self.df.merge(dfloc[['station_id', 'latitude', 'longitude', 'station name']], on=['station_id'], how='left') + self.df = self.df.merge( + dfloc[["station_id", "latitude", "longitude", "station name"]], + on=["station_id"], + how="left", + ) return self.df.copy() @@ -311,36 +354,37 @@ def get_url_file_objs(self, fname): """ import gzip import shutil + import requests objs = [] - print(' Constructing ISH file objects from urls...') + print(" Constructing ISH file objects from urls...") mmm = 0 jjj = 0 for iii in fname: # print i try: r2 = requests.get(iii, stream=True) - temp = iii.split('/') + temp = iii.split("/") temp = temp[-1] - fname = 'isd.' + temp.replace('.gz', '') + fname = "isd." + temp.replace(".gz", "") if r2.status_code != 404: objs.append(fname) - with open(fname, 'wb') as fid: + with open(fname, "wb") as fid: # TODO. currently shutil writes the file to the hard # drive. try to find way around this step, so file does # not need to be written and then read. gzip_file = gzip.GzipFile(fileobj=r2.raw) shutil.copyfileobj(gzip_file, fid) - print('SUCCEEDED REQUEST for ' + iii) + print("SUCCEEDED REQUEST for " + iii) else: - print('404 message ' + iii) + print("404 message " + iii) mmm += 1 except RuntimeError: jjj += 1 - print('REQUEST FAILED ' + iii) + print("REQUEST FAILED " + iii) pass if jjj > 100: - print('Over ' + str(jjj) + ' failed. break loop') + print("Over " + str(jjj) + " failed. break loop") break return objs diff --git a/monetio/obs/ish_lite.py b/monetio/obs/ish_lite.py index e5409f54..77d542c3 100644 --- a/monetio/obs/ish_lite.py +++ b/monetio/obs/ish_lite.py @@ -1,9 +1,6 @@ """Python module for reading NOAA ISH files""" -from builtins import object, zip -import dask -import dask.dataframe as dd import numpy as np import pandas as pd from dask.diagnostics import ProgressBar @@ -11,12 +8,23 @@ ProgressBar().register() -def add_data(dates, box=None, country=None, state=None, site=None, resample=True, window='H', n_procs=1): +def add_data( + dates, box=None, country=None, state=None, site=None, resample=True, window="H", n_procs=1 +): ish = ISH() - return ish.add_data(dates, box=box, country=country, state=state, site=site, resample=resample, window=window, n_procs=n_procs) - - -class ISH(object): + return ish.add_data( + dates, + box=box, + country=country, + state=state, + site=site, + resample=resample, + window=window, + n_procs=n_procs, + ) + + +class ISH: """Integrated Surface Hourly (also known as ISD, Integrated Surface Data) Attributes @@ -37,41 +45,72 @@ class ISH(object): """ def __init__(self): - self.WIDTHS = [4, 2, 8, 4, 1, 6, 7, 5, 5, 5, 4, 3, 1, 1, 4, 1, 5, 1, 1, 1, 6, 1, 1, 1, 5, 1, 5, 1, 5, 1] + self.WIDTHS = [ + 4, + 2, + 8, + 4, + 1, + 6, + 7, + 5, + 5, + 5, + 4, + 3, + 1, + 1, + 4, + 1, + 5, + 1, + 1, + 1, + 6, + 1, + 1, + 1, + 5, + 1, + 5, + 1, + 5, + 1, + ] self.DTYPES = [ - ('varlength', 'i2'), - ('station_id', 'S11'), - ('date', 'i4'), - ('htime', 'i2'), - ('source_flag', 'S1'), - ('latitude', 'float'), - ('longitude', 'float'), - ('code', 'S5'), - ('elev', 'i2'), - ('call_letters', 'S5'), - ('qc_process', 'S4'), - ('wdir', 'i2'), - ('wdir_quality', 'S1'), - ('wdir_type', 'S1'), - ('ws', 'i2'), - ('ws_quality', 'S1'), - ('ceiling', 'i4'), - ('ceiling_quality', 'S1'), - ('ceiling_code', 'S1'), - ('ceiling_cavok', 'S1'), - ('vsb', 'i4'), - ('vsb_quality', 'S1'), - ('vsb_variability', 'S1'), - ('vsb_variability_quality', 'S1'), - ('t', 'i2'), - ('t_quality', 'S1'), - ('dpt', 'i2'), - ('dpt_quality', 'S1'), - ('p', 'i4'), - ('p_quality', 'S1'), + ("varlength", "i2"), + ("station_id", "S11"), + ("date", "i4"), + ("htime", "i2"), + ("source_flag", "S1"), + ("latitude", "float"), + ("longitude", "float"), + ("code", "S5"), + ("elev", "i2"), + ("call_letters", "S5"), + ("qc_process", "S4"), + ("wdir", "i2"), + ("wdir_quality", "S1"), + ("wdir_type", "S1"), + ("ws", "i2"), + ("ws_quality", "S1"), + ("ceiling", "i4"), + ("ceiling_quality", "S1"), + ("ceiling_code", "S1"), + ("ceiling_cavok", "S1"), + ("vsb", "i4"), + ("vsb_quality", "S1"), + ("vsb_variability", "S1"), + ("vsb_variability_quality", "S1"), + ("t", "i2"), + ("t_quality", "S1"), + ("dpt", "i2"), + ("dpt_quality", "S1"), + ("p", "i4"), + ("p_quality", "S1"), ] self.NAMES, _ = list(zip(*self.DTYPES)) - self.history_file = 'https://www1.ncdc.noaa.gov/pub/data/noaa/isd-history.csv' + self.history_file = "https://www1.ncdc.noaa.gov/pub/data/noaa/isd-history.csv" self.history = None self.daily = False @@ -92,7 +131,7 @@ def read_data_frame(self, file_object): frame_as_array = np.genfromtxt(file_object, delimiter=self.WIDTHS, dtype=self.DTYPES) frame = pd.DataFrame.from_records(frame_as_array) df = self._clean(frame) - df.drop(['latitude', 'longitude'], axis=1, inplace=True) + df.drop(["latitude", "longitude"], axis=1, inplace=True) # df.latitude = self.history.groupby('station_id').get_group( # df.station_id[0]).LAT.values[0] # df.longitude = self.history.groupby('station_id').get_group( @@ -113,23 +152,23 @@ def read_ish_history(self): """ fname = self.history_file - self.history = pd.read_csv(fname, parse_dates=['BEGIN', 'END'], infer_datetime_format=True) + self.history = pd.read_csv(fname, parse_dates=["BEGIN", "END"], infer_datetime_format=True) self.history.columns = [i.lower() for i in self.history.columns] index1 = (self.history.end >= self.dates.min()) & (self.history.begin <= self.dates.max()) - self.history = self.history.loc[index1, :].dropna(subset=['lat', 'lon']) + self.history = self.history.loc[index1, :].dropna(subset=["lat", "lon"]) - self.history.loc[:, 'usaf'] = self.history.usaf.astype('str').str.zfill(6) - self.history.loc[:, 'wban'] = self.history.wban.astype('str').str.zfill(5) - self.history['station_id'] = self.history.usaf + self.history.wban - self.history.rename(columns={'lat': 'latitude', 'lon': 'longitude'}, inplace=True) + self.history.loc[:, "usaf"] = self.history.usaf.astype("str").str.zfill(6) + self.history.loc[:, "wban"] = self.history.wban.astype("str").str.zfill(5) + self.history["station_id"] = self.history.usaf + self.history.wban + self.history.rename(columns={"lat": "latitude", "lon": "longitude"}, inplace=True) def subset_sites(self, latmin=32.65, lonmin=-113.3, latmax=34.5, lonmax=-110.4): - """ find sites within designated region""" + """find sites within designated region""" latindex = (self.history.latitude >= latmin) & (self.history.latitude <= latmax) lonindex = (self.history.longitude >= lonmin) & (self.history.longitude <= lonmax) dfloc = self.history.loc[latindex & lonindex, :] - print('SUBSET') + print("SUBSET") print(dfloc.latitude.unique()) print(dfloc.longitude.unique()) return dfloc @@ -144,14 +183,16 @@ def build_urls(self, dates, dfloc): """ furls = [] - fnames = [] - print('Building AIRNOW URLs...') - url = 'https://www1.ncdc.noaa.gov/pub/data/noaa/isd-lite' - dfloc['fname'] = dfloc.usaf.astype(str) + "-" + dfloc.wban.astype(str) + "-" + # fnames = [] + print("Building AIRNOW URLs...") + url = "https://www1.ncdc.noaa.gov/pub/data/noaa/isd-lite" + dfloc["fname"] = dfloc.usaf.astype(str) + "-" + dfloc.wban.astype(str) + "-" for date in self.dates.unique().astype(str): - dfloc['fname'] = dfloc.usaf.astype(str) + "-" + dfloc.wban.astype(str) + "-" + date + ".gz" + dfloc["fname"] = ( + dfloc.usaf.astype(str) + "-" + dfloc.wban.astype(str) + "-" + date + ".gz" + ) for fname in dfloc.fname.values: - furls.append("{}/{}/{}".format(url, date, fname)) + furls.append(f"{url}/{date}/{fname}") # f = url + i.strftime('%Y/%Y%m%d/HourlyData_%Y%m%d%H.dat') # fname = i.strftime('HourlyData_%Y%m%d%H.dat') # furls.append(f) @@ -165,14 +206,34 @@ def build_urls(self, dates, dfloc): def read_csv(self, fname): from numpy import NaN - columns = ['year', 'month', 'day', 'hour', 'temp', 'dew_pt_temp', 'press', 'wdir', 'ws', 'sky_condition', 'precip_1hr', 'precip_6hr'] - df = pd.read_csv(fname, delim_whitespace=True, header=None, names=columns, parse_dates={'time': [0, 1, 2, 3]}, infer_datetime_format=True) - df['temp'] /= 10.0 - df['dew_pt_temp'] /= 10.0 - df['press'] /= 10.0 - df['ws'] /= 10.0 - df['precip_1hr'] /= 10.0 - df['precip_6hr'] /= 10.0 + columns = [ + "year", + "month", + "day", + "hour", + "temp", + "dew_pt_temp", + "press", + "wdir", + "ws", + "sky_condition", + "precip_1hr", + "precip_6hr", + ] + df = pd.read_csv( + fname, + delim_whitespace=True, + header=None, + names=columns, + parse_dates={"time": [0, 1, 2, 3]}, + infer_datetime_format=True, + ) + df["temp"] /= 10.0 + df["dew_pt_temp"] /= 10.0 + df["press"] /= 10.0 + df["ws"] /= 10.0 + df["precip_1hr"] /= 10.0 + df["precip_6hr"] /= 10.0 df = df.replace(-9999, NaN) return df @@ -180,12 +241,22 @@ def aggregrate_files(self, urls, n_procs=1): import dask import dask.dataframe as dd - dfs = [dask.delayed(read_csv)(f) for f in urls] + dfs = [dask.delayed(self.read_csv)(f) for f in urls] dff = dd.from_delayed(dfs) df = dff.compute(num_workers=n_procs) return df - def add_data(self, dates, box=None, country=None, state=None, site=None, resample=True, window='H', n_procs=1): + def add_data( + self, + dates, + box=None, + country=None, + state=None, + site=None, + resample=True, + window="H", + n_procs=1, + ): """Short summary. Parameters @@ -214,16 +285,16 @@ def add_data(self, dates, box=None, country=None, state=None, site=None, resampl self.read_ish_history() dfloc = self.history.copy() if box is not None: # type(box) is not type(None): - print('Retrieving Sites in: ' + ' '.join(map(str, box))) + print("Retrieving Sites in: " + " ".join(map(str, box))) dfloc = self.subset_sites(latmin=box[0], lonmin=box[1], latmax=box[2], lonmax=box[3]) elif country is not None: - print('Retrieving Country: ' + country) + print("Retrieving Country: " + country) dfloc = self.history.loc[self.history.ctry == country, :] elif state is not None: - print('Retrieving State: ' + state) + print("Retrieving State: " + state) dfloc = self.history.loc[self.history.STATE == state, :] elif site is not None: - print('Retrieving Site: ' + site) + print("Retrieving Site: " + site) dfloc = self.history.loc[self.history.station_id == site, :] urls = self.build_urls(dates, dfloc) return self.aggregrate_files(urls, n_procs=n_procs) @@ -244,36 +315,37 @@ def get_url_file_objs(self, fname): """ import gzip import shutil + import requests objs = [] - print(' Constructing ISH file objects from urls...') + print(" Constructing ISH file objects from urls...") mmm = 0 jjj = 0 for iii in fname: # print i try: r2 = requests.get(iii, stream=True) - temp = iii.split('/') + temp = iii.split("/") temp = temp[-1] - fname = 'isd.' + temp.replace('.gz', '') + fname = "isd." + temp.replace(".gz", "") if r2.status_code != 404: objs.append(fname) - with open(fname, 'wb') as fid: + with open(fname, "wb") as fid: # TODO. currently shutil writes the file to the hard # drive. try to find way around this step, so file does # not need to be written and then read. gzip_file = gzip.GzipFile(fileobj=r2.raw) shutil.copyfileobj(gzip_file, fid) - print('SUCCEEDED REQUEST for ' + iii) + print("SUCCEEDED REQUEST for " + iii) else: - print('404 message ' + iii) + print("404 message " + iii) mmm += 1 except RuntimeError: jjj += 1 - print('REQUEST FAILED ' + iii) + print("REQUEST FAILED " + iii) pass if jjj > 100: - print('Over ' + str(jjj) + ' failed. break loop') + print("Over " + str(jjj) + " failed. break loop") break return objs diff --git a/monetio/obs/nadp.py b/monetio/obs/nadp.py index 9691ea80..2585c733 100644 --- a/monetio/obs/nadp.py +++ b/monetio/obs/nadp.py @@ -1,175 +1,178 @@ """ READS NAPD DATA """ -from builtins import object import pandas as pd from numpy import NaN -def add_data(dates, network='NTN', siteid=None, weekly=True): +def add_data(dates, network="NTN", siteid=None, weekly=True): n = NADP() df = n.add_data(dates, network=network, siteid=siteid, weekly=weekly) return df -class NADP(object): +class NADP: def __init__(self): self.weekly = True self.network = None self.df = pd.DataFrame() - self.objtype = 'NADP' + self.objtype = "NADP" self.url = None - def build_url(self, network='NTN', siteid=None): - baseurl = 'http://nadp.slh.wisc.edu/datalib/' + def build_url(self, network="NTN", siteid=None): + baseurl = "http://nadp.slh.wisc.edu/datalib/" if siteid is not None: - siteid = siteid.upper() + '-' + siteid = siteid.upper() + "-" else: - siteid = '' - if network.lower() == 'amnet': - url = 'http://nadp.slh.wisc.edu/datalib/AMNet/AMNet-All.zip' - elif network.lower() == 'amon': - url = 'http://nadp.slh.wisc.edu/dataLib/AMoN/csv/all-ave.csv' - elif network.lower() == 'airmon': - url = 'http://nadp.slh.wisc.edu/datalib/AIRMoN/AIRMoN-ALL.csv' + siteid = "" + if network.lower() == "amnet": + url = "http://nadp.slh.wisc.edu/datalib/AMNet/AMNet-All.zip" + elif network.lower() == "amon": + url = "http://nadp.slh.wisc.edu/dataLib/AMoN/csv/all-ave.csv" + elif network.lower() == "airmon": + url = "http://nadp.slh.wisc.edu/datalib/AIRMoN/AIRMoN-ALL.csv" else: if self.weekly: - url = baseurl + network.lower() + '/weekly/' + siteid + network.upper() + '-All-w.csv' + url = ( + baseurl + network.lower() + "/weekly/" + siteid + network.upper() + "-All-w.csv" + ) else: - url = baseurl + network.lower() + '/annual/' + siteid + network.upper() + '-All-a.csv' + url = ( + baseurl + network.lower() + "/annual/" + siteid + network.upper() + "-All-a.csv" + ) return url def network_names(self): - print('Available Networks: AMNET, NTN, MDN, AIRMON, AMON') + print("Available Networks: AMNET, NTN, MDN, AIRMON, AMON") def read_ntn(self, url): - print('Reading NADP-NTN Data...') + print("Reading NADP-NTN Data...") print(url) # header = self.get_columns() df = pd.read_csv(url, infer_datetime_format=True, parse_dates=[2, 3]) df.columns = [i.lower() for i in df.columns] - df.rename(columns={'dateon': 'time', 'dateoff': 'time_off'}, inplace=True) + df.rename(columns={"dateon": "time", "dateoff": "time_off"}, inplace=True) try: - meta = pd.read_csv('https://bit.ly/2sPMvaO') + meta = pd.read_csv("https://bit.ly/2sPMvaO") except RuntimeError: - meta = pd.read_csv(self.__path__ + '/../../data/ntn-sites.csv') + meta = pd.read_csv(self.__path__ + "/../../data/ntn-sites.csv") meta.columns = [i.lower() for i in meta.columns] - meta.drop(['startdate', 'stopdate'], axis=1, inplace=True) - dfn = pd.merge(df, meta, on='siteid', how='left') - dfn.dropna(subset=['latitude', 'longitude'], inplace=True) - dfn.loc[(dfn.flagmg == '<') | (dfn.mg < 0), 'mg'] = NaN - dfn.loc[(dfn.flagbr == '<') | (dfn.br < 0), 'br'] = NaN - dfn.loc[(dfn.flagso4 == '<') | (dfn.so4 < 0), 'so4'] = NaN - dfn.loc[(dfn.flagcl == '<') | (dfn.cl < 0), 'cl'] = NaN - dfn.loc[(dfn.flagno3 == '<') | (dfn.no3 < 0), 'no3'] = NaN - dfn.loc[(dfn.flagnh4 == '<') | (dfn.nh4 < 0), 'nh4'] = NaN - dfn.loc[(dfn.flagk == '<') | (dfn.k < 0), 'k'] = NaN - dfn.loc[(dfn.flagna == '<') | (dfn.na < 0), 'na'] = NaN - dfn.loc[(dfn.flagca == '<') | (dfn.ca < 0), 'ca'] = NaN + meta.drop(["startdate", "stopdate"], axis=1, inplace=True) + dfn = pd.merge(df, meta, on="siteid", how="left") + dfn.dropna(subset=["latitude", "longitude"], inplace=True) + dfn.loc[(dfn.flagmg == "<") | (dfn.mg < 0), "mg"] = NaN + dfn.loc[(dfn.flagbr == "<") | (dfn.br < 0), "br"] = NaN + dfn.loc[(dfn.flagso4 == "<") | (dfn.so4 < 0), "so4"] = NaN + dfn.loc[(dfn.flagcl == "<") | (dfn.cl < 0), "cl"] = NaN + dfn.loc[(dfn.flagno3 == "<") | (dfn.no3 < 0), "no3"] = NaN + dfn.loc[(dfn.flagnh4 == "<") | (dfn.nh4 < 0), "nh4"] = NaN + dfn.loc[(dfn.flagk == "<") | (dfn.k < 0), "k"] = NaN + dfn.loc[(dfn.flagna == "<") | (dfn.na < 0), "na"] = NaN + dfn.loc[(dfn.flagca == "<") | (dfn.ca < 0), "ca"] = NaN return dfn def read_mdn(self, url): - print('Reading NADP-MDN Data...') + print("Reading NADP-MDN Data...") # header = self.get_columns() df = pd.read_csv(url, infer_datetime_format=True, parse_dates=[1, 2]) df.columns = [i.lower() for i in df.columns] - df.rename(columns={'dateon': 'time', 'dateoff': 'time_off'}, inplace=True) + df.rename(columns={"dateon": "time", "dateoff": "time_off"}, inplace=True) try: - meta = pd.read_csv('https://bit.ly/2Lq6kgq') - meta.drop(['startdate', 'stopdate'], axis=1, inplace=True) + meta = pd.read_csv("https://bit.ly/2Lq6kgq") + meta.drop(["startdate", "stopdate"], axis=1, inplace=True) except RuntimeError: - meta = pd.read_csv(self.__path__ + '/../../data/mdn-sites.csv') - meta.drop(['startdate', 'stopdate'], axis=1, inplace=True) + meta = pd.read_csv(self.__path__ + "/../../data/mdn-sites.csv") + meta.drop(["startdate", "stopdate"], axis=1, inplace=True) meta.columns = [i.lower() for i in meta.columns] - dfn = pd.merge(df, meta, on='siteid', how='left') - dfn.dropna(subset=['latitude', 'longitude'], inplace=True) - dfn.loc[dfn.qr == 'C', ['rgppt', 'svol', 'subppt', 'hgconc', 'hgdep']] = NaN + dfn = pd.merge(df, meta, on="siteid", how="left") + dfn.dropna(subset=["latitude", "longitude"], inplace=True) + dfn.loc[dfn.qr == "C", ["rgppt", "svol", "subppt", "hgconc", "hgdep"]] = NaN return dfn def read_airmon(self, url): - print('Reading NADP-AIRMoN Data...') + print("Reading NADP-AIRMoN Data...") # header = self.get_columns() df = pd.read_csv(url, infer_datetime_format=True, parse_dates=[2, 3]) df.columns = [i.lower() for i in df.columns] - df.rename(columns={'dateon': 'time', 'dateoff': 'time_off'}, inplace=True) + df.rename(columns={"dateon": "time", "dateoff": "time_off"}, inplace=True) try: - meta = pd.read_csv('https://bit.ly/2xMlgTW') - meta.drop(['startdate', 'stopdate'], axis=1, inplace=True) + meta = pd.read_csv("https://bit.ly/2xMlgTW") + meta.drop(["startdate", "stopdate"], axis=1, inplace=True) except RuntimeError: - meta = pd.read_csv(self.__path__ + '/../../data/airmon-sites.csv') - meta.drop(['startdate', 'stopdate'], axis=1, inplace=True) + meta = pd.read_csv(self.__path__ + "/../../data/airmon-sites.csv") + meta.drop(["startdate", "stopdate"], axis=1, inplace=True) meta.columns = [i.lower() for i in meta.columns] - dfn = pd.merge(df, meta, on='siteid', how='left') - dfn.dropna(subset=['latitude', 'longitude'], inplace=True) + dfn = pd.merge(df, meta, on="siteid", how="left") + dfn.dropna(subset=["latitude", "longitude"], inplace=True) dfn.loc[ - dfn.qrcode == 'C', + dfn.qrcode == "C", [ - 'subppt', - 'pptnws', - 'pptbel', - 'svol', - 'ca', - 'mg', - 'k', - 'na', - 'nh4', - 'no3', - 'cl', - 'so4', - 'po4', - 'phlab', - 'phfield', - 'conduclab', - 'conducfield', + "subppt", + "pptnws", + "pptbel", + "svol", + "ca", + "mg", + "k", + "na", + "nh4", + "no3", + "cl", + "so4", + "po4", + "phlab", + "phfield", + "conduclab", + "conducfield", ], ] = NaN return dfn def read_amon(self, url): - print('Reading NADP-AMoN Data...') + print("Reading NADP-AMoN Data...") # header = self.get_columns() df = pd.read_csv(url, infer_datetime_format=True, parse_dates=[2, 3]) df.columns = [i.lower() for i in df.columns] - df.rename(columns={'startdate': 'time', 'enddate': 'time_off'}, inplace=True) + df.rename(columns={"startdate": "time", "enddate": "time_off"}, inplace=True) try: - meta = pd.read_csv('https://bit.ly/2sJmkCg') - meta.drop(['startdate', 'stopdate'], axis=1, inplace=True) + meta = pd.read_csv("https://bit.ly/2sJmkCg") + meta.drop(["startdate", "stopdate"], axis=1, inplace=True) except RuntimeError: - meta = pd.read_csv(self.__path__ + '/../../data/amon-sites.csv') - meta.drop(['startdate', 'stopdate'], axis=1, inplace=True) + meta = pd.read_csv(self.__path__ + "/../../data/amon-sites.csv") + meta.drop(["startdate", "stopdate"], axis=1, inplace=True) meta.columns = [i.lower() for i in meta.columns] - dfn = pd.merge(df, meta, on='siteid', how='left') - dfn.dropna(subset=['latitude', 'longitude'], inplace=True) - dfn.loc[dfn.qr == 'C', ['airvol', 'conc']] = NaN + dfn = pd.merge(df, meta, on="siteid", how="left") + dfn.dropna(subset=["latitude", "longitude"], inplace=True) + dfn.loc[dfn.qr == "C", ["airvol", "conc"]] = NaN return dfn def read_amnet(self, url): - print('Reading NADP-AMNet Data...') + print("Reading NADP-AMNet Data...") # header = self.get_columns() df = pd.read_csv(url, infer_datetime_format=True, parse_dates=[2, 3]) df.columns = [i.lower() for i in df.columns] - df.rename(columns={'startdate': 'time', 'enddate': 'time_off'}, inplace=True) + df.rename(columns={"startdate": "time", "enddate": "time_off"}, inplace=True) try: - meta = pd.read_csv('https://bit.ly/2sJmkCg') - meta.drop(['startdate', 'stopdate'], axis=1, inplace=True) + meta = pd.read_csv("https://bit.ly/2sJmkCg") + meta.drop(["startdate", "stopdate"], axis=1, inplace=True) except RuntimeError: - meta = pd.read_csv(self.__path__ + '/../../data/amnet-sites.csv') - meta.drop(['startdate', 'stopdate'], axis=1, inplace=True) + meta = pd.read_csv(self.__path__ + "/../../data/amnet-sites.csv") + meta.drop(["startdate", "stopdate"], axis=1, inplace=True) meta.columns = [i.lower() for i in meta.columns] - dfn = pd.merge(df, meta, on='siteid', how='left') - dfn.dropna(subset=['latitude', 'longitude'], inplace=True) - dfn.loc[dfn.qr == 'C', ['airvol', 'conc']] = NaN + dfn = pd.merge(df, meta, on="siteid", how="left") + dfn.dropna(subset=["latitude", "longitude"], inplace=True) + dfn.loc[dfn.qr == "C", ["airvol", "conc"]] = NaN return dfn - def add_data(self, dates, network='NTN', siteid=None, weekly=True): + def add_data(self, dates, network="NTN", siteid=None, weekly=True): url = self.build_url(network=network, siteid=siteid) - if network.lower() == 'ntn': + if network.lower() == "ntn": df = self.read_ntn(url) - elif network.lower() == 'mdn': + elif network.lower() == "mdn": df = self.read_mdn(url) - elif network.lower() == 'amon': + elif network.lower() == "amon": df = self.read_amon(url) - elif network.lower() == 'airmon': + elif network.lower() == "airmon": df = self.read_airmon(url) else: df = self.read_amnet(url) @@ -178,7 +181,7 @@ def add_data(self, dates, network='NTN', siteid=None, weekly=True): return df - def set_daterange(self, begin='', end=''): - dates = pd.date_range(start=begin, end=end, freq='H') + def set_daterange(self, begin="", end=""): + dates = pd.date_range(start=begin, end=end, freq="H") self.dates = dates return dates diff --git a/monetio/obs/obs_util.py b/monetio/obs/obs_util.py index d4292965..ca99ad78 100644 --- a/monetio/obs/obs_util.py +++ b/monetio/obs/obs_util.py @@ -6,7 +6,7 @@ import numpy as np -def find_near(df, latlon, distance=100, sid='site_num', drange=None): +def find_near(df, latlon, distance=100, sid="site_num", drange=None): """find all values in the df dataframe column sid which are within distance (km) of lat lon point. output dictionary with key as value in column sid and value tuple (latitude, longitude) @@ -41,7 +41,7 @@ def find_near(df, latlon, distance=100, sid='site_num', drange=None): return lhash -def write_datem(df, obscolumn='obs', dname='datemfile.txt', sitename='1', info=None, drange=None): +def write_datem(df, obscolumn="obs", dname="datemfile.txt", sitename="1", info=None, drange=None): """returns string in datem format (See NOAA ARL). datem format has the following columns: Year, Month, Day, Hour, Duration, lat, lon, Concentration (units), site @@ -69,48 +69,52 @@ def write_datem(df, obscolumn='obs', dname='datemfile.txt', sitename='1', info=N if drange: df = timefilter(df, drange) - units = df['units'].tolist() + units = df["units"].tolist() units = list(set(units)) sdate = datetime.datetime(2010, 1, 1, 0) if len(units) > 1: - print('WARNING, more than one type of unit ', units) - ustr = '' + print("WARNING, more than one type of unit ", units) + ustr = "" for uuu in units: - ustr += uuu + ' ' + ustr += uuu + " " runstring = "Beginning date " + sdate.strftime("%Y %m %d %H:%M") + " UTC ---" - runstring += 'Information ' + runstring += "Information " if info: runstring += info + "\n" else: runstring += "\n" - runstring += 'Year, Month, Day, Hour:Minute (UTC), Dur(hhmm) , LAT, LON, Concentration (' + ustr + "), sid, height\n" - lat = df['latitude'] - lon = df['longitude'] + runstring += ( + "Year, Month, Day, Hour:Minute (UTC), Dur(hhmm) , LAT, LON, Concentration (" + + ustr + + "), sid, height\n" + ) + lat = df["latitude"] + lon = df["longitude"] cval = df[obscolumn] # print t2 - t1 = df['time'] - duration = ' 0100 ' - height = '20' + t1 = df["time"] + duration = " 0100 " + height = "20" if sitename in df.columns.values: sval = df[sitename] else: sval = [sitename] * len(cval) for val in zip(t1, lat, lon, cval, sval): - runstring += val[0].strftime('%Y %m %d %H%M') + duration + runstring += val[0].strftime("%Y %m %d %H%M") + duration try: - runstring += str(val[1]) + ' ' + str(val[2]) + ' ' + runstring += str(val[1]) + " " + str(val[2]) + " " except RuntimeError: - print('WARNING1', val[1]) + print("WARNING1", val[1]) print(val[2]) print(type(val[1])) print(type(val[2])) sys.exit() if isinstance(val[4], str): - runstring += "{:.3f}".format(val[3]) + ' ' + val[4] + ' ' + height + "\n" + runstring += f"{val[3]:.3f}" + " " + val[4] + " " + height + "\n" else: - runstring += "{:.3f}".format(val[3]) + ' ' + "{0:d}".format(val[4]) + ' ' + height + "\n" + runstring += f"{val[3]:.3f}" + " " + f"{val[4]:d}" + " " + height + "\n" - with open(dname, 'w') as fid: + with open(dname, "w") as fid: fid.write(runstring) return runstring @@ -126,9 +130,9 @@ def get_lhash(df, idn): value a tuple of (lat, lon) Useful for getting lat lon locations of different sites in a dataframe. """ - if 'latitude' in list(df.columns.values): + if "latitude" in list(df.columns.values): dftemp = df.copy() - pairs = zip(dftemp[idn], zip(dftemp['latitude'], dftemp['longitude'])) + pairs = zip(dftemp[idn], zip(dftemp["latitude"], dftemp["longitude"])) pairs = list(set(pairs)) lhash = dict(pairs) # key is facility id and value is name. print(lhash) @@ -143,7 +147,7 @@ def summarize(df, verbose=False): for ccc in columns: print(ccc) print(df[ccc].unique()) - print('-------------------------------') + print("-------------------------------") for ccc in columns: print(ccc) @@ -168,10 +172,10 @@ def latlonfilter(df, llcrnr, urcrnr): lat2 = urcrnr[0] lon1 = llcrnr[1] lon2 = urcrnr[1] - df = df[df['latitude'] < lat2] - df = df[df['latitude'] > lat1] - df = df[df['longitude'] > lon1] - df = df[df['longitude'] < lon2] + df = df[df["latitude"] < lat2] + df = df[df["latitude"] > lat1] + df = df[df["longitude"] > lon1] + df = df[df["longitude"] < lon2] return df @@ -184,6 +188,6 @@ def timefilter(df, daterange, inplace=True): inplace: boolean if TRUE then replaces self.df attribute """ - df = df[df['time'] > daterange[0]] - df = df[df['time'] < daterange[1]] + df = df[df["time"] > daterange[0]] + df = df[df["time"] < daterange[1]] return df diff --git a/monetio/obs/openaq.py b/monetio/obs/openaq.py index 56d19644..53cd77c2 100644 --- a/monetio/obs/openaq.py +++ b/monetio/obs/openaq.py @@ -25,10 +25,8 @@ """ import json -import dask -import dask.dataframe as dd import pandas as pd -from numpy import NaN, vectorize +from numpy import NaN def add_data(dates, n_procs=1): @@ -54,36 +52,41 @@ def add_data(dates, n_procs=1): class OPENAQ: def __init__(self): import s3fs - from numpy import vectorize self.fs = s3fs.S3FileSystem(anon=True) - self.s3bucket = 'openaq-fetches/realtime' + self.s3bucket = "openaq-fetches/realtime" def _get_available_days(self, dates): folders = self.fs.ls(self.s3bucket) - days = [j.split('/')[2] for j in folders] - avail_dates = pd.to_datetime(days, format='%Y-%m-%d', errors='coerce') - dates = pd.to_datetime(dates).floor(freq='D') - d = pd.Series(dates, name='dates').drop_duplicates() - ad = pd.Series(avail_dates, name='dates') - return pd.merge(d, ad, how='inner') + days = [j.split("/")[2] for j in folders] + avail_dates = pd.to_datetime(days, format="%Y-%m-%d", errors="coerce") + dates = pd.to_datetime(dates).floor(freq="D") + d = pd.Series(dates, name="dates").drop_duplicates() + ad = pd.Series(avail_dates, name="dates") + return pd.merge(d, ad, how="inner") def _get_files_in_day(self, date): - files = self.fs.ls("{}/{}".format(self.s3bucket, date.strftime('%Y-%m-%d'))) + files = self.fs.ls("{}/{}".format(self.s3bucket, date.strftime("%Y-%m-%d"))) return files def build_urls(self, dates): d = self._get_available_days(dates) - urls = pd.Series([], name='url') + urls = pd.Series([], name="url") for i in d.dates: files = self._get_files_in_day(i) - furls = pd.Series([f.replace('openaq-fetches', 'https://openaq-fetches.s3.amazonaws.com') for f in files], name='url') - urls = pd.merge(urls, furls, how='outer') + furls = pd.Series( + [ + f.replace("openaq-fetches", "https://openaq-fetches.s3.amazonaws.com") + for f in files + ], + name="url", + ) + urls = pd.merge(urls, furls, how="outer") return urls.url.values def add_data(self, dates, num_workers=1): - import dask.dataframe as dd import dask + import dask.dataframe as dd urls = self.build_urls(dates).tolist() # z = dd.read_json(urls).compute() @@ -92,20 +95,27 @@ def add_data(self, dates, num_workers=1): z = dff.compute(num_workers=num_workers) z.coordinates.replace(to_replace=[None], value=pd.np.nan, inplace=True) z = z.dropna().reset_index(drop=True) - js = json.loads(z[['coordinates', 'date']].to_json(orient='records')) + js = json.loads(z[["coordinates", "date"]].to_json(orient="records")) dff = pd.io.json.json_normalize(js) - dff.columns = dff.columns.str.split('.').str[1] - dff.rename({'local': 'time_local', 'utc': 'time'}, axis=1, inplace=True) + dff.columns = dff.columns.str.split(".").str[1] + dff.rename({"local": "time_local", "utc": "time"}, axis=1, inplace=True) - dff['time'] = pd.to_datetime(dff.time) - dff['time_local'] = pd.to_datetime(dff.time_local) - zzz = z.join(dff).drop(columns=['coordinates', 'date', 'attribution', 'averagingPeriod']) + dff["time"] = pd.to_datetime(dff.time) + dff["time_local"] = pd.to_datetime(dff.time_local) + zzz = z.join(dff).drop(columns=["coordinates", "date", "attribution", "averagingPeriod"]) zp = self._pivot_table(zzz) - zp['siteid'] = zp.country + '_' + zp.latitude.round(3).astype(str) + 'N_' + zp.longitude.round(3).astype(str) + 'E' - - zp['time'] = zp.time.dt.tz_localize(None) + zp["siteid"] = ( + zp.country + + "_" + + zp.latitude.round(3).astype(str) + + "N_" + + zp.longitude.round(3).astype(str) + + "E" + ) + + zp["time"] = zp.time.dt.tz_localize(None) tzinfo = zp.time_local.apply(lambda x: x.tzinfo.utcoffset(x)) - zp['time_local'] = zp['time'] + tzinfo + zp["time_local"] = zp["time"] + tzinfo return zp.loc[zp.time >= dates.min()] def read_json(self, url): @@ -129,37 +139,59 @@ def _parse_latlon(self, df): # lat = vectorize(lambda x: x['latitude']) # lon = vectorize(lambda x: x['longitude']) def lat(x): - return x['latitude'] + return x["latitude"] def lon(x): - return x['longitude'] + return x["longitude"] - df['latitude'] = df.coordinates.apply(lat) - df['longitude'] = df.coordinates.apply(lon) - return df.drop(columns='coordinates') + df["latitude"] = df.coordinates.apply(lat) + df["longitude"] = df.coordinates.apply(lon) + return df.drop(columns="coordinates") def _parse_datetime(self, df): def utc(x): - return pd.to_datetime(x['utc']) + return pd.to_datetime(x["utc"]) def local(x): - return pd.to_datetime(x['local']) + return pd.to_datetime(x["local"]) - df['time'] = df.date.apply(utc) - df['time_local'] = df.date.apply(local) - return df.drop(columns='date') + df["time"] = df.date.apply(utc) + df["time_local"] = df.date.apply(local) + return df.drop(columns="date") def _fix_units(self, df): df.loc[df.value <= 0] = NaN - df.loc[(df.parameter == 'co') & (df.unit != 'ppm'), 'value'] /= 1145 - df.loc[(df.parameter == 'o3') & (df.unit != 'ppm'), 'value'] /= 2000 - df.loc[(df.parameter == 'so2') & (df.unit != 'ppm'), 'value'] /= 2620 - df.loc[(df.parameter == 'no2') & (df.unit != 'ppm'), 'value'] /= 1880 + df.loc[(df.parameter == "co") & (df.unit != "ppm"), "value"] /= 1145 + df.loc[(df.parameter == "o3") & (df.unit != "ppm"), "value"] /= 2000 + df.loc[(df.parameter == "so2") & (df.unit != "ppm"), "value"] /= 2620 + df.loc[(df.parameter == "no2") & (df.unit != "ppm"), "value"] /= 1880 return df def _pivot_table(self, df): w = df.pivot_table( - values='value', index=['time', 'latitude', 'longitude', 'sourceName', 'sourceType', 'city', 'country', 'time_local'], columns='parameter' + values="value", + index=[ + "time", + "latitude", + "longitude", + "sourceName", + "sourceType", + "city", + "country", + "time_local", + ], + columns="parameter", ).reset_index() - w = w.rename(dict(co='co_ppm', o3='o3_ppm', no2='no2_ppm', so2='so2_ppm', bc='bc_umg3', pm25='pm25_ugm3', pm10='pm10_ugm3'), axis=1) + w = w.rename( + dict( + co="co_ppm", + o3="o3_ppm", + no2="no2_ppm", + so2="so2_ppm", + bc="bc_umg3", + pm25="pm25_ugm3", + pm10="pm10_ugm3", + ), + axis=1, + ) return w diff --git a/monetio/obs/pams.py b/monetio/obs/pams.py index 1c7ee707..e2a0788e 100644 --- a/monetio/obs/pams.py +++ b/monetio/obs/pams.py @@ -2,6 +2,7 @@ # https://aqs.epa.gov/aqsweb/documents/data_api.html import json + import pandas as pd @@ -21,59 +22,69 @@ def add_data(filename): """ jsonf = open_json(filename) - dataf = jsonf['Data'] + dataf = jsonf["Data"] data = pd.DataFrame.from_dict(dataf) # Combining state code, county code, and site number into one column - data['siteid'] = data.state_code.astype(str).str.zfill(2) + data.county_code.astype(str).str.zfill(3) + data.site_number.astype(str).str.zfill(4) + data["siteid"] = ( + data.state_code.astype(str).str.zfill(2) + + data.county_code.astype(str).str.zfill(3) + + data.site_number.astype(str).str.zfill(4) + ) # Combining date and time into one column - data['datetime_local'] = pd.to_datetime(data['date_local'] + ' ' + data['time_local']) - data['datetime_utc'] = pd.to_datetime(data['date_gmt'] + ' ' + data['time_gmt']) + data["datetime_local"] = pd.to_datetime(data["date_local"] + " " + data["time_local"]) + data["datetime_utc"] = pd.to_datetime(data["date_gmt"] + " " + data["time_gmt"]) # Renaming columns - data = data.rename(columns={'sample_measurement': 'obs', 'units_of_measure': 'units', 'units_of_measure_code': 'unit_code'}) + data = data.rename( + columns={ + "sample_measurement": "obs", + "units_of_measure": "units", + "units_of_measure_code": "unit_code", + } + ) # Dropping some columns, and reordering columns data = data.drop( columns=[ - 'state_code', - 'county_code', - 'site_number', - 'datum', - 'qualifier', - 'uncertainty', - 'county', - 'state', - 'date_of_last_change', - 'date_local', - 'time_local', - 'date_gmt', - 'time_gmt', - 'poc', - 'unit_code', - 'sample_duration_code', - 'method_code', + "state_code", + "county_code", + "site_number", + "datum", + "qualifier", + "uncertainty", + "county", + "state", + "date_of_last_change", + "date_local", + "time_local", + "date_gmt", + "time_gmt", + "poc", + "unit_code", + "sample_duration_code", + "method_code", ] ) cols = data.columns.tolist() - cols.insert(0, cols.pop(cols.index('siteid'))) - cols.insert(1, cols.pop(cols.index('latitude'))) - cols.insert(2, cols.pop(cols.index('longitude'))) - cols.insert(3, cols.pop(cols.index('datetime_local'))) - cols.insert(4, cols.pop(cols.index('datetime_utc'))) + cols.insert(0, cols.pop(cols.index("siteid"))) + cols.insert(1, cols.pop(cols.index("latitude"))) + cols.insert(2, cols.pop(cols.index("longitude"))) + cols.insert(3, cols.pop(cols.index("datetime_local"))) + cols.insert(4, cols.pop(cols.index("datetime_utc"))) data = data.reindex(columns=cols) # Adjusting parameter units units = data.units.unique() for i in units: con = data.units == i - if i.upper() == 'Parts per billion Carbon'.upper(): - data.loc[con, 'units'] = 'ppbC' - if i == 'Parts per billion': - data.loc[con, 'units'] = 'ppb' - if i == 'Parts per million': - data.loc[con, 'units'] = 'ppm' + if i.upper() == "Parts per billion Carbon".upper(): + data.loc[con, "units"] = "ppbC" + if i == "Parts per billion": + data.loc[con, "units"] = "ppb" + if i == "Parts per million": + data.loc[con, "units"] = "ppm" return data @@ -114,7 +125,7 @@ def get_header(filename): """ jsonf = open_json(filename) - header = jsonf['Header'] + header = jsonf["Header"] header = pd.DataFrame.from_dict(header) return header @@ -136,5 +147,5 @@ def write_csv(array, filename): Generates csv file of specified name in specified location """ - array.to_csv(filename, encoding='utf-8', index=False) - return 'csv file ' + filename + ' has been generated' + array.to_csv(filename, encoding="utf-8", index=False) + return "csv file " + filename + " has been generated" diff --git a/monetio/profile/__init__.py b/monetio/profile/__init__.py index 196916eb..e0f7747c 100644 --- a/monetio/profile/__init__.py +++ b/monetio/profile/__init__.py @@ -1,5 +1,5 @@ from . import icartt, tolnet -__all__ = ['tolnet', 'icartt'] +__all__ = ["tolnet", "icartt"] -__name__ = 'profile' +__name__ = "profile" diff --git a/monetio/profile/icartt.py b/monetio/profile/icartt.py index 5e32807d..9c86031c 100644 --- a/monetio/profile/icartt.py +++ b/monetio/profile/icartt.py @@ -1,7 +1,8 @@ import datetime import sys -import xarray as xr + import pandas as pd +import xarray as xr from numpy import NaN @@ -184,7 +185,7 @@ def __getitem__(self, name): """ idx = self.index(name) if idx == -1: - raise Exception("{:s} not found in data".format(name)) + raise Exception(f"{name:s} not found in data") return [x[idx] for x in self.data] def units(self, name): @@ -192,7 +193,7 @@ def units(self, name): Units of variable """ res = [x.units for x in self.VAR if x.name == name] - if len(res) is 0: + if len(res) == 0: res = [""] return res[0] @@ -201,7 +202,7 @@ def index(self, name): Index of variable in data array """ res = [i for i, x in enumerate(self.VAR) if x.name == name] - if len(res) is 0: + if len(res) == 0: res = [-1] return res[0] @@ -214,7 +215,7 @@ def prnt(txt): f.write(str(txt) + "\n") # Number of lines in header, file format index (most files use 1001) - comma delimited. - prnt("{:d}, {:d}".format(self.nheader, self.format)) + prnt(f"{self.nheader:d}, {self.format:d}") # PI last name, first name/initial. prnt(self.PI) # Organization/affiliation of PI. @@ -226,7 +227,14 @@ def prnt(txt): # File volume number, number of file volumes (these integer values are used when the data require more than one file per day; for data that require only one file these values are set to 1, 1) - comma delimited. prnt(self.splitChar.join([str(self.VOL), str(self.NVOL)])) # UTC date when data begin, UTC date of data reduction or revision - comma delimited (yyyy, mm, dd, yyyy, mm, dd). - prnt(self.splitChar.join([datetime.datetime.strftime(x, "%Y, %m, %d") for x in [self.dateValid, self.dateRevised]])) + prnt( + self.splitChar.join( + [ + datetime.datetime.strftime(x, "%Y, %m, %d") + for x in [self.dateValid, self.dateRevised] + ] + ) + ) # Data Interval (This value describes the time spacing (in seconds) between consecutive data records. It is the (constant) interval between values of the independent variable. For 1 Hz data the data interval value is 1 and for 10 Hz data the value is 0.1. All intervals longer than 1 second must be reported as Start and Stop times, and the Data Interval value is set to 0. The Mid-point time is required when it is not at the average of Start and Stop times. For additional information see Section 2.5 below.). prnt("0") # Description or name of independent variable (This is the name chosen for the start time. It always refers to the number of seconds UTC from the start of the day on which measurements began. It should be noted here that the independent variable should monotonically increase even when crossing over to a second day.). @@ -237,37 +245,47 @@ def prnt(txt): # Number of variables (Integer value showing the number of dependent variables: the total number of columns of data is this value plus one.). prnt(self.ndvar) # Scale factors (1 for most cases, except where grossly inconvenient) - comma delimited. - prnt(self.splitChar.join(["{:6.3f}".format(x.scale) for x in self.DVAR])) + prnt(self.splitChar.join([f"{x.scale:6.3f}" for x in self.DVAR])) # Missing data indicators (This is -9999 (or -99999, etc.) for any missing data condition, except for the main time (independent) variable which is never missing) - comma delimited. prnt(self.splitChar.join([str(x.miss) for x in self.DVAR])) # Variable names and units (Short variable name and units are required, and optional long descriptive name, in that order, and separated by commas. If the variable is unitless, enter the keyword "none" for its units. Each short variable name and units (and optional long name) are entered on one line. The short variable name must correspond exactly to the name used for that variable as a column header, i.e., the last header line prior to start of data.). - nul = [prnt(x.desc) for x in self.DVAR] + _ = [prnt(x.desc) for x in self.DVAR] if self.format == 2110: # Number of variables (Integer value showing the number of dependent variables: the total number of columns of data is this value plus one.). prnt(self.nauxvar) # Scale factors (1 for most cases, except where grossly inconvenient) - comma delimited. - prnt(self.splitChar.join(["{:6.3f}".format(x.scale) for x in self.AUXVAR])) + prnt(self.splitChar.join([f"{x.scale:6.3f}" for x in self.AUXVAR])) # Missing data indicators (This is -9999 (or -99999, etc.) for any missing data condition, except for the main time (independent) variable which is never missing) - comma delimited. prnt(self.splitChar.join([str(x.miss) for x in self.AUXVAR])) # Variable names and units (Short variable name and units are required, and optional long descriptive name, in that order, and separated by commas. If the variable is unitless, enter the keyword "none" for its units. Each short variable name and units (and optional long name) are entered on one line. The short variable name must correspond exactly to the name used for that variable as a column header, i.e., the last header line prior to start of data.). - nul = [prnt(x.desc) for x in self.AUXVAR] + _ = [prnt(x.desc) for x in self.AUXVAR] # Number of SPECIAL comment lines (Integer value indicating the number of lines of special comments, NOT including this line.). - prnt("{:d}".format(self.nscom)) + prnt(f"{self.nscom:d}") # Special comments (Notes of problems or special circumstances unique to this file. An example would be comments/problems associated with a particular flight.). - nul = [prnt(x) for x in self.SCOM] + _ = [prnt(x) for x in self.SCOM] # Number of Normal comments (i.e., number of additional lines of SUPPORTING information: Integer value indicating the number of lines of additional information, NOT including this line.). - prnt("{:d}".format(self.nncom)) + prnt(f"{self.nncom:d}") # Normal comments (SUPPORTING information: This is the place for investigators to more completely describe the data and measurement parameters. The supporting information structure is described below as a list of key word: value pairs. Specifically include here information on the platform used, the geo-location of data, measurement technique, and data revision comments. Note the non-optional information regarding uncertainty, the upper limit of detection (ULOD) and the lower limit of detection (LLOD) for each measured variable. The ULOD and LLOD are the values, in the same units as the measurements that correspond to the flags -7777s and -8888s within the data, respectively. The last line of this section should contain all the short variable names on one line. The key words in this section are written in BOLD below and must appear in this section of the header along with the relevant data listed after the colon. For key words where information is not needed or applicable, simply enter N/A.). - nul = [prnt(x) for x in self.NCOM] + _ = [prnt(x) for x in self.NCOM] # data! - nul = [prnt(self.splitChar.join([str(y) for y in x])) for x in self.data] + _ = [prnt(self.splitChar.join([str(y) for y in x])) for x in self.data] def make_filename(self): """ Create ICARTT-compliant file name based on the information contained in the dataset """ - return self.dataID + "_" + self.locationID + "_" + datetime.datetime.strftime(self.dateValid, "%Y%m%d") + "_" + "R" + self.revision + ".ict" + return ( + self.dataID + + "_" + + self.locationID + + "_" + + datetime.datetime.strftime(self.dateValid, "%Y%m%d") + + "_" + + "R" + + self.revision + + ".ict" + ) # sanitize function def __readline(self, do_split=True): @@ -310,8 +328,10 @@ def read_header(self): # line 7 - UTC date when data begin, UTC date of data reduction or revision # - comma delimited (yyyy, mm, dd, yyyy, mm, dd). dmp = self.__readline() - self.dateValid = datetime.datetime.strptime("".join(["{:s}".format(x) for x in dmp[0:3]]), "%Y%m%d") - self.dateRevised = datetime.datetime.strptime("".join(["{:s}".format(x) for x in dmp[3:6]]), "%Y%m%d") + self.dateValid = datetime.datetime.strptime("".join([f"{x:s}" for x in dmp[0:3]]), "%Y%m%d") + self.dateRevised = datetime.datetime.strptime( + "".join([f"{x:s}" for x in dmp[3:6]]), "%Y%m%d" + ) # line 8 - Data Interval (This value describes the time spacing (in seconds) # between consecutive data records. It is the (constant) interval between @@ -362,7 +382,10 @@ def read_header(self): dvname += [dmp[0]] dvunits += [dmp[1]] - self.DVAR = [Variable(name, unit, scale, miss) for name, unit, scale, miss in zip(dvname, dvunits, dvscale, dvmiss)] + self.DVAR = [ + Variable(name, unit, scale, miss) + for name, unit, scale, miss in zip(dvname, dvunits, dvscale, dvmiss) + ] # line 14 + nvar - Number of SPECIAL comment lines (Integer value # indicating the number of lines of special comments, NOT including this @@ -423,9 +446,11 @@ def read_data(self): if self.input_fhandle.closed: self.input_fhandle = open(self.input_fhandle.name) - nul = [self.input_fhandle.readline() for i in range(self.nheader)] + _ = [self.input_fhandle.readline() for _ in range(self.nheader)] - self.data = [self.__nan_miss_float(line.split(self.splitChar)) for line in self.input_fhandle] + self.data = [ + self.__nan_miss_float(line.split(self.splitChar)) for line in self.input_fhandle + ] self.input_fhandle.close() @@ -437,7 +462,7 @@ def read_first_and_last(self): if self.input_fhandle.closed: self.input_fhandle = open(self.input_fhandle.name) - nul = [self.input_fhandle.readline() for i in range(self.nheader)] + _ = [self.input_fhandle.readline() for _ in range(self.nheader)] first = self.input_fhandle.readline() self.data = [self.__nan_miss_float(first.split(self.splitChar))] @@ -488,14 +513,15 @@ def __init__(self, f=None, loadData=True): self.splitChar = "," # read data if f is not None + encoding = "utf-8" if f is not None: if isinstance(f, str): text = f decoded = False - self.input_fhandle = open(f, "r", encoding="utf-8") + self.input_fhandle = open(f, encoding=encoding) else: - text = f.decode(encoding) - decoded = True + text = f.decode(encoding) # noqa: F841 + decoded = True # noqa: F841 # if isinstance(f, (str, unicode)): # self.input_fhandle = open(f, 'r') # else: diff --git a/monetio/profile/tolnet.py b/monetio/profile/tolnet.py index fbfbb5a8..46444075 100644 --- a/monetio/profile/tolnet.py +++ b/monetio/profile/tolnet.py @@ -1,5 +1,4 @@ import os -from builtins import object import pandas as pd import xarray as xr @@ -12,108 +11,145 @@ def open_dataset(fname): def open_mfdataset(fname): from glob import glob + from numpy import sort + t = TOLNet() dsets = [] for i in sort(glob(fname)): dsets.append(t.add_data(i)) - return xr.concat(dsets, dim='time') + return xr.concat(dsets, dim="time") def tolnet_colormap(): from matplotlib.colors import ListedColormap from numpy import array + Colors = [ - array([255, 140, 255]) / 255., - array([221, 111, 242]) / 255., - array([187, 82, 229]) / 255., - array([153, 53, 216]) / 255., - array([119, 24, 203]) / 255., - array([0, 0, 187]) / 255., - array([0, 44, 204]) / 255., - array([0, 88, 221]) / 255., - array([0, 132, 238]) / 255., - array([0, 175, 255]) / 255., - array([0, 235, 255]) / 255., - array([39, 255, 215]) / 255., - array([99, 255, 155]) / 255., - array([163, 255, 91]) / 255., - array([211, 255, 43]) / 255., - array([255, 255, 0]) / 255., - array([255, 207, 0]) / 255., - array([255, 159, 0]) / 255., - array([255, 111, 0]) / 255., - array([255, 63, 0]) / 255., - array([255, 0, 0]) / 255., - array([216, 0, 15]) / 255., - array([178, 0, 31]) / 255., - array([140, 0, 47]) / 255., - array([102, 0, 63]) / 255., - array([52, 52, 52]) / 255., - array([96, 96, 96]) / 255., - array([140, 140, 140]) / 255., - array([184, 184, 184]) / 255., - array([228, 228, 228]) / 255., - [1.,1.,1.] ] -# Colors = [ -# array([255, 140, 255]) / 255., -# array([221, 111, 242]) / 255., -# array([187, 82, 229]) / 255., -# array([153, 53, 216]) / 255., -# array([119, 24, 203]) / 255., -# array([0, 0, 187]) / 255., -# array([0, 44, 204]) / 255., -# array([0, 88, 221]) / 255., -# array([0, 132, 238]) / 255., -# array([0, 175, 255]) / 255., -# array([0, 235, 255]) / 255., -# array([39, 255, 215]) / 255., -# array([99, 255, 155]) / 255., -# array([163, 255, 91]) / 255., -# array([211, 255, 43]) / 255., -# array([255, 255, 0]) / 255., -# array([255, 207, 0]) / 255., -# array([255, 159, 0]) / 255., -# array([255, 111, 0]) / 255., -# array([255, 63, 0]) / 255., -# array([255, 0, 0]) / 255., -# array([216, 0, 15]) / 255., -# array([178, 0, 31]) / 255., -# array([140, 0, 47]) / 255., -# array([102, 0, 63]) / 255., -# array([52, 52, 52]) / 255., -# array([52, 52, 52]) / 255., -# array([52, 52, 52]) / 255., -# array([52, 52, 52]) / 255., -# array([52, 52, 52]) / 255., -# array([52, 52, 52]) / 255., -# array([96, 96, 96]) / 255., -# array([96, 96, 96]) / 255., -# array([96, 96, 96]) / 255., -# array([96, 96, 96]) / 255., -# array([96, 96, 96]) / 255., -# array([96, 96, 96]) / 255. -# ] + array([255, 140, 255]) / 255.0, + array([221, 111, 242]) / 255.0, + array([187, 82, 229]) / 255.0, + array([153, 53, 216]) / 255.0, + array([119, 24, 203]) / 255.0, + array([0, 0, 187]) / 255.0, + array([0, 44, 204]) / 255.0, + array([0, 88, 221]) / 255.0, + array([0, 132, 238]) / 255.0, + array([0, 175, 255]) / 255.0, + array([0, 235, 255]) / 255.0, + array([39, 255, 215]) / 255.0, + array([99, 255, 155]) / 255.0, + array([163, 255, 91]) / 255.0, + array([211, 255, 43]) / 255.0, + array([255, 255, 0]) / 255.0, + array([255, 207, 0]) / 255.0, + array([255, 159, 0]) / 255.0, + array([255, 111, 0]) / 255.0, + array([255, 63, 0]) / 255.0, + array([255, 0, 0]) / 255.0, + array([216, 0, 15]) / 255.0, + array([178, 0, 31]) / 255.0, + array([140, 0, 47]) / 255.0, + array([102, 0, 63]) / 255.0, + array([52, 52, 52]) / 255.0, + array([96, 96, 96]) / 255.0, + array([140, 140, 140]) / 255.0, + array([184, 184, 184]) / 255.0, + array([228, 228, 228]) / 255.0, + [1.0, 1.0, 1.0], + ] + # Colors = [ + # array([255, 140, 255]) / 255., + # array([221, 111, 242]) / 255., + # array([187, 82, 229]) / 255., + # array([153, 53, 216]) / 255., + # array([119, 24, 203]) / 255., + # array([0, 0, 187]) / 255., + # array([0, 44, 204]) / 255., + # array([0, 88, 221]) / 255., + # array([0, 132, 238]) / 255., + # array([0, 175, 255]) / 255., + # array([0, 235, 255]) / 255., + # array([39, 255, 215]) / 255., + # array([99, 255, 155]) / 255., + # array([163, 255, 91]) / 255., + # array([211, 255, 43]) / 255., + # array([255, 255, 0]) / 255., + # array([255, 207, 0]) / 255., + # array([255, 159, 0]) / 255., + # array([255, 111, 0]) / 255., + # array([255, 63, 0]) / 255., + # array([255, 0, 0]) / 255., + # array([216, 0, 15]) / 255., + # array([178, 0, 31]) / 255., + # array([140, 0, 47]) / 255., + # array([102, 0, 63]) / 255., + # array([52, 52, 52]) / 255., + # array([52, 52, 52]) / 255., + # array([52, 52, 52]) / 255., + # array([52, 52, 52]) / 255., + # array([52, 52, 52]) / 255., + # array([52, 52, 52]) / 255., + # array([96, 96, 96]) / 255., + # array([96, 96, 96]) / 255., + # array([96, 96, 96]) / 255., + # array([96, 96, 96]) / 255., + # array([96, 96, 96]) / 255., + # array([96, 96, 96]) / 255. + # ] TNcmap = ListedColormap(Colors) TNcmap.set_under([1, 1, 1]) TNcmap.set_over([0, 0, 0]) return TNcmap -def tolnet_plot(dset, var='O3MR', units='ppbv', tolnet_cmap=True, **kwargs): +def tolnet_plot(dset, var="O3MR", units="ppbv", tolnet_cmap=True, **kwargs): import matplotlib.pyplot as plt import seaborn as sns - sns.set_context('notebook') + + sns.set_context("notebook") cmap = tolnet_colormap() Fig, Ax = plt.subplots(figsize=(9, 6)) dsett = dset.copy() - dsett['z'] /= 1000. # put in km - dsett[var].attrs['units'] = units + dsett["z"] /= 1000.0 # put in km + dsett[var].attrs["units"] = units if tolnet_cmap: - levels = [0.001, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 125, 150, 200, 300,600] - dsett[var].plot(x='time', y='z', cmap=cmap, levels=levels, ax=Ax) + levels = [ + 0.001, + 4, + 8, + 12, + 16, + 20, + 24, + 28, + 32, + 36, + 40, + 44, + 48, + 52, + 56, + 60, + 64, + 68, + 72, + 76, + 80, + 84, + 88, + 92, + 96, + 100, + 125, + 150, + 200, + 300, + 600, + ] + dsett[var].plot(x="time", y="z", cmap=cmap, levels=levels, ax=Ax) else: - dsett[var].plot(x='time', y='z', **kwargs) + dsett[var].plot(x="time", y="z", **kwargs) plt.ylabel("Altitude [km]") plt.xlabel("Time [UTC]") sns.despine() @@ -121,7 +157,7 @@ def tolnet_plot(dset, var='O3MR', units='ppbv', tolnet_cmap=True, **kwargs): # plt.colorbar(label="O3 [ppbv]") -class TOLNet(object): +class TOLNet: """Short summary. Attributes @@ -140,10 +176,9 @@ class TOLNet(object): """ def __init__(self): - self.objtype = 'TOLNET' + self.objtype = "TOLNET" self.cwd = os.getcwd() - self.dates = pd.date_range( - start='2017-09-25', end='2017-09-26', freq='H') + self.dates = pd.date_range(start="2017-09-25", end="2017-09-26", freq="H") self.dset = None self.daily = False @@ -162,9 +197,10 @@ def add_data(self, fname): """ from h5py import File + f = File(fname) - atts = f['INSTRUMENT_ATTRIBUTES'] - data = f['DATA'] + atts = f["INSTRUMENT_ATTRIBUTES"] + data = f["DATA"] self.dset = self.make_xarray_dataset(data, atts) return self.dset @@ -186,62 +222,54 @@ def make_xarray_dataset(data, atts): """ from numpy import array, ndarray + # altitude variables - alt = data['ALT'][:].squeeze() - altvars = [ - 'AirND', 'AirNDUncert', 'ChRange', 'Press', 'Temp', 'TempUncert', - 'PressUncert' - ] + alt = data["ALT"][:].squeeze() + altvars = ["AirND", "AirNDUncert", "ChRange", "Press", "Temp", "TempUncert", "PressUncert"] # time variables tseries = pd.Series(data["TIME_MID_UT_UNIX"][:].squeeze()) - time = pd.Series(pd.to_datetime(tseries, unit='ms'), name='time') + time = pd.Series(pd.to_datetime(tseries, unit="ms"), name="time") # all other variables - ovars = [ - 'O3MR', 'O3ND', 'O3NDUncert', 'O3MRUncert', 'O3NDResol', - 'Precision' - ] + ovars = ["O3MR", "O3ND", "O3NDUncert", "O3MRUncert", "O3NDResol", "Precision"] dataset = xr.Dataset() - dataset['z'] = (('z'), alt) - dataset['time'] = (('time'), time) - dataset['x'] = (('x'), [0]) - dataset['y'] = (('y'), [0]) + dataset["z"] = (("z"), alt) + dataset["time"] = (("time"), time) + dataset["x"] = (("x"), [0]) + dataset["y"] = (("y"), [0]) for i in ovars: if data[i].shape == (len(alt), len(time)): - dataset[i] = (('z', 'time'), data[i][:]) + dataset[i] = (("z", "time"), data[i][:]) elif data[i].shape == (len(alt), 1): - dataset[i] = (('z'), data[i][:].squeeze()) + dataset[i] = (("z"), data[i][:].squeeze()) else: - dataset[i] = (('time'), data[i][:].squeeze()) + dataset[i] = (("time"), data[i][:].squeeze()) dataset[i] = dataset[i].where(dataset[i] > -990) for i in altvars: # print(i) - dataset[i] = (('z'), data[i][:].squeeze()) + dataset[i] = (("z"), data[i][:].squeeze()) for i in list(atts.attrs.keys()): # print(type(atts.attrs[i])) - if isinstance(atts.attrs[i], list) or isinstance( - atts.attrs[i], ndarray): + if isinstance(atts.attrs[i], list) or isinstance(atts.attrs[i], ndarray): # print('here') dataset.attrs[i] = atts.attrs[i][0] else: dataset.attrs[i] = atts.attrs[i] # print(dataset) - a, b = dataset.Location_Latitude.decode('ascii').split() - if b == 'S': + a, b = dataset.Location_Latitude.decode("ascii").split() + if b == "S": latitude = -1 * float(a) else: latitude = float(a) - a, b = dataset.Location_Longitude.decode('ascii').split() - if b == 'W': + a, b = dataset.Location_Longitude.decode("ascii").split() + if b == "W": longitude = -1 * float(a) else: longitude = float(a) # dataset = dataset.expand_dims('x') # dataset = dataset.expand_dims('y') - dataset.coords['latitude'] = (('y', 'x'), array(latitude).reshape( - 1, 1)) - dataset.coords['longitude'] = (('y', 'x'), array(longitude).reshape( - 1, 1)) + dataset.coords["latitude"] = (("y", "x"), array(latitude).reshape(1, 1)) + dataset.coords["longitude"] = (("y", "x"), array(longitude).reshape(1, 1)) return dataset diff --git a/monetio/profile/umbc_aerosol.py b/monetio/profile/umbc_aerosol.py index 512e7acf..b9839676 100644 --- a/monetio/profile/umbc_aerosol.py +++ b/monetio/profile/umbc_aerosol.py @@ -1,5 +1,4 @@ import os -from builtins import object import pandas as pd import xarray as xr @@ -12,15 +11,17 @@ def open_dataset(fname): def open_mfdataset(fname): from glob import glob + from numpy import sort + t = CL51() dsets = [] for i in sort(glob(fname)): dsets.append(t.add_data(i)) - return xr.concat(dsets, dim='time') + return xr.concat(dsets, dim="time") -class CL51(object): +class CL51: """Short summary. Attributes @@ -39,10 +40,9 @@ class CL51(object): """ def __init__(self): - self.objtype = 'TOLNET' + self.objtype = "TOLNET" self.cwd = os.getcwd() - self.dates = pd.date_range( - start='2017-09-25', end='2017-09-26', freq='H') + self.dates = pd.date_range(start="2017-09-25", end="2017-09-26", freq="H") self.dset = None self.daily = False @@ -61,9 +61,10 @@ def add_data(self, fname): """ from h5py import File + f = File(fname) - atts = f['Instrument_Attributes'] - data = f['DATA'] + atts = f["Instrument_Attributes"] + data = f["DATA"] self.dset = self.make_xarray_dataset(data, atts) return self.dset @@ -85,26 +86,26 @@ def make_xarray_dataset(data, atts): """ from numpy import array, ndarray + # altitude variables - alt = data['Altitude_m'][:].squeeze() + alt = data["Altitude_m"][:].squeeze() # time variables - time = pd.to_datetime(data['UnixTime_UTC'][:], unit='s') + time = pd.to_datetime(data["UnixTime_UTC"][:], unit="s") # Back Scatter - bsc = data['Profile_bsc'][:] + bsc = data["Profile_bsc"][:] dataset = xr.Dataset() - dataset['z'] = (('z'), alt) - dataset['time'] = (('time'), time) - dataset['x'] = (('x'), [0]) - dataset['y'] = (('y'), [0]) + dataset["z"] = (("z"), alt) + dataset["time"] = (("time"), time) + dataset["x"] = (("x"), [0]) + dataset["y"] = (("y"), [0]) - dataset['bsc'] = (('time', 'z'), bsc) + dataset["bsc"] = (("time", "z"), bsc) for i in list(atts.attrs.keys()): # print(type(atts.attrs[i])) - if isinstance(atts.attrs[i], list) or isinstance( - atts.attrs[i], ndarray): + if isinstance(atts.attrs[i], list) or isinstance(atts.attrs[i], ndarray): # print('here') dataset.attrs[i] = atts.attrs[i][0] else: @@ -116,8 +117,6 @@ def make_xarray_dataset(data, atts): a = dataset.Location_lon.astype(float) longitude = float(a) - dataset.coords['latitude'] = (('y', 'x'), array(latitude).reshape( - 1, 1)) - dataset.coords['longitude'] = (('y', 'x'), array(longitude).reshape( - 1, 1)) + dataset.coords["latitude"] = (("y", "x"), array(latitude).reshape(1, 1)) + dataset.coords["longitude"] = (("y", "x"), array(longitude).reshape(1, 1)) return dataset diff --git a/monetio/sat/__init__.py b/monetio/sat/__init__.py index 4b67e357..4ef21387 100644 --- a/monetio/sat/__init__.py +++ b/monetio/sat/__init__.py @@ -1,4 +1,4 @@ -from . import modis_ornl, nesdis_edr_viirs, nesdis_eps_viirs, nesdis_frp, goes +from . import goes, modis_ornl, nesdis_edr_viirs, nesdis_eps_viirs, nesdis_frp __all__ = ["nesdis_edr_viirs", "nesdis_eps_viirs", "nesdis_frp", "modis_ornl", "goes"] diff --git a/monetio/sat/goes.py b/monetio/sat/goes.py index e5a4aa6f..13cfa2da 100644 --- a/monetio/sat/goes.py +++ b/monetio/sat/goes.py @@ -1,17 +1,19 @@ """ this will read the goes_r data""" -import xarray as xr import pandas as pd +import xarray as xr try: import s3fs has_s3fs = True except ImportError: - print("Please install s3fs if retrieving from the Amazon S3 Servers. Otherwise continue with local data") + print( + "Please install s3fs if retrieving from the Amazon S3 Servers. Otherwise continue with local data" + ) has_s3fs = False try: - import h5py + import h5py # noqa: F401 has_h5py = True except ImportError: @@ -19,7 +21,7 @@ has_h5py = False try: - import h5netcdf + import h5netcdf # noqa: F401 has_h5netcdf = True except ImportError: @@ -82,25 +84,25 @@ def open_dataset(date=None, filename=None, satellite="16", product=None): return ds -class GOES(object): +class GOES: def __init__(self): self.date = None self.satellite = "16" self.product = "ABI-L2-AODF" - self.baseurl = "s3://noaa-goes{}/".format(self.satellite) - self.url = "{}".format(self.baseurl) + self.baseurl = f"s3://noaa-goes{self.satellite}/" + self.url = f"{self.baseurl}" self.filename = None self.fs = None def _update_baseurl(self): - self.baseurl = "s3://noaa-goes{}/".format(self.satellite) + self.baseurl = f"s3://noaa-goes{self.satellite}/" def set_product(self, product=None): try: if product is None: raise ValueError else: - self.url = "{}{}/".format(self.baseurl, product) + self.url = f"{self.baseurl}{product}/" except ValueError: print("kwarg product must have a value") @@ -111,7 +113,7 @@ def get_products(self): def date_to_url(self): date = pd.Timestamp(self.date) date_url_bit = date.strftime("%Y/%j/%H/") - self.url = "{}{}".format(self.url, date_url_bit) + self.url = f"{self.url}{date_url_bit}" def _get_files(self, url=None): try: @@ -161,7 +163,7 @@ def open_amazon_file(self, date=None, product=None, satellite="16"): self._update_baseurl() self._set_s3fs() self.product = self._product_exists(product) - self.url = "{}{}/".format(self.baseurl, self.product) # add product to url + self.url = f"{self.baseurl}{self.product}/" # add product to url self.date_to_url() # add date to url # find closest file to give date @@ -175,8 +177,8 @@ def open_amazon_file(self, date=None, product=None, satellite="16"): return out def _get_grid(self, ds): + from numpy import meshgrid, ndarray from pyproj import CRS, Proj - from numpy import ndarray, meshgrid proj_dict = ds.goes_imager_projection.attrs for i in proj_dict.keys(): @@ -195,7 +197,7 @@ def _get_grid(self, ds): ds = ds.set_coords(["latitude", "longitude"]) return ds - def open_local(self): + def open_local(self, f): # open file object fo = self.fs.open(f) out = xr.open_dataset(fo, engine="h5netcdf") diff --git a/monetio/sat/lpdaac_download.py b/monetio/sat/lpdaac_download.py index aa148d39..12979dc4 100644 --- a/monetio/sat/lpdaac_download.py +++ b/monetio/sat/lpdaac_download.py @@ -13,6 +13,7 @@ import time from getpass import getpass from netrc import netrc + # Load necessary packages into Python from subprocess import Popen @@ -20,18 +21,13 @@ # ----------------------------------USER-DEFINED VARIABLES--------------------------------------- # # Set up command line arguments -parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument( - '-dir', - '--directory', - required=True, - help='Specify directory to save files to') +parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument("-dir", "--directory", required=True, help="Specify directory to save files to") parser.add_argument( - '-f', - '--files', + "-f", + "--files", required=True, - help='A single granule URL, or the location of textfile containing granule URLs' + help="A single granule URL, or the location of textfile containing granule URLs", ) args = parser.parse_args() @@ -39,21 +35,21 @@ # Define file(s) to download from the LP DAAC Data Pool files = args.files prompts = [ - 'Enter NASA Earthdata Login Username \n(or create an account at urs.earthdata.nasa.gov): ', - 'Enter NASA Earthdata Login Password: ' + "Enter NASA Earthdata Login Username \n(or create an account at urs.earthdata.nasa.gov): ", + "Enter NASA Earthdata Login Password: ", ] # ---------------------------------SET UP WORKSPACE---------------------------------------------- # # Create a list of files to download based on input type of files above -if files.endswith('.txt'): - fileList = open(files, 'r').readlines() # If input is textfile w file URLs +if files.endswith(".txt"): + fileList = open(files).readlines() # If input is textfile w file URLs elif isinstance(files, str): fileList = [files] # If input is a single file # Generalize download directory -if saveDir[-1] != '/' and saveDir[-1] != '\\': +if saveDir[-1] != "/" and saveDir[-1] != "\\": saveDir = saveDir.strip("'").strip('"') + os.sep -urs = 'urs.earthdata.nasa.gov' # Address to call for authentication +urs = "urs.earthdata.nasa.gov" # Address to call for authentication # --------------------------------AUTHENTICATION CONFIGURATION----------------------------------- # # Determine if netrc file exists, and if so, if it includes NASA Earthdata Login Credentials @@ -65,39 +61,33 @@ except FileNotFoundError: homeDir = os.path.expanduser("~") Popen( - 'touch {0}.netrc | chmod og-rw {0}.netrc | echo machine {1} >> {0}.netrc' - .format(homeDir + os.sep, urs), - shell=True) + "touch {0}.netrc | chmod og-rw {0}.netrc | echo machine {1} >> {0}.netrc".format( + homeDir + os.sep, urs + ), + shell=True, + ) + Popen(f"echo login {getpass(prompt=prompts[0])} >> {homeDir + os.sep}.netrc", shell=True) Popen( - 'echo login {} >> {}.netrc'.format( - getpass(prompt=prompts[0]), homeDir + os.sep), - shell=True) - Popen( - 'echo password {} >> {}.netrc'.format( - getpass(prompt=prompts[1]), homeDir + os.sep), - shell=True) + f"echo password {getpass(prompt=prompts[1])} >> {homeDir + os.sep}.netrc", + shell=True, + ) # Determine OS and edit netrc file if it exists but is not set up for NASA Earthdata Login except TypeError: homeDir = os.path.expanduser("~") + Popen(f"echo machine {urs} >> {homeDir + os.sep}.netrc", shell=True) + Popen(f"echo login {getpass(prompt=prompts[0])} >> {homeDir + os.sep}.netrc", shell=True) Popen( - 'echo machine {1} >> {0}.netrc'.format(homeDir + os.sep, urs), - shell=True) - Popen( - 'echo login {} >> {}.netrc'.format( - getpass(prompt=prompts[0]), homeDir + os.sep), - shell=True) - Popen( - 'echo password {} >> {}.netrc'.format( - getpass(prompt=prompts[1]), homeDir + os.sep), - shell=True) + f"echo password {getpass(prompt=prompts[1])} >> {homeDir + os.sep}.netrc", + shell=True, + ) # Delay for up to 1 minute to allow user to submit username and password before continuing tries = 0 while tries < 30: try: netrc(netrcDir).authenticators(urs)[2] - except: + except Exception: time.sleep(2.0) tries += 1 @@ -106,31 +96,33 @@ for f in fileList: if not os.path.exists(saveDir): os.makedirs(saveDir) - saveName = os.path.join(saveDir, f.split('/')[-1].strip()) + saveName = os.path.join(saveDir, f.split("/")[-1].strip()) # Create and submit request and download file with requests.get( - f.strip(), - stream=True, - auth=(netrc(netrcDir).authenticators(urs)[0], - netrc(netrcDir).authenticators(urs)[2])) as response: + f.strip(), + stream=True, + auth=(netrc(netrcDir).authenticators(urs)[0], netrc(netrcDir).authenticators(urs)[2]), + ) as response: if response.status_code != 200: print( - "{} not downloaded. Verify that your username and password are correct in {}" - .format(f.split('/')[-1].strip(), netrcDir)) + "{} not downloaded. Verify that your username and password are correct in {}".format( + f.split("/")[-1].strip(), netrcDir + ) + ) else: response.raw.decode_content = True content = response.raw - with open(saveName, 'wb') as d: + with open(saveName, "wb") as d: while True: chunk = content.read(16 * 1024) if not chunk: break d.write(chunk) - print('Downloaded file: {}'.format(saveName)) + print(f"Downloaded file: {saveName}") def download_file(saveDir, netrcDir): if not os.path.exists(saveDir): os.makedirs(saveDir) - saveName = os.path.join(saveDir, f.split('/')[-1].strip()) + # saveName = os.path.join(saveDir, f.split("/")[-1].strip()) diff --git a/monetio/sat/modis_ornl.py b/monetio/sat/modis_ornl.py index 1dfbca12..0f5a201f 100644 --- a/monetio/sat/modis_ornl.py +++ b/monetio/sat/modis_ornl.py @@ -23,19 +23,21 @@ from dask.diagnostics import ProgressBar try: - from suds.client import * + from suds.client import Client + has_suds = True except ImportError: has_suds = False + DEBUG_PRINTING = False -defaultURL = 'https://modis.ornl.gov/cgi-bin/MODIS/soapservice/MODIS_soapservice.wsdl' +defaultURL = "https://modis.ornl.gov/cgi-bin/MODIS/soapservice/MODIS_soapservice.wsdl" pbar = ProgressBar() pbar.register() -class modisData(object): +class modisData: def __init__(self): self.server = None @@ -67,12 +69,12 @@ def __init__(self): def getFilename(self): - d = '.' + d = "." fn = self.product fn = fn + d + self.band - fn = fn + d + 'LAT__' + str(self.latitude) - fn = fn + d + 'LON__' + str(self.longitude) + fn = fn + d + "LAT__" + str(self.latitude) + fn = fn + d + "LON__" + str(self.longitude) fn = fn + d + self.dateStr[0] fn = fn + d + self.dateStr[-1] fn = fn + d + str(int(self.nrows)) @@ -82,9 +84,9 @@ def getFilename(self): def pickle(self): - fn = self.getFilename() + '.' + 'pkl' + fn = self.getFilename() + "." + "pkl" - f = open(fn, 'w') + f = open(fn, "w") pickle.dump(self, f) f.close() @@ -97,8 +99,8 @@ def applyScale(self): def filterQA(self, QAOK, fill=np.nan): if np.size(self.data) != np.size(self.QA): - # should do this using an exception - print >> sys.stderr, 'data and QA are different sizes' + # TODO: should do this using an exception + print("data and QA are different sizes", file=sys.stderr) sys.exit() r = np.shape(self.data)[0] @@ -112,14 +114,14 @@ def filterQA(self, QAOK, fill=np.nan): def __getDummyDateList(): """ - Generate a dummy date list for testing without - hitting the server - """ + Generate a dummy date list for testing without + hitting the server + """ D = [] for y in range(2001, 2010): for d in range(1, 365, 1): - D.append('A%04d%03d' % (y, d)) + D.append("A%04d%03d" % (y, d)) return D @@ -129,21 +131,21 @@ def __error(msg): def latLonErr(): - __error('Latitude and longitude must both be specified') + __error("Latitude and longitude must both be specified") def serverDataErr(): - __error('Server not returning data (possibly busy)') + __error("Server not returning data (possibly busy)") def mkIntDate(s): """ - Convert the webserver formatted dates - to an integer format by stripping the - leading char and casting - """ + Convert the webserver formatted dates + to an integer format by stripping the + leading char and casting + """ n = s.__len__() - d = int(s[-(n - 1):n]) + d = int(s[-(n - 1) : n]) return d @@ -153,50 +155,45 @@ def setClient(wsdlurl=defaultURL): return Client(wsdlurl) -def printList(l): +def printList(lst): - for i in range(l.__len__()): - print(l[i]) + for i in range(lst.__len__()): + print(lst[i]) def printModisData(m): - print('server:', m.server) - print('product:', m.product) - print('latitude:', m.latitude) - print('longitude:', m.longitude) + print("server:", m.server) + print("product:", m.product) + print("latitude:", m.latitude) + print("longitude:", m.longitude) - print('band:', m.band) - print('nrows:', m.nrows) - print('ncols:', m.ncols) - print('cellsize:', m.cellsize) - print('scale:', m.scale) - print('units:', m.units) - print('xllcorner:', m.yllcorner) - print('yllcorner:', m.xllcorner) + print("band:", m.band) + print("nrows:", m.nrows) + print("ncols:", m.ncols) + print("cellsize:", m.cellsize) + print("scale:", m.scale) + print("units:", m.units) + print("xllcorner:", m.yllcorner) + print("yllcorner:", m.xllcorner) - print('kmAboveBelow:', m.kmAboveBelow) - print('kmLeftRight:', m.kmLeftRight) + print("kmAboveBelow:", m.kmAboveBelow) + print("kmLeftRight:", m.kmLeftRight) - print('dates:', m.dateStr) + print("dates:", m.dateStr) - print('QA:', m.QA) + print("QA:", m.QA) print(m.data) def __debugPrint(o): if DEBUG_PRINTING: - print >> sys.stderr, 'DB> ', o + print("DB> ", o, file=sys.stderr) sys.stderr.flush -def modisGetQA(m, - QAname, - client=None, - chunkSize=8, - kmAboveBelow=0, - kmLeftRight=0): +def modisGetQA(m, QAname, client=None, chunkSize=8, kmAboveBelow=0, kmLeftRight=0): startDate = m.dateInt[0] endDate = m.dateInt[-1] @@ -211,24 +208,27 @@ def modisGetQA(m, endDate=endDate, chunkSize=chunkSize, kmAboveBelow=kmAboveBelow, - kmLeftRight=kmLeftRight) + kmLeftRight=kmLeftRight, + ) m.QA = copy(q.data) -def modisClient(client=None, - product=None, - band=None, - lat=None, - lon=None, - startDate=None, - endDate=None, - chunkSize=8, - kmAboveBelow=0, - kmLeftRight=0): +def modisClient( + client=None, + product=None, + band=None, + lat=None, + lon=None, + startDate=None, + endDate=None, + chunkSize=8, + kmAboveBelow=0, + kmLeftRight=0, +): + """ + modisClient: function for building a modisData object """ - modisClient: function for building a modisData object - """ m = modisData() @@ -304,8 +304,7 @@ def modisClient(client=None, j = min(chunkSize, dateList.__len__() - i) - __debugPrint( - 'i=%d, j=%d, dateList__len__()=%d' % (i, j, dateList.__len__())) + __debugPrint("i=%d, j=%d, dateList__len__()=%d" % (i, j, dateList.__len__())) while mkIntDate(dateList[i + j - 1]) > endDate: j = j - 1 @@ -314,8 +313,9 @@ def modisClient(client=None, # print >> sys.stderr, requestStart, requestEnd - data = client.service.getsubset(lat, lon, product, band, requestStart, - requestEnd, kmAboveBelow, kmLeftRight) + data = client.service.getsubset( + lat, lon, product, band, requestStart, requestEnd, kmAboveBelow, kmLeftRight + ) # print(data) # now fill up the data structure with the returned data... @@ -346,7 +346,7 @@ def modisClient(client=None, n = n + 1 - return (m) + return m def _nearest(items, pivot): @@ -361,26 +361,28 @@ def get_available_products(): # print(prodList) -def get_available_bands(product='MOD12A2H'): +def get_available_bands(product="MOD12A2H"): client = setClient() print(client.service.getbands(product)) -def _get_single_retrieval(date, - product='MOD12A2H', - band='Lai_500m', - quality_control=None, - lat=0, - lon=0, - kmAboveBelow=100, - kmLeftRight=100): +def _get_single_retrieval( + date, + product="MOD12A2H", + band="Lai_500m", + quality_control=None, + lat=0, + lon=0, + kmAboveBelow=100, + kmLeftRight=100, +): import pandas as pd + client = setClient() # prodList = modisClient(client) # bandList = modisClient(client, product='MOD15A2H') - dateList = modisClient( - client, product=product, band=band, lat=lat, lon=lon) - dates = pd.to_datetime(dateList, format='A%Y%j') + dateList = modisClient(client, product=product, band=band, lat=lat, lon=lon) + dates = pd.to_datetime(dateList, format="A%Y%j") date = pd.to_datetime(date) if isinstance(date, pd.Timestamp): dates = _nearest(dates, date) @@ -390,10 +392,11 @@ def _get_single_retrieval(date, band=band, lat=lat, lon=lon, - startDate=int(dates.strftime('%Y%j')), - endDate=int((dates + pd.Timedelta(1, units='D')).strftime('%Y%j')), + startDate=int(dates.strftime("%Y%j")), + endDate=int((dates + pd.Timedelta(1, units="D")).strftime("%Y%j")), kmAboveBelow=kmAboveBelow, - kmLeftRight=kmLeftRight) + kmLeftRight=kmLeftRight, + ) else: m = modisClient( client, @@ -401,24 +404,22 @@ def _get_single_retrieval(date, band=band, lat=lat, lon=lon, - startDate=int(dates.min().strftime('%Y%j')), - endDate=int(date.max().strftime('%Y%j')), + startDate=int(dates.min().strftime("%Y%j")), + endDate=int(date.max().strftime("%Y%j")), kmAboveBelow=kmAboveBelow, - kmLeftRight=kmLeftRight) + kmLeftRight=kmLeftRight, + ) if quality_control is not None: modisGetQA( - m, - quality_control, - client=client, - kmAboveBelow=kmAboveBelow, - kmLeftRight=kmLeftRight) + m, quality_control, client=client, kmAboveBelow=kmAboveBelow, kmLeftRight=kmLeftRight + ) m.applyScale() return m def _fix_array(m): - return m.data.reshape(m.ncols, m.nrows, order='C')[::-1, :] + return m.data.reshape(m.ncols, m.nrows, order="C")[::-1, :] def _make_xarray_dataarray(m): @@ -437,21 +438,20 @@ def _make_xarray_dataarray(m): """ import xarray as xr from pandas import to_datetime - da = xr.DataArray( - m.data.reshape(m.ncols, m.nrows, order='C')[::-1, :], dims=('x', 'y')) - da.attrs['long_name'] = m.band - da.attrs['product'] = m.product - da.attrs['cellsize'] = m.cellsize - da.attrs['units'] = m.units - da.attrs['server'] = m.server - lon, lat = _get_latlon(m.xllcorner, m.yllcorner, m.cellsize, m.ncols, - m.nrows) + + da = xr.DataArray(m.data.reshape(m.ncols, m.nrows, order="C")[::-1, :], dims=("x", "y")) + da.attrs["long_name"] = m.band + da.attrs["product"] = m.product + da.attrs["cellsize"] = m.cellsize + da.attrs["units"] = m.units + da.attrs["server"] = m.server + lon, lat = _get_latlon(m.xllcorner, m.yllcorner, m.cellsize, m.ncols, m.nrows) da.name = m.band - da['time'] = to_datetime(str(m.dateInt[0]), format='%Y%j') + da["time"] = to_datetime(str(m.dateInt[0]), format="%Y%j") # print(da) # print(lon.shape) - da.coords['longitude'] = (('x', 'y'), lon) - da.coords['latitude'] = (('x', 'y'), lat) + da.coords["longitude"] = (("x", "y"), lon) + da.coords["latitude"] = (("x", "y"), lat) return da @@ -477,10 +477,10 @@ def _get_latlon(xll, yll, cell_width, nx, ny): returns the 2d arrays of lon, lat """ - from pyproj import Proj from numpy import linspace, meshgrid - sinu = Proj( - '+proj=sinu +a=6371007.181 +b=6371007.181 +units=m +R=6371007.181') + from pyproj import Proj + + sinu = Proj("+proj=sinu +a=6371007.181 +b=6371007.181 +units=m +R=6371007.181") x = linspace(xll, xll + cell_width * nx, nx) y = linspace(yll, yll + cell_width * ny, ny) xx, yy = meshgrid(x, y) @@ -488,20 +488,23 @@ def _get_latlon(xll, yll, cell_width, nx, ny): return lon, lat -def open_dataset(date, - product='MOD12A2H', - band='Lai_500m', - quality_control=None, - latitude=0, - longitude=0, - kmAboveBelow=100, - kmLeftRight=100): +def open_dataset( + date, + product="MOD12A2H", + band="Lai_500m", + quality_control=None, + latitude=0, + longitude=0, + kmAboveBelow=100, + kmLeftRight=100, +): import pandas as pd + try: if has_suds is False: raise ImportError except ImportError: - print('Please install a suds client') + print("Please install a suds client") date = pd.to_datetime(date) m = _get_single_retrieval( date, @@ -511,34 +514,43 @@ def open_dataset(date, lat=latitude, lon=longitude, kmAboveBelow=kmAboveBelow, - kmLeftRight=kmLeftRight) + kmLeftRight=kmLeftRight, + ) da = _make_xarray_dataarray(m) return da -def open_mfdataset(dates, - product='MOD12A2H', - band='Lai_500m', - quality_control=None, - latitude=0, - longitude=0, - kmAboveBelow=100, - kmLeftRight=100): +def open_mfdataset( + dates, + product="MOD12A2H", + band="Lai_500m", + quality_control=None, + latitude=0, + longitude=0, + kmAboveBelow=100, + kmLeftRight=100, +): + import dask import pandas as pd import xarray as xr - import dask + dates = pd.to_datetime(dates) od = dask.delayed(open_dataset) - das = dask.delayed([ - od(i, - product=product, - band=band, - quality_control=quality_control, - latitude=latitude, - longitude=longitude, - kmAboveBelow=kmAboveBelow, - kmLeftRight=kmLeftRight) for i in dates - ]) - da = xr.concat(das.compute(), dim='time') - da['time'] = dates + das = dask.delayed( + [ + od( + i, + product=product, + band=band, + quality_control=quality_control, + latitude=latitude, + longitude=longitude, + kmAboveBelow=kmAboveBelow, + kmLeftRight=kmLeftRight, + ) + for i in dates + ] + ) + da = xr.concat(das.compute(), dim="time") + da["time"] = dates return da diff --git a/monetio/sat/nasa_modis.py b/monetio/sat/nasa_modis.py index 8af07ff6..fed052b4 100644 --- a/monetio/sat/nasa_modis.py +++ b/monetio/sat/nasa_modis.py @@ -1,19 +1,21 @@ # MODIS Swath data """ this will read the modis data""" import xarray as xr + from ..grids import get_modis_latlon_from_swath_hv, get_sinu_area_def def _get_swath_from_fname(fname): - vert_grid_num = fname.split('.')[-4].split('v')[-1] - hori_grid_num = fname.split('.')[-4].split('v')[0].split('h')[-1] + vert_grid_num = fname.split(".")[-4].split("v")[-1] + hori_grid_num = fname.split(".")[-4].split("v")[0].split("h")[-1] return hori_grid_num, vert_grid_num def _get_time_from_fname(fname): import pandas as pd - u = pd.Series([fname.split('.')[-2]]) - date = pd.to_datetime(u, format='%Y%j%H%M%S')[0] + + u = pd.Series([fname.split(".")[-2]]) + date = pd.to_datetime(u, format="%Y%j%H%M%S")[0] return date @@ -25,12 +27,12 @@ def open_single_file(fname): # open the dataset dset = xr.open_dataset(fname) # rename x and y dimensions - dset = dset.rename({'XDim:MOD_Grid_BRDF': 'x', 'YDim:MOD_Grid_BRDF': 'y'}) + dset = dset.rename({"XDim:MOD_Grid_BRDF": "x", "YDim:MOD_Grid_BRDF": "y"}) # get lat lon from dset and h, v dset = get_modis_latlon_from_swath_hv(h, v, dset) # get the area_def - dset.attrs['area'] = get_sinu_area_def(dset) + dset.attrs["area"] = get_sinu_area_def(dset) # set the time - dset['time'] = timestamp + dset["time"] = timestamp return dset diff --git a/monetio/sat/nasa_utils.py b/monetio/sat/nasa_utils.py index 2c8ef2a3..d042aff5 100644 --- a/monetio/sat/nasa_utils.py +++ b/monetio/sat/nasa_utils.py @@ -1,8 +1,10 @@ +import os + import requests class SessionWithHeaderRedirection(requests.Session): - """NASA Session genergator + """NASA Session generator Parameters ---------- @@ -19,8 +21,8 @@ class SessionWithHeaderRedirection(requests.Session): Description of attribute `AUTH_HOST`. """ - import requests - AUTH_HOST = 'urs.earthdata.nasa.gov' + + AUTH_HOST = "urs.earthdata.nasa.gov" def __init__(self, username, password): super().__init__() @@ -31,13 +33,15 @@ def __init__(self, username, password): def rebuild_auth(self, prepared_request, response): headers = prepared_request.headers url = prepared_request.url - if 'Authorization' in headers: + if "Authorization" in headers: original_parsed = requests.utils.urlparse(response.request.url) redirect_parsed = requests.utils.urlparse(url) if ( - original_parsed.hostname != redirect_parsed.hostname - ) and redirect_parsed.hostname != self.AUTH_HOST and original_parsed.hostname != self.AUTH_HOST: - del headers['Authorization'] + (original_parsed.hostname != redirect_parsed.hostname) + and redirect_parsed.hostname != self.AUTH_HOST + and original_parsed.hostname != self.AUTH_HOST + ): + del headers["Authorization"] return @@ -55,7 +59,7 @@ def get_nasa_data(username, password, filename): # raise an exception in case of http errors response.raise_for_status() # save the file - with open(filename, 'wb') as fd: + with open(filename, "wb") as fd: for chunk in response.iter_content(chunk_size=1024 * 1024): fd.write(chunk) except requests.exceptions.HTTPError as e: @@ -65,34 +69,27 @@ def get_nasa_data(username, password, filename): def get_filenames_http(archive_url, ext): + from bs4 import BeautifulSoup + r = requests.get(archive_url) - soup = BeautifulSoup(r.content, 'html.parser') - links = soup.findAll('a') - return [ - archive_url + link['href'] for link in links - if link['href'].endswith('%s' % ext) - ] + soup = BeautifulSoup(r.content, "html.parser") + links = soup.findAll("a") + return [archive_url + link["href"] for link in links if link["href"].endswith("%s" % ext)] -def get_available_satellites(archive_url='https://e4ftl01.cr.usgs.gov'): - return get_filenames_http(archive_url, '/') +def get_available_satellites(archive_url="https://e4ftl01.cr.usgs.gov"): + return get_filenames_http(archive_url, "/") -def get_available_product(archive_url='https://e4ftl01.cr.usgs.gov', - satellite=None): - url = '{}/{}'.format(archive_url, satellite) - return get_filenames_http(url, '/') +def get_available_product(archive_url="https://e4ftl01.cr.usgs.gov", satellite=None): + url = f"{archive_url}/{satellite}" + return get_filenames_http(url, "/") -def get_files_to_download(year, - doy, - tiles, - output_path, - ext, - sat='MOLA', - product='MYD09A1.006'): - from numpy import array +def get_files_to_download(year, doy, tiles, output_path, ext, sat="MOLA", product="MYD09A1.006"): import pandas as pd + from numpy import array + # startdd = datetime.datetime.strptime(start_date, '%Y-%m-%d').date() # enddd = datetime.datetime.strptime(end_date, '%Y-%m-%d').date() # num_days = (enddd - startdd).days @@ -101,9 +98,8 @@ def get_files_to_download(year, d = pd.Timesamp(year, doy) # doy = (dd - datetime.date(dd.year - 1, 12, 31)).days # year = dd.year - baseurl = 'https://e4ftl01.cr.usgs.gov' - archive_url = '{}/{}/{}/{}/'.format(baseurl, sat, product, - d.strftime('%Y.%m.%d')) + baseurl = "https://e4ftl01.cr.usgs.gov" + archive_url = "{}/{}/{}/{}/".format(baseurl, sat, product, d.strftime("%Y.%m.%d")) # archive_url = 'https://e4ftl01.cr.usgs.gov/MOLA/MYD09A1.006/%d.%02d.%02d/' # sat,product, d.strftime('%Y.%m.%d')) files = get_filenames_http(archive_url, ext) @@ -111,7 +107,7 @@ def get_files_to_download(year, # files_on_http = [] # for f in files: # files_on_http.append(f) - #for tile in tiles: + # for tile in tiles: # files_on_http.append( # [f for f in files if '%d%03d.%s' % (year, doy, tile) in f]) # files_on_http2 = set([str(x[0]) for x in files_on_http if x]) @@ -119,8 +115,6 @@ def get_files_to_download(year, # return list(files_on_http2 - files_on_system) # GET THE FILES NOT CURRENTLY ON THE SYSTEM basenames = [os.path.basename(f) for f in files] - files_on_system = [ - os.path.isfile("{}/{}".format(output_path, f)) for f in basenames - ] + files_on_system = [os.path.isfile(f"{output_path}/{f}") for f in basenames] files_to_download = array(files)[~array(files_on_system)] return files_to_download, basenames diff --git a/monetio/sat/nesdis_edr_viirs.py b/monetio/sat/nesdis_edr_viirs.py index bdfd9cfd..0eb1ee93 100644 --- a/monetio/sat/nesdis_edr_viirs.py +++ b/monetio/sat/nesdis_edr_viirs.py @@ -1,21 +1,20 @@ -import inspect import os -import xarray as xr +import xarray as xr -server = 'ftp.star.nesdis.noaa.gov' -base_dir = '/pub/smcd/jhuang/npp.viirs.aerosol.data/edraot550/' +server = "ftp.star.nesdis.noaa.gov" +base_dir = "/pub/smcd/jhuang/npp.viirs.aerosol.data/edraot550/" -def open_dataset(date, resolution='high', datapath='.'): - current = change_dir(datapath) +def open_dataset(date, resolution="high", datapath="."): + current = change_dir(datapath) # noqa: F841 # check resolution; by default 0.1 degree data is assumed - if resolution is 'high' or resolution is 'h': + if resolution in {"high", "h"}: # this is the 0.1 degree data nlat = 1800 nlon = 3600 lon, lat = _get_latlons(nlat, nlon) - fname, date = download_data(date, resolution='high') + fname, date = download_data(date, resolution="high") else: nlat = 720 nlon = 1440 @@ -28,37 +27,39 @@ def open_dataset(date, resolution='high', datapath='.'): return data -def open_mfdataset(dates, resolution='high', datapath='.'): - from xarray import concat +def open_mfdataset(dates, resolution="high", datapath="."): das = [] for i in dates: das.append(open_dataset(i, resolution=resolution, datapath=datapath)) - ds = concat(das, dim='time') + ds = xr.concat(das, dim="time") return ds def read_data(fname, lat, lon, date): - from numpy import fromfile, float32, nan + from numpy import float32, fromfile, nan from pandas import to_datetime + f = fromfile(fname, dtype=float32) nlat, nlon = lon.shape aot = f.reshape(2, nlat, nlon)[0, :, :].reshape(1, nlat, nlon) aot[aot < -999] = nan datearr = to_datetime([date]) - da = xr.DataArray(aot, coords=[datearr, range( - nlat), range(nlon)], dims=['time', 'y', 'x']) - da['latitude'] = (('y', 'x'), lat) - da['longitude'] = (('y', 'x'), lon) - da.attrs['units'] = '' - da.name = 'VIIRS EDR AOD' - da.attrs['long_name'] = 'Aerosol Optical Depth' - da.attrs['source'] = 'ftp://ftp.star.nesdis.noaa.gov/pub/smcd/jhuang/npp.viirs.aerosol.data/edraot550' + da = xr.DataArray(aot, coords=[datearr, range(nlat), range(nlon)], dims=["time", "y", "x"]) + da["latitude"] = (("y", "x"), lat) + da["longitude"] = (("y", "x"), lon) + da.attrs["units"] = "" + da.name = "VIIRS EDR AOD" + da.attrs["long_name"] = "Aerosol Optical Depth" + da.attrs[ + "source" + ] = "ftp://ftp.star.nesdis.noaa.gov/pub/smcd/jhuang/npp.viirs.aerosol.data/edraot550" return da def _unzip_file(fname): import subprocess - subprocess.run(['gunzip', '-f', fname]) + + subprocess.run(["gunzip", "-f", fname]) return fname[:-3] @@ -68,21 +69,23 @@ def change_dir(to_path): return current -def download_data(date, resolution='high'): +def download_data(date, resolution="high"): import ftplib from datetime import datetime + if isinstance(date, datetime): - year = date.strftime('%Y') - yyyymmdd = date.strftime('%Y%m%d') + year = date.strftime("%Y") + yyyymmdd = date.strftime("%Y%m%d") else: from pandas import Timestamp + date = Timestamp(date) - year = date.strftime('%Y') - yyyymmdd = date.strftime('%Y%m%d') - if resolution is 'high': - file = 'npp_aot550_edr_gridded_0.10_{}.high.bin.gz'.format(yyyymmdd) + year = date.strftime("%Y") + yyyymmdd = date.strftime("%Y%m%d") + if resolution == "high": + file = f"npp_aot550_edr_gridded_0.10_{yyyymmdd}.high.bin.gz" else: - file = 'npp_aot550_edr_gridded_0.25_{}.high.bin.gz'.format(yyyymmdd) + file = f"npp_aot550_edr_gridded_0.25_{yyyymmdd}.high.bin.gz" ftp = ftplib.FTP(server) ftp.login() # print(base_dir) @@ -90,16 +93,17 @@ def download_data(date, resolution='high'): # print(base_dir + year) ftp.cwd(base_dir + year) # print(file) - ftp.retrbinary("RETR " + file, open(file, 'wb').write) + ftp.retrbinary("RETR " + file, open(file, "wb").write) return file, date def _get_latlons(nlat, nlon): from numpy import linspace, meshgrid + lon_min = -179.875 lon_max = -1 * lon_min lat_min = -89.875 - lat_max = -1. * lat_min + lat_max = -1.0 * lat_min lons = linspace(lon_min, lon_max, nlon) lats = linspace(lat_min, lat_max, nlat) lon, lat = meshgrid(lons, lats) diff --git a/monetio/sat/nesdis_eps_viirs.py b/monetio/sat/nesdis_eps_viirs.py index 2ac68ee6..c59902c6 100644 --- a/monetio/sat/nesdis_eps_viirs.py +++ b/monetio/sat/nesdis_eps_viirs.py @@ -1,13 +1,12 @@ -import inspect import os import xarray as xr -server = 'ftp.star.nesdis.noaa.gov' -base_dir = '/pub/smcd/VIIRS_Aerosol/npp.viirs.aerosol.data/epsaot550/' +server = "ftp.star.nesdis.noaa.gov" +base_dir = "/pub/smcd/VIIRS_Aerosol/npp.viirs.aerosol.data/epsaot550/" -def open_dataset(date, datapath='.'): +def open_dataset(date, datapath="."): """Short summary. Parameters @@ -23,12 +22,11 @@ def open_dataset(date, datapath='.'): Description of returned object. """ - import six current = change_dir(datapath) nlat = 720 nlon = 1440 lon, lat = _get_latlons(nlat, nlon) - if isinstance(date, six.string_types): + if isinstance(date, str): fname, date = download_data(date) else: fname, date = download_data(date) @@ -38,7 +36,7 @@ def open_dataset(date, datapath='.'): return data.where(data > 0) -def open_mfdataset(dates, datapath='.'): +def open_mfdataset(dates, datapath="."): """Short summary. Parameters @@ -55,11 +53,12 @@ def open_mfdataset(dates, datapath='.'): """ from xarray import concat + das = [] for i in dates: print(i) das.append(open_dataset(i, datapath=datapath)) - ds = concat(das, dim='time') + ds = concat(das, dim="time") return ds @@ -83,21 +82,22 @@ def read_data(fname, lat, lon, date): Description of returned object. """ - from numpy import nan from pandas import to_datetime + f = xr.open_dataset(fname) datearr = to_datetime([date]) - da = f['aot_ip_out'] - da = da.rename({'nlat': 'y', 'nlon': 'x'}) - da['latitude'] = (('y', 'x'), lat) - da['longitude'] = (('y', 'x'), lon) - da = da.expand_dims('time') - da['time'] = datearr - da.attrs['units'] = '' - da.name = 'VIIRS EPS AOT' - da.attrs['long_name'] = 'Aerosol Optical Thickness' + da = f["aot_ip_out"] + da = da.rename({"nlat": "y", "nlon": "x"}) + da["latitude"] = (("y", "x"), lat) + da["longitude"] = (("y", "x"), lon) + da = da.expand_dims("time") + da["time"] = datearr + da.attrs["units"] = "" + da.name = "VIIRS EPS AOT" + da.attrs["long_name"] = "Aerosol Optical Thickness" da.attrs[ - 'source'] = 'ftp://ftp.star.nesdis.noaa.gov/pub/smcd/VIIRS_Aerosol/npp.viirs.aerosol.data/epsaot550' + "source" + ] = "ftp://ftp.star.nesdis.noaa.gov/pub/smcd/VIIRS_Aerosol/npp.viirs.aerosol.data/epsaot550" return da @@ -120,7 +120,7 @@ def change_dir(to_path): return current -def download_data(date, resolution='high'): +def download_data(date, resolution="high"): """Short summary. Parameters @@ -138,26 +138,29 @@ def download_data(date, resolution='high'): """ import ftplib from datetime import datetime + from pandas import DatetimeIndex + if isinstance(date, datetime) or isinstance(date, DatetimeIndex): - year = date.strftime('%Y') - yyyymmdd = date.strftime('%Y%m%d') + year = date.strftime("%Y") + yyyymmdd = date.strftime("%Y%m%d") else: from pandas import Timestamp + date = Timestamp(date) - year = date.strftime('%Y') - yyyymmdd = date.strftime('%Y%m%d') + year = date.strftime("%Y") + yyyymmdd = date.strftime("%Y%m%d") # npp_eaot_ip_gridded_0.25_20181222.high.nc # print(year, yyyymmdd) - file = 'npp_eaot_ip_gridded_0.25_{}.high.nc'.format(yyyymmdd) + file = f"npp_eaot_ip_gridded_0.25_{yyyymmdd}.high.nc" exists = os.path.isfile(file) if ~exists: ftp = ftplib.FTP(server) ftp.login() ftp.cwd(base_dir + year) - ftp.retrbinary("RETR " + file, open(file, 'wb').write) + ftp.retrbinary("RETR " + file, open(file, "wb").write) else: - print('File Already Exists! Reading: {}'.format(file)) + print(f"File Already Exists! Reading: {file}") return file, date @@ -178,10 +181,11 @@ def _get_latlons(nlat, nlon): """ from numpy import linspace, meshgrid + lon_min = -179.875 lon_max = -1 * lon_min lat_min = -89.875 - lat_max = -1. * lat_min + lat_max = -1.0 * lat_min lons = linspace(lon_min, lon_max, nlon) lats = linspace(lat_max, lat_min, nlat) lon, lat = meshgrid(lons, lats) diff --git a/monetio/sat/nesdis_frp.py b/monetio/sat/nesdis_frp.py index b39c693c..153dcdb2 100644 --- a/monetio/sat/nesdis_frp.py +++ b/monetio/sat/nesdis_frp.py @@ -3,25 +3,27 @@ base_dir = "https://gsce-dtn.sdstate.edu/index.php/s/e8wPYPOL1bGXk5z/download?path=%2F" -def download_data(date, ftype='meanFRP'): - import requests as rq +def download_data(date, ftype="meanFRP"): from datetime import datetime + + import requests as rq from numpy import arange + if isinstance(date, datetime): - year = date.strftime('%Y') - yyyymmdd = date.strftime('%Y%m%d') + yyyymmdd = date.strftime("%Y%m%d") else: from pandas import Timestamp + date = Timestamp(date) - year = date.strftime('%Y') - yyyymmdd = date.strftime('%Y%m%d') - url_ftype = "&files={}.".format(ftype) + yyyymmdd = date.strftime("%Y%m%d") + + url_ftype = f"&files={ftype}." + for i in arange(1, 7, dtype=int).astype(str): - tile = ".FV3C384Grid.tile{}.bin".format(i) - url = "{}{}{}{}{}".format(base_dir, yyyymmdd, url_ftype, yyyymmdd, - tile) - fname = "{}.{}.FV3.C384Grid.tile{}.bin".format(ftype, yyyymmdd, i) - print('Retrieving file:', fname) + tile = f".FV3C384Grid.tile{i}.bin" + url = f"{base_dir}{yyyymmdd}{url_ftype}{yyyymmdd}{tile}" + fname = f"{ftype}.{yyyymmdd}.FV3.C384Grid.tile{i}.bin" + print("Retrieving file:", fname) r = rq.get(url) - with open(fname, 'wb') as f: + with open(fname, "wb") as f: f.write(r.content) diff --git a/monetio/sat/utility.py b/monetio/sat/utility.py index ff132f72..6f80e0d3 100644 --- a/monetio/sat/utility.py +++ b/monetio/sat/utility.py @@ -18,10 +18,12 @@ def write_array_tif(data, crs, transform, output_filename): Description of returned object. """ + import rasterio + new_dataset = rasterio.open( output_filename, - 'w', - driver='GTiff', + "w", + driver="GTiff", height=data.shape[0], width=data.shape[1], count=1, @@ -50,8 +52,10 @@ def latlon_2modis_tile(lat, lon): H and V """ + from pyproj import Proj + # reference: https://code.env.duke.edu/projects/mget/wiki/SinusoidalMODIS - p_modis_grid = Proj('+proj=sinu +R=6371007.181 +nadgrids=@null +wktext') + p_modis_grid = Proj("+proj=sinu +R=6371007.181 +nadgrids=@null +wktext") x, y = p_modis_grid(lon, lat) # or the inverse, from x, y to lon, lat # lat, lon = p_modis_grid(x, y, inverse=True) @@ -63,75 +67,76 @@ def latlon_2modis_tile(lat, lon): return int(V), int(H) -def warp_to_wgs84(infile): - import dask - import os - crs = '+proj=longlat +ellps=WGS84 +datum=WGS84' - out_file = infile.replace('.tif','_warped.tif') - convert_crs(infile,out_file) - os.remove(infile) - - -def convert_crs(in_file, out_file, dst_crs='EPSG:4326'): - """Short summary. - - Parameters - ---------- - in_file : type - Description of parameter `in_file`. - out_file : type - Description of parameter `out_file`. - dst_crs : type - Description of parameter `dst_crs`. - - Returns - ------- - type - Description of returned object. - - """ - # dst_crs = 'EPSG:4326' - - with rasterio.open(in_file) as src: - transform, width, height = calculate_default_transform( - src.crs, - dst_crs, - src.width, - src.height, - *src.bounds, - resolution=(0.004, 0.004)) - - kwargs = src.meta.copy() - kwargs.update({ - 'crs': dst_crs, - 'transform': transform, - 'width': width, - 'height': height - }) - - with rasterio.open(out_file, 'w', **kwargs) as dst: - for i in range(1, src.count + 1): - reproject(source=rasterio.band(src, i), - destination=rasterio.band(dst, i), - src_transform=src.transform, - src_crs=src.crs, - dst_transform=transform, - dst_crs=dst_crs, - resampling=Resampling.nearest) - -def merge_tile_data(files_to_merge,outname): - """ merges all swath data for a particular day and time fo day""" - src_files_to_mosaic = [] - for fp in files_to_merge: - src = rasterio.open(fp) - src_files_to_mosaic.append(src) - mosaic, out_trans = merge(src_files_to_mosaic, nodata=0) - out_meta = src.meta.copy() - out_meta.update({ - "driver": "GTiff", - "height": mosaic.shape[1], - "width": mosaic.shape[2], - "transform": out_trans, - "crs": crs}) - with rasterio.open(outname, "w", **out_meta) as dest: - dest.write(mosaic) +# def warp_to_wgs84(infile): +# import os + +# crs = "+proj=longlat +ellps=WGS84 +datum=WGS84" +# out_file = infile.replace(".tif", "_warped.tif") +# convert_crs(infile, out_file, dst_crs=crs) +# os.remove(infile) + + +# def convert_crs(in_file, out_file, dst_crs="EPSG:4326"): +# """Short summary. + +# Parameters +# ---------- +# in_file : type +# Description of parameter `in_file`. +# out_file : type +# Description of parameter `out_file`. +# dst_crs : type +# Description of parameter `dst_crs`. + +# Returns +# ------- +# type +# Description of returned object. + +# """ +# import rasterio + +# # dst_crs = 'EPSG:4326' + +# with rasterio.open(in_file) as src: +# transform, width, height = calculate_default_transform( # not defined! +# src.crs, dst_crs, src.width, src.height, *src.bounds, resolution=(0.004, 0.004) +# ) + +# kwargs = src.meta.copy() +# kwargs.update({"crs": dst_crs, "transform": transform, "width": width, "height": height}) + +# with rasterio.open(out_file, "w", **kwargs) as dst: +# for i in range(1, src.count + 1): +# reproject( # not defined! +# source=rasterio.band(src, i), +# destination=rasterio.band(dst, i), +# src_transform=src.transform, +# src_crs=src.crs, +# dst_transform=transform, +# dst_crs=dst_crs, +# resampling=Resampling.nearest, # not defined! +# ) + + +# def merge_tile_data(files_to_merge, outname): +# """merges all swath data for a particular day and time fo day""" +# import rasterio + +# src_files_to_mosaic = [] +# for fp in files_to_merge: +# src = rasterio.open(fp) +# src_files_to_mosaic.append(src) +# mosaic, out_trans = merge(src_files_to_mosaic, nodata=0) # not defined! +# out_meta = src.meta.copy() +# out_meta.update( +# { +# "driver": "GTiff", +# "height": mosaic.shape[1], +# "width": mosaic.shape[2], +# "transform": out_trans, +# "crs": crs, # not defined! +# } +# ) +# with rasterio.open(outname, "w", **out_meta) as dest: +# dest.write(mosaic) diff --git a/monetio/util.py b/monetio/util.py index 9c91037a..f285dd11 100644 --- a/monetio/util.py +++ b/monetio/util.py @@ -3,6 +3,8 @@ def nearest(items, pivot): def search_listinlist(array1, array2): + import numpy as np + # find intersections s1 = set(array1.flatten()) @@ -23,6 +25,7 @@ def search_listinlist(array1, array2): def linregress(x, y): + import numpy as np import statsmodels.api as sm xx = sm.add_constant(x) @@ -65,7 +68,7 @@ def kolmogorov_zurbenko_filter(df, window, iterations): def wsdir2uv(ws, wdir): - from numpy import pi, sin, cos + from numpy import cos, pi, sin u = -ws * sin(wdir * pi / 180.0) v = -ws * cos(wdir * pi / 180.0) @@ -73,44 +76,51 @@ def wsdir2uv(ws, wdir): def long_to_wide(df): - from pandas import Series, merge + from pandas import merge - w = df.pivot_table(values='obs', index=['time', 'siteid'], columns='variable').reset_index() - cols = Series(df.columns) - g = df.groupby('variable') + w = df.pivot_table(values="obs", index=["time", "siteid"], columns="variable").reset_index() + g = df.groupby("variable") for name, group in g: - w[name + '_unit'] = group.units.unique()[0] + w[name + "_unit"] = group.units.unique()[0] # mergeon = hstack((index.values, df.variable.unique())) - return merge(w, df.drop_duplicates(subset=['latitude', 'longitude']), on=['siteid', 'time']) + return merge(w, df.drop_duplicates(subset=["latitude", "longitude"]), on=["siteid", "time"]) def calc_8hr_rolling_max(df, col=None, window=None): df.index = df.time_local - df_rolling = df.groupby('siteid')[col].rolling(window, center=True, win_type='boxcar').mean().reset_index().dropna() - df_rolling_max = df_rolling.groupby('siteid').resample('D', on='time_local').max().reset_index(drop=True) + df_rolling = ( + df.groupby("siteid")[col] + .rolling(window, center=True, win_type="boxcar") + .mean() + .reset_index() + .dropna() + ) + df_rolling_max = ( + df_rolling.groupby("siteid").resample("D", on="time_local").max().reset_index(drop=True) + ) df = df.reset_index(drop=True) - return df.merge(df_rolling_max, on=['siteid', 'time_local']) + return df.merge(df_rolling_max, on=["siteid", "time_local"]) def calc_24hr_ave(df, col=None): df.index = df.time_local - df_24hr_ave = df.groupby('siteid')[col].resample('D').mean().reset_index() + df_24hr_ave = df.groupby("siteid")[col].resample("D").mean().reset_index() df = df.reset_index(drop=True) - return df.merge(df_24hr_ave, on=['siteid', 'time_local']) + return df.merge(df_24hr_ave, on=["siteid", "time_local"]) def calc_3hr_ave(df, col=None): df.index = df.time_local - df_3hr_ave = df.groupby('siteid')[col].resample('3H').mean().reset_index() + df_3hr_ave = df.groupby("siteid")[col].resample("3H").mean().reset_index() df = df.reset_index(drop=True) - return df.merge(df_3hr_ave, on=['siteid', 'time_local']) + return df.merge(df_3hr_ave, on=["siteid", "time_local"]) def calc_annual_ave(df, col=None): df.index = df.time_local - df_annual_ave = df.groupby('siteid')[col].resample('A').mean().reset_index() + df_annual_ave = df.groupby("siteid")[col].resample("A").mean().reset_index() df = df.reset_index(drop=True) - return df.merge(df_annual_ave, on=['siteid', 'time_local']) + return df.merge(df_annual_ave, on=["siteid", "time_local"]) def get_giorgi_region_bounds(index=None, acronym=None): @@ -118,38 +128,133 @@ def get_giorgi_region_bounds(index=None, acronym=None): i = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] acro = [ - 'NAU', - 'SAU', - 'AMZ', - 'SSA', - 'CAM', - 'WNA', - 'CNA', - 'ENA', - 'ALA', - 'GRL', - 'MED', - 'NEU', - 'WAF', - 'EAF', - 'SAF', - 'SAH', - 'SEA', - 'EAS', - 'SAS', - 'CAS', - 'TIB', - 'NAS', + "NAU", + "SAU", + "AMZ", + "SSA", + "CAM", + "WNA", + "CNA", + "ENA", + "ALA", + "GRL", + "MED", + "NEU", + "WAF", + "EAF", + "SAF", + "SAH", + "SEA", + "EAS", + "SAS", + "CAS", + "TIB", + "NAS", + ] + lonmax = [ + 155, + 155, + -34, + -40, + -83, + -103, + -85, + -60, + -103, + -10, + 40, + 40, + 22, + 52, + 52, + 65, + 155, + 145, + 100, + 75, + 100, + 180, + ] + lonmin = [ + 110, + 110, + -82, + -76, + -116, + -130, + -103, + -85, + -170, + -103, + -10, + -10, + -20, + 22, + -10, + -20, + 95, + 100, + 65, + 40, + 75, + 40, + ] + latmax = [ + -11, + -28, + 12, + -20, + 30, + 60, + 50, + 50, + 72, + 85, + 48, + 75, + 18, + 18, + -12, + 30, + 20, + 50, + 30, + 50, + 50, + 70, + ] + latmin = [ + -28, + -45, + -20, + -56, + 10, + 30, + 30, + 25, + 60, + 50, + 30, + 48, + -12, + -12, + -35, + 18, + -11, + 20, + 5, + 30, + 30, + 50, ] - lonmax = [155, 155, -34, -40, -83, -103, -85, -60, -103, -10, 40, 40, 22, 52, 52, 65, 155, 145, 100, 75, 100, 180] - lonmin = [110, 110, -82, -76, -116, -130, -103, -85, -170, -103, -10, -10, -20, 22, -10, -20, 95, 100, 65, 40, 75, 40] - latmax = [-11, -28, 12, -20, 30, 60, 50, 50, 72, 85, 48, 75, 18, 18, -12, 30, 20, 50, 30, 50, 50, 70] - latmin = [-28, -45, -20, -56, 10, 30, 30, 25, 60, 50, 30, 48, -12, -12, -35, 18, -11, 20, 5, 30, 30, 50] - df = pd.DataFrame({'latmin': latmin, 'lonmin': lonmin, 'latmax': latmax, 'lonmax': lonmax, 'acronym': acro}, index=i) + df = pd.DataFrame( + {"latmin": latmin, "lonmin": lonmin, "latmax": latmax, "lonmax": lonmax, "acronym": acro}, + index=i, + ) try: if index is None and acronym is None: - print('either index or acronym needs to be supplied') - print('look here https://web.northeastern.edu/sds/web/demsos/images_002/subregions.jpg') + print("either index or acronym needs to be supplied") + print("look here https://web.northeastern.edu/sds/web/demsos/images_002/subregions.jpg") raise ValueError elif index is not None: return df.loc[df.index == index].values.flatten() @@ -160,13 +265,18 @@ def get_giorgi_region_bounds(index=None, acronym=None): def get_giorgi_region_df(df): - df.loc[:, 'GIORGI_INDEX'] = None - df.loc[:, 'GIORGI_ACRO'] = None + df.loc[:, "GIORGI_INDEX"] = None + df.loc[:, "GIORGI_ACRO"] = None for i in range(22): latmin, lonmin, latmax, lonmax, acro = get_giorgi_region_bounds(index=int(i + 1)) - con = (df.longitude <= lonmax) & (df.longitude >= lonmin) & (df.latitude <= latmax) & (df.latitude >= latmin) - df.loc[con, 'GIORGI_INDEX'] = i + 1 - df.loc[con, 'GIORGI_ACRO'] = acro + con = ( + (df.longitude <= lonmax) + & (df.longitude >= lonmin) + & (df.latitude <= latmax) + & (df.latitude >= latmin) + ) + df.loc[con, "GIORGI_INDEX"] = i + 1 + df.loc[con, "GIORGI_ACRO"] = acro return df @@ -202,20 +312,34 @@ def calc_13_category_usda_soil_type(clay, sand, silt): Description of returned object. """ - from numpy import zeros, where + from numpy import where, zeros stype = zeros(clay.shape) stype[where((silt + clay * 1.5 < 15.0) & (clay != 255))] = 1.0 # SAND - stype[where((silt + 1.5 * clay >= 15.0) & (silt + 1.5 * clay < 30) & (clay != 255))] = 2.0 # Loamy Sand - stype[where((clay >= 7.0) & (clay < 20) & (sand > 52) & (silt + 2 * clay >= 30) & (clay != 255))] = 3.0 # Sandy Loam (cond 1) - stype[where((clay < 7) & (silt < 50) & (silt + 2 * clay >= 30) & (clay != 255))] = 3 # sandy loam (cond 2) - stype[where((silt >= 50) & (clay >= 12) & (clay < 27) & (clay != 255))] = 4 # silt loam (cond 1) + stype[ + where((silt + 1.5 * clay >= 15.0) & (silt + 1.5 * clay < 30) & (clay != 255)) + ] = 2.0 # Loamy Sand + stype[ + where((clay >= 7.0) & (clay < 20) & (sand > 52) & (silt + 2 * clay >= 30) & (clay != 255)) + ] = 3.0 # Sandy Loam (cond 1) + stype[ + where((clay < 7) & (silt < 50) & (silt + 2 * clay >= 30) & (clay != 255)) + ] = 3 # sandy loam (cond 2) + stype[ + where((silt >= 50) & (clay >= 12) & (clay < 27) & (clay != 255)) + ] = 4 # silt loam (cond 1) stype[where((silt >= 50) & (silt < 80) & (clay < 12) & (clay != 255))] = 4 # silt loam (cond 2) stype[where((silt >= 80) & (clay < 12) & (clay != 255))] = 5 # silt - stype[where((clay >= 7) & (clay < 27) & (silt >= 28) & (silt < 50) & (sand <= 52) & (clay != 255))] = 6 # loam - stype[where((clay >= 20) & (clay < 35) & (silt < 28) & (sand > 45) & (clay != 255))] = 7 # sandy clay loam + stype[ + where((clay >= 7) & (clay < 27) & (silt >= 28) & (silt < 50) & (sand <= 52) & (clay != 255)) + ] = 6 # loam + stype[ + where((clay >= 20) & (clay < 35) & (silt < 28) & (sand > 45) & (clay != 255)) + ] = 7 # sandy clay loam stype[where((clay >= 27) & (clay < 40.0) & (sand > 40) & (clay != 255))] = 8 # silt clay loam - stype[where((clay >= 27) & (clay < 40.0) & (sand > 20) & (sand <= 45) & (clay != 255))] = 9 # clay loam + stype[ + where((clay >= 27) & (clay < 40.0) & (sand > 20) & (sand <= 45) & (clay != 255)) + ] = 9 # clay loam stype[where((clay >= 35) & (sand > 45) & (clay != 255))] = 10 # sandy clay stype[where((clay >= 40) & (silt >= 40) & (clay != 255))] = 11 # silty clay stype[where((clay >= 40) & (sand <= 45) & (silt < 40) & (clay != 255))] = 12 # clay diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..99085e20 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,8 @@ + +[tool.black] +line-length = 100 + +[tool.isort] +known_first_party = "monetio" +profile = "black" +line_length = 100 diff --git a/setup.py b/setup.py index e6d80702..6d074e4f 100644 --- a/setup.py +++ b/setup.py @@ -1,21 +1,23 @@ try: - from setuptools import setup, find_packages + from setuptools import find_packages, setup except ImportError: from distutils.core import setup setup( - name='monetio', - version='0.1', - url='https://github.com/noaa-oar-arl/monetio', - license='MIT', + name="monetio", + version="0.1", + url="https://github.com/noaa-oar-arl/monetio", + license="MIT", include_package_data=True, - author='Barry D. Baker', - author_email='barry.baker@noaa.gov', - maintainer='Barry Baker', - maintainer_email='barry.baker@noaa.gov', + author="Barry D. Baker", + author_email="barry.baker@noaa.gov", + maintainer="Barry Baker", + maintainer_email="barry.baker@noaa.gov", packages=find_packages(), - package_data={'': ['data/*.txt', 'data/*.dat', 'data/*.hdf', 'data/*.ncf', 'data/*.jpg', 'data/*.png']}, - keywords=['model', 'verification', 'hysplit', 'cmaq', 'atmosphere', 'camx', 'evaluation'], - description='The Model and Observation Evaluation Toolkit (MONET)', - install_requires=['pandas', 'netcdf4', 'xarray', 'scipy', 'dask', 's3fs'], + package_data={ + "": ["data/*.txt", "data/*.dat", "data/*.hdf", "data/*.ncf", "data/*.jpg", "data/*.png"] + }, + keywords=["model", "verification", "hysplit", "cmaq", "atmosphere", "camx", "evaluation"], + description="The Model and Observation Evaluation Toolkit (MONET)", + install_requires=["pandas", "netcdf4", "xarray", "scipy", "dask", "s3fs"], ) diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..4e82f5bc --- /dev/null +++ b/tox.ini @@ -0,0 +1,8 @@ +[flake8] +max-line-length = 100 +ignore = + E203 # whitespace before ':' - doesn't work well with black + E402 # module level import not at top of file + E501 # line too long - let black worry about that + W503 # line break before binary operator + E226 # missing whitespace around arithmetic operator