Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Obs only #593

Merged
merged 11 commits into from
Apr 25, 2022
9 changes: 8 additions & 1 deletion pyaerocom/aeroval/experiment_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,14 @@ def _create_statistics_json(self):
stats_info = statistics_defaults
stats_info.update(extended_statistics)
if self.cfg.statistics_opts.add_trends:
stats_info.update(statistics_trend)
if self.cfg.processing_opts.obs_only:
obs_statistics_trend = {}
for key in statistics_trend.keys():
avaldebe marked this conversation as resolved.
Show resolved Hide resolved
if "mod" not in key:
obs_statistics_trend[key] = statistics_trend[key]
stats_info.update(obs_statistics_trend)
avaldebe marked this conversation as resolved.
Show resolved Hide resolved
else:
stats_info.update(statistics_trend)
write_json(stats_info, self.statistics_file, indent=4)

def _get_var_name_and_type(self, var_name):
Expand Down
21 changes: 20 additions & 1 deletion pyaerocom/aeroval/experiment_processor.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging
from multiprocessing import dummy

from pyaerocom.aeroval._processing_base import HasColocator, ProcessingEngine
from pyaerocom.aeroval.coldatatojson_engine import ColdataToJsonEngine
from pyaerocom.aeroval.helpers import _delete_dummy_model, _make_dummy_model
from pyaerocom.aeroval.modelmaps_engine import ModelMapsEngine
from pyaerocom.aeroval.superobs_engine import SuperObsEngine

Expand Down Expand Up @@ -101,8 +106,20 @@ def run(self, model_name=None, obs_name=None, var_list=None, update_interface=Tr
var_list = [var_list]

self.cfg._check_time_config()
model_list = self.cfg.model_cfg.keylist(model_name)

obs_list = self.cfg.obs_cfg.keylist(obs_name)
if self.cfg.model_cfg == {}:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you could use the fact that an empty dictionary "is falsy"

        if not self.cfg.model_cfg:
            ...

this also covers the case when self.cfg.model_cfg is None

Is it possible for the model_cfg attribute to be missing?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it is possible. I think aeroval will crash if no model_cfg is given

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and model_cfg = None?

logging.info("No model found, will make dummy model data")
self.cfg.webdisp_opts.hide_charts = ["scatterplot"]
self.cfg.webdisp_opts.hide_pages = ["maps.php", "intercomp.php", "overall.php"]
model_id = _make_dummy_model(obs_list, self.cfg)
self.cfg.processing_opts.obs_only = True
use_dummy_model = True
else:
model_id = None
use_dummy_model = False

model_list = self.cfg.model_cfg.keylist(model_name)

logger.info("Start processing")

Expand All @@ -119,6 +136,8 @@ def run(self, model_name=None, obs_name=None, var_list=None, update_interface=Tr

if update_interface:
self.update_interface()
if use_dummy_model:
_delete_dummy_model(model_id)
logger.info("Finished processing.")

def update_interface(self):
Expand Down
72 changes: 71 additions & 1 deletion pyaerocom/aeroval/helpers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,26 @@
import glob
import logging
import os
import shutil

from pyaerocom import const
from pyaerocom.aeroval.modelentry import ModelEntry
from pyaerocom.aeroval.varinfo_web import VarinfoWeb
from pyaerocom.colocateddata import ColocatedData
from pyaerocom.colocation_auto import Colocator
from pyaerocom.exceptions import TemporalResolutionError
from pyaerocom.helpers import start_stop_str
from pyaerocom.griddeddata import GriddedData
from pyaerocom.helpers import (
_make_dummy_cube,
get_highest_resolution,
get_max_period_range,
start_stop_str,
)
from pyaerocom.io import ReadGridded
from pyaerocom.tstype import TsType

logger = logging.getLogger(__name__)


def check_var_ranges_avail(model_data, var_name):
"""
Expand Down Expand Up @@ -130,3 +147,56 @@ def _get_min_max_year_periods(statistics_periods):
if perstop > stopyr:
stopyr = perstop
return startyr, stopyr


def _make_dummy_model(obs_list: list, cfg) -> str:
avaldebe marked this conversation as resolved.
Show resolved Hide resolved

# Sets up variable for the model register
tmpdir = const.LOCAL_TMP_DIR
const.add_data_search_dir(tmpdir)

model_id = "dummy_model"
outdir = os.path.join(tmpdir, f"{model_id}/renamed")

os.makedirs(outdir, exist_ok=True)
Comment on lines +160 to +162
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure if GriddedData.to_netcdf accepts pathlib.Path objects but it might be cleaner to write this part with a Path object

    outdir = Path(tmpdir) / f"{model_id}/renamed"
    outdir.mkdir(parents=True, exist_ok=True)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GriddedData.to_netcdf uses os.path.join, so I think it must be str. If we are going to change this, then it should be its own PR

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

os.path.join accepts path-like objects (see docs),
which means that it will accept pathlib.Path objects

Copy link
Collaborator

@avaldebe avaldebe Mar 22, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not use a Path object?

    outdir = Path(tmpdir) / f"{model_id}/renamed"
    outdir.mkdir(parents=True, exist_ok=True)

as I wrote on my previous review

GriddedData.to_netcdf uses os.path.join, so I think it must be str. If we are going to change this, then it should be its own PR

os.path.join accepts path-like objects (see docs),
which means that it will accept pathlib.Path objects


# Finds dates and freq to use, so that all observations are covered
(start, stop) = get_max_period_range(cfg.time_cfg.periods)
freq = get_highest_resolution(*cfg.time_cfg.freqs)

# Loops over variables in obs
for obs in obs_list:
for var in cfg.obs_cfg[obs]["obs_vars"]:
# Create dummy cube

dummy_cube = _make_dummy_cube(var, start_yr=start, stop_yr=stop, freq=freq)

# Converts cube to GriddedData
dummy_grid = GriddedData(dummy_cube)

# Loop over each year
yr_gen = dummy_grid.split_years()

for dummy_grid_yr in yr_gen:
# Add to netcdf
yr = dummy_grid_yr.years_avail()[0]
vert_code = cfg.obs_cfg[obs]["obs_vert_type"]

save_name = dummy_grid_yr.aerocom_savename(model_id, var, vert_code, yr, freq)
dummy_grid_yr.to_netcdf(outdir, savename=save_name)

# Add dummy model to cfg
cfg.model_cfg["dummy"] = ModelEntry(model_id="dummy_model")

return model_id


def _delete_dummy_model(model_id: str) -> None:
    """Delete the NetCDF files stored for a dummy model.

    Removes every ``*.nc`` file found in
    ``<const.LOCAL_TMP_DIR>/<model_id>/renamed``. The directory itself is
    left in place.

    Parameters
    ----------
    model_id : str
        ID of the dummy model; used as the name of the sub-directory below
        ``const.LOCAL_TMP_DIR``.
    """
    tmpdir = const.LOCAL_TMP_DIR
    # Kept from the original implementation: registers the temp directory as
    # a data search directory before the files are removed.
    const.add_data_search_dir(tmpdir)

    outdir = os.path.join(tmpdir, f"{model_id}/renamed")
    for nc_file in glob.glob(outdir + "/*.nc"):
        # Report through the module logger rather than print() so the
        # message follows the application's logging configuration.
        logger.info("Deleting dummy model %s", nc_file)
        os.remove(nc_file)
avaldebe marked this conversation as resolved.
Show resolved Hide resolved
3 changes: 3 additions & 0 deletions pyaerocom/aeroval/setupclasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,8 @@ def __init__(self, **kwargs):
self.var_order_menu = []
self.obs_order_menu = []
self.model_order_menu = []
self.hide_charts = []
self.hide_pages = []
self.update(**kwargs)


Expand All @@ -227,6 +229,7 @@ def __init__(self, **kwargs):
self.only_colocation = False
#: If True, process only maps (skip obs evaluation)
self.only_model_maps = False
self.obs_only = False
self.update(**kwargs)


Expand Down
7 changes: 5 additions & 2 deletions pyaerocom/griddeddata.py
Original file line number Diff line number Diff line change
Expand Up @@ -1940,7 +1940,10 @@ def crop(self, lon_range=None, lat_range=None, time_range=None, region=None):
if time_range is None:
return GriddedData(data, **suppl)
else:
if all(isinstance(x, str) for x in time_range):

if all(isinstance(x, str) for x in time_range) or all(
isinstance(x, np.datetime64) for x in time_range
):
avaldebe marked this conversation as resolved.
Show resolved Hide resolved
time_range = (pd.Timestamp(time_range[0]), pd.Timestamp(time_range[1]))
avaldebe marked this conversation as resolved.
Show resolved Hide resolved
if all(isinstance(x, pd.Timestamp) for x in time_range):
logger.info("Cropping along time axis based on Timestamps")
Expand All @@ -1953,7 +1956,7 @@ def crop(self, lon_range=None, lat_range=None, time_range=None, region=None):
elif all(isinstance(x, int) for x in time_range):
logger.info("Cropping along time axis based on indices")
data = data[time_range[0] : time_range[1]]
if not data:
else:
raise DataExtractionError("Failed to apply temporal cropping")
return GriddedData(data, check_unit=False, convert_unit_on_init=False, **suppl)

Expand Down
74 changes: 74 additions & 0 deletions pyaerocom/helpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
General helper methods for the pyaerocom library.
"""

import logging
import math as ma
from collections import Counter
Expand Down Expand Up @@ -39,6 +40,7 @@
sec_units,
)
from pyaerocom.tstype import TsType
from pyaerocom.variable_helpers import get_variable

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -1682,3 +1684,75 @@ def get_time_rng_constraint(start, stop):
t_upper = iris.time.PartialDateTime(year=stop.year, month=stop.month, day=stop.day)

return iris.Constraint(time=lambda cell: t_lower <= cell <= t_upper)


def get_max_period_range(periods):
    """Return the overall (start, stop) year span covered by *periods*.

    Each entry is a string holding either a single year (``"2010"``) or a
    ``"<start>-<stop>"`` range (``"2005-2015"``). A single year counts as
    both its own start and its own stop.

    Parameters
    ----------
    periods : iterable of str
        Period strings as described above. Any iterable is accepted,
        including one-shot generators, because it is traversed only once.

    Returns
    -------
    tuple of int
        ``(start, stop)``: the smallest start year and the largest stop year.

    Raises
    ------
    ValueError
        If *periods* is empty or an entry cannot be parsed as integer years.
    """
    starts = []
    stops = []
    for per in periods:
        # Split once and reuse the parts for both the start and the stop year.
        parts = per.split("-")
        starts.append(int(parts[0]))
        stops.append(int(parts[1]) if len(parts) > 1 else int(parts[0]))

    if not starts:
        raise ValueError("periods must contain at least one entry")

    return min(starts), max(stops)


def _make_dummy_cube(
    var_name: str, start_yr: int = 2000, stop_yr: int = 2020, freq: str = "daily", dtype=float
) -> iris.cube.Cube:
    """Create a coarse global cube filled with ones for variable *var_name*.

    The cube has dimensions ``(time, latitude, longitude)``. The time axis
    runs from 1 January of *start_yr* to 31 December of *stop_yr* at the
    pandas frequency that ``TS_TYPE_TO_PANDAS_FREQ`` maps *freq* to.

    Parameters
    ----------
    var_name : str
        Variable name; its units are taken from ``get_variable(var_name)``.
    start_yr : int
        First year of the time axis.
    stop_yr : int
        Last year of the time axis.
    freq : str
        Temporal resolution; must be a key of ``TS_TYPE_TO_PANDAS_FREQ``.
    dtype
        Type that the data array and all coordinate points are cast to.
        NOTE(review): with a non-float dtype and a sub-daily *freq* the cast
        truncates fractional day offsets -- confirm no caller relies on that.

    Returns
    -------
    iris.cube.Cube
        Cube of ones with ``var_name`` set and ``ts_type`` set to *freq*.

    Raises
    ------
    ValueError
        If *freq* is not a key of ``TS_TYPE_TO_PANDAS_FREQ``.
    """
    if freq not in TS_TYPE_TO_PANDAS_FREQ:
        raise ValueError(f"{freq} not a recognized frequency")

    start_str = f"{start_yr}-01-01 00:00"
    stop_str = f"{stop_yr}-12-31 00:00"
    times = pd.date_range(start_str, stop_str, freq=TS_TYPE_TO_PANDAS_FREQ[freq])

    # The time unit is "days since <start>", so the points must be real day
    # offsets. The position of a timestamp in `times` equals its day offset
    # only for daily data; computing the offsets keeps the axis correct for
    # every other frequency.
    time_unit = Unit(f"days since {start_str}", calendar="gregorian")
    days_since_start = np.asarray((times - pd.Timestamp(start_str)) / pd.Timedelta(days=1))

    unit = get_variable(var_name).units

    # Latitude spans -90..90 and longitude -180..180. The two ranges were
    # previously swapped, which produced latitude centres of +/-135 degrees
    # and longitudes covering only half of the globe.
    lat_range = (-90, 90)
    lon_range = (-180, 180)
    lat_res_deg = 90
    lon_res_deg = 45

    # Cell centres: start half a cell inside the lower edge of each range.
    lats = np.arange(lat_range[0] + lat_res_deg / 2, lat_range[1] + lat_res_deg / 2, lat_res_deg)
    lons = np.arange(lon_range[0] + lon_res_deg / 2, lon_range[1] + lon_res_deg / 2, lon_res_deg)

    latdim = iris.coords.DimCoord(
        lats,
        var_name="lat",
        standard_name="latitude",
        long_name="Center coordinates for latitudes",
        circular=False,
        units=Unit("degrees"),
    )

    londim = iris.coords.DimCoord(
        lons,
        var_name="lon",
        standard_name="longitude",
        long_name="Center coordinates for longitudes",
        circular=False,
        units=Unit("degrees"),
    )

    timedim = iris.coords.DimCoord(
        days_since_start, var_name="time", standard_name="time", long_name="Time", units=time_unit
    )

    latdim.guess_bounds()
    londim.guess_bounds()
    dummy = iris.cube.Cube(np.ones((len(times), len(lats), len(lons))), units=unit)

    dummy.add_dim_coord(latdim, 1)
    dummy.add_dim_coord(londim, 2)
    dummy.add_dim_coord(timedim, 0)
    dummy.var_name = var_name
    dummy.ts_type = freq

    # Cast the data and every coordinate to the requested dtype.
    dummy.data = dummy.data.astype(dtype)
    for coord in dummy.coords():
        coord.points = coord.points.astype(dtype)

    return dummy