Merge pull request #3 from ArtesiaWater/dev
add logging, bugfixes, etc.
dbrakenhoff authored May 16, 2019
2 parents b1deb3e + fad45dd commit 45f3512
Showing 11 changed files with 172 additions and 98 deletions.
2 changes: 1 addition & 1 deletion examples/eag_excel_example.py
@@ -19,7 +19,7 @@
e = wb.create_eag(eag_id, eag_name, df_ms, use_waterlevel_series=False)

# Add TimeSeries
-e.add_series(df_ts, tmin=tmin, tmax=tmax)
+e.add_series_from_database(df_ts, tmin=tmin, tmax=tmax)

# Add extra series (optional)
df_series = wb.utils.get_extra_series_from_excel(excelfile)
4 changes: 2 additions & 2 deletions setup.py
@@ -17,8 +17,8 @@
description='Python Package voor het maken van Waterbalansen bij Waternet',
long_description=l_d,
url='http://waternet.nl',
-author='Raoul Collenteur',
-author_email='r.collenteur@artesia-water.nl, ',
+author='David Brakenhoff',
+author_email='d.brakenhoff@artesia-water.nl, ',
license='Unknown',
classifiers=[
'Development Status :: 4 - Beta',
2 changes: 1 addition & 1 deletion tests/test_run_eag.py
@@ -73,7 +73,7 @@ def test_add_real_series_and_simulate():
e = test_make_eag()

# add default series
-e.add_series(reeksen, tmin=tmin, tmax=tmax)
+e.add_series_from_database(reeksen, tmin=tmin, tmax=tmax)

# add external series
series = pd.read_csv(os.path.join(
35 changes: 18 additions & 17 deletions waterbalans/buckets.py
@@ -4,7 +4,7 @@
"""
from abc import ABC
# from ast import literal_eval

import pandas as pd
from pastas.read import KnmiStation

@@ -135,7 +135,7 @@ def simulate(self, params=None, tmin=None, tmax=None, dt=1.0):

# test if columns are present!
if not {"Neerslag", "Verdamping", "Qkwel"}.issubset(series.columns):
-print("Warning: {} not in series. Assumed equal to 0!".format(
+self.eag.logger.warning("Warning: {} not in series. Assumed equal to 0!".format(
{"Neerslag", "Verdamping", "Qkwel"} - set(series.columns)))

for _, pes in series.reindex(columns=["Neerslag", "Verdamping", "Qkwel"],
@@ -208,7 +208,7 @@ def simulate(self, params=None, tmin=None, tmax=None, dt=1.0):

# test if columns are present!
if not {"Neerslag", "Verdamping", "Qkwel"}.issubset(series.columns):
-print("Warning: {} not in series. Assumed equal to 0!".format(
+self.eag.logger.warning("Warning: {} not in series. Assumed equal to 0!".format(
{"Neerslag", "Verdamping", "Qkwel"} - set(series.columns)))
for _, pes in series.reindex(columns=["Neerslag", "Verdamping", "Qkwel"],
fill_value=0.0).iterrows():
@@ -258,7 +258,7 @@ def simulate(self, params=None, tmin=None, tmax=None, dt=1.0):
# Get parameters
non_defined_params = set(self.parameters.index) - set(params.index)
if len(non_defined_params) > 0:
-print("Warning: {} not set in parameters, using default values!".format(
+self.eag.logger.warning("Warning: {} not set in parameters, using default values!".format(
non_defined_params))

self.parameters.update(params)
@@ -283,9 +283,9 @@ def simulate(self, params=None, tmin=None, tmax=None, dt=1.0):

# test if columns are present!
if not {"Neerslag", "Verdamping", "Qkwel"}.issubset(series.columns):
-print("Warning Bucket {0}-{1}: {2} not in series. Assumed equal to 0!".format(self.name, self.idn,
-      {"Neerslag", "Verdamping", "Qkwel"} -
-      set(series.columns)))
+self.eag.logger.warning("Warning Bucket {0}-{1}: {2} not in series. Assumed equal to 0!".format(self.name, self.idn,
+                        {"Neerslag", "Verdamping", "Qkwel"} -
+                        set(series.columns)))
for _, pes in series.reindex(columns=["Neerslag", "Verdamping", "Qkwel"],
fill_value=0.0).iterrows():
p, e, s = pes
@@ -335,14 +335,14 @@ def simulate(self, params=None, tmin=None, tmax=None, dt=1.0):
# And also if a different period is calculated!
try:
if self.use_eag_cso_series:
-print("Pick up CSO timeseries from eag.series...",
-      end="", flush=True)
+self.eag.logger.info(
+    "Pick up CSO timeseries from eag.series...")
ts_cso = self.eag.series.loc[pd.to_datetime(
tmin):pd.to_datetime(tmax), "q_cso"]
-print("Success!", end="\n")
+self.eag.logger.info("Success!")
else:
-print("Try picking up CSO timeseries from pickle or csv...",
-      end="", flush=True)
+self.eag.logger.info(
+    "Pick up CSO timeseries from pickle or csv...")
if self.path_to_cso_series is None:
fcso = r"./data/cso_series/{0:g}_cso_timeseries.pklz".format(
knmistn)
@@ -353,15 +353,16 @@ def simulate(self, params=None, tmin=None, tmax=None, dt=1.0):
else:
ts_cso = pd.read_csv(fcso, index_col=[0], parse_dates=True)
ts_cso = ts_cso.loc[pd.to_datetime(tmin):pd.to_datetime(tmax)]
-print("Success!", end="\n")
-except FileNotFoundError:
-    print("Failed, calculating CSO series... Downloading hourly KNMI data for station {}".format(
+self.eag.logger.info("Success!")
+except (FileNotFoundError, KeyError) as e:
+    self.eag.logger.warning("Failed, calculating CSO series... Downloading hourly KNMI data for station {}".format(
knmistn))
prec = KnmiStation.download(
stns=[knmistn], interval="hour", start=tmin, end=tmax, vars="RH")
-print("Download succeeded, calculating series...", end="", flush=True)
+self.eag.logger.info(
+    "KNMI Download succeeded, calculating series...")
ts_cso = calculate_cso(prec.data.RH, Bmax, POCmax, alphasmooth=0.1)
-print("Success! (Pickling series for future use.)")
+self.eag.logger.info("Success! (Pickling series for future use.)")
ts_cso.to_pickle(r"./data/cso_series/{0:g}_cso_timeseries.pklz".format(knmistn),
compression="zip")

2 changes: 2 additions & 0 deletions waterbalans/create.py
@@ -32,6 +32,7 @@ def create_eag(idn, name, buckets, gaf=None, series=None, use_waterlevel_series=
"""
eag = Eag(idn=idn, name=name, gaf=gaf, series=series)
eag.logger.info("Creating EAG object for '{}'".format(name))

# Voeg bakjes toe
for _, bucket in buckets.iterrows():
@@ -80,6 +81,7 @@ def create_gaf(idn, name, gafbuckets=None, eags=None, series=None,
"""
gaf = Gaf(idn=idn, name=name, series=series)
gaf.logger.info("Creating GAF object for '{}':".format(name))

# if Gaf has not been split into EAGs use gafbucket df as model structure
if gafbuckets is not None:
71 changes: 53 additions & 18 deletions waterbalans/eag.py
@@ -5,6 +5,7 @@
"""

+import logging
from collections import OrderedDict

import numpy as np
@@ -36,6 +37,9 @@ class Eag:
"""

def __init__(self, idn=None, name=None, gaf=None, series=None):

+self.logger = self.get_logger()

# Basic information
self.gaf = gaf
self.idn = idn
@@ -57,6 +61,22 @@ def __init__(self, idn=None, name=None, gaf=None, series=None):
# Add functionality from other modules
self.plot = Eag_Plots(self)

+    def __repr__(self):
+        return "<EAG object: {0}>".format(self.name)
+
+    def get_logger(self, log_level=logging.INFO, filename=None):
+
+        logging.basicConfig(format='%(asctime)s | %(funcName)s - %(levelname)s : %(message)s',
+                            level=logging.INFO)
+        logger = logging.getLogger()
+        logger.setLevel(log_level)
+
+        if filename is not None:
+            fhandler = logging.FileHandler(filename=filename, mode='w')
+            logger.addHandler(fhandler)
+
+        return logger
+
def add_bucket(self, bucket, replace=False):
"""Add a single bucket to the Eag.
@@ -92,8 +112,16 @@ def add_water(self, water, replace=False):
else:
self.water = water

-    def add_series(self, series, tmin="2000", tmax="2015", freq="D", fillna=False, method="append"):
-        """Method to add timeseries based on a pandas DataFrame.
+    def add_series_from_database(self, series, tmin="2000", tmax="2015",
+                                 freq="D", fillna=False, method="append"):
+        """Method to add timeseries based on a DataFrame containing
+        information about the series. Series are described by one or more
+        rows in the DataFrame with at least the following columns:
+        - Bucket ID: ID of the bucket the series should be added to
+        - SeriesType (unfortunately called ParamType at the moment): Origin or Type
+          of the Series: e.g. FEWS, KNMI, Local, ValueSeries, Constant
+        - ClusterType: Name of the parameter
Parameters
----------
@@ -114,6 +142,8 @@ def add_series(self, series, tmin="2000", tmax="2015", freq="D", fillna=False, m
"""
# Sort series to parse in order: Valueseries -> Local -> FEWS -> Constant
series = series.sort_values(by="ParamType", ascending=False)
+self.logger.info(
+    "Parsing timeseries from database export and adding to EAG.")

for idn, df in series.groupby(["ParamType", "BakjeID", "ClusterType"], sort=False):
ParamType, BakjeID, ClusterType = idn
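
The new docstring and the groupby call above pin down the minimum layout of the database export: one row per series with at least ParamType, BakjeID and ClusterType columns. A hedged sketch of such a DataFrame; the Waarde column and all values are made up for illustration and are not part of this diff:

import pandas as pd

# minimal series-definition table for Eag.add_series_from_database();
# only the ParamType, BakjeID and ClusterType columns are taken from this
# commit, the Waarde column and the values are illustrative assumptions
series_def = pd.DataFrame([
    {"ParamType": "Constant", "BakjeID": 12345,
     "ClusterType": "Qkwel", "Waarde": 0.5},
    {"ParamType": "KNMI", "BakjeID": 67890,
     "ClusterType": "Neerslag", "Waarde": None},
])

# usage as in examples/eag_excel_example.py:
# e.add_series_from_database(series_def, tmin=tmin, tmax=tmax)
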
@@ -123,7 +153,8 @@
continue
elif ParamType == "FEWS" and df.shape[0] > 1:
# deal with multiple FEWS IDs for one ClusterType
-print("Warning! Multiple FEWS series found for {}.".format(ClusterType))
+self.logger.warning(
+    "Multiple FEWS series found for {}.".format(ClusterType))
for i in range(df.shape[0]):
series_list.append(get_series(
ClusterType, ParamType, df.iloc[i:i+1], tmin, tmax, freq))
@@ -138,8 +169,8 @@ def add_series(self, series, tmin="2000", tmax="2015", freq="D", fillna=False, m
# Fill NaNs if specified
if fillna:
if (s.isna().sum() > 0).all():
-print("Filled {} NaN-values with 0.0 in series {}.".format(
-    np.int(s.isna().sum()), ClusterType))
+self.logger.info("Filled {} NaN-values with 0.0 in series {}.".format(
+    np.int(s.isna().sum()), ClusterType))
s = s.fillna(0.0)
# Add series to appropriate object
if BakjeID in self.buckets.keys(): # add to Land Bucket
@@ -176,7 +207,8 @@ def add_series(self, series, tmin="2000", tmax="2015", freq="D", fillna=False, m
else: # add new series
self.series[ClusterType] = s
else:
-print("Warning! Series '{}' not added.".format(ClusterType))
+self.logger.warning(
+    "Series '{}' not added.".format(ClusterType))

# Create index based on tmin/tmax if no series are added to EAG!
if self.series.empty:
@@ -211,16 +243,18 @@ def add_timeseries(self, series, name=None, tmin="2000", tmax="2015", freq="D",
name = series.name

if name in self.series.columns:
-print(
-    "Warning! Series {} already present in EAG, overwriting data where not NaN!".format(name))
+self.logger.info(
+    "Adding timeseries '{0}' to EAG manually".format(name))
+self.logger.warning(
+    "Series {} already present in EAG, overwriting data where not NaN!".format(name))
first_valid_index = series.first_valid_index()
last_valid_index = series.last_valid_index()
series = series.loc[first_valid_index:last_valid_index].dropna()
fillna = False

if fillna:
if (series.isna().sum() > 0).all():
-print("Filled {0} NaN-values with '{1}' in series {2}.".format(
+self.logger.info("Filled {0} NaN-values with '{1}' in series {2}.".format(
np.int(series.isna().sum()), method, name))
if isinstance(method, str):
series = series.fillna(method=method)
@@ -283,7 +317,7 @@ def simulate(self, params, tmin=None, tmax=None):
tmax: str or pandas.Timestamp
"""
-print("Simulating: {}...".format(self.name))
+self.logger.info("Simulating: {}...".format(self.name))
self.parameters = params
self.parameters.set_index(self.parameters.loc[:, "ParamCode"] + "_" +
self.parameters.loc[:,
@@ -292,20 +326,20 @@ def simulate(self, params, tmin=None, tmax=None):
for idn, bucket in self.buckets.items():
p = params.loc[params.loc[:, "BakjeID"] == idn]

-print("Simulating the waterbalance for bucket: %s %s" %
-      (bucket.name, idn))
+self.logger.info("Simulating the waterbalance for bucket: %s %s" %
+                 (bucket.name, idn))
bucket.simulate(params=p.loc[:, "Waarde"], tmin=tmin, tmax=tmax)

p = params.loc[params.loc[:, "BakjeID"] == self.water.idn]
-print("Simulating the waterbalance for bucket: %s %s" %
-      (self.water.name, self.water.idn))
+self.logger.info("Simulating the waterbalance for bucket: %s %s" %
+                 (self.water.name, self.water.idn))
self.water.simulate(params=p.loc[:, "Waarde"], tmin=tmin, tmax=tmax)
-print("Simulation succesfully completed.")
+self.logger.info("Simulation succesfully completed.")

def simulate_wq(self, wq_params, increment=False, tmin=None,
tmax=None, freq="D"):

-print("Simulating water quality: {}...".format(self.name))
+self.logger.info("Simulating water quality: {}...".format(self.name))

if not hasattr(self.water, "fluxes"):
raise AttributeError("No calculated fluxes in water bucket."
@@ -403,7 +437,7 @@ def simulate_wq(self, wq_params, increment=False, tmin=None,
mass_tot.loc[t] = M
C_out = M / self.water.storage.loc[t, "storage"]

-print("Simulation water quality succesfully completed.")
+self.logger.info("Simulation water quality succesfully completed.")
return mass_in, mass_out, mass_tot

def aggregate_fluxes(self):
@@ -494,7 +528,8 @@ def aggregate_fluxes_w_pumpstation(self):
gemaal_cols = [
icol for icol in self.series.columns if icol.lower().startswith("gemaal")]
if len(gemaal_cols) == 0:
-print("Warning! No timeseries for pumping station. Cannot aggregate.")
+self.logger.warning(
+    "No timeseries for pumping station. Cannot aggregate.")
return fluxes
fluxes.rename(columns={"berekende uitlaat": "sluitfout"}, inplace=True)
# Add pumping station timeseries to fluxes
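
Taken together with the commit message ("add logging"), the get_logger method added to Eag above (and mirrored in Gaf below) replaces the print statements in this file with a per-object logger; passing a filename attaches a FileHandler on top of the console output. A minimal usage sketch, in which the bare Eag, the DEBUG level and the log filename are illustrative choices rather than package defaults:

import logging

from waterbalans.eag import Eag

# a bare EAG (no buckets or series) is enough to demonstrate the logger;
# the idn and name values are placeholders
eag = Eag(idn=1, name="demo-EAG")

# re-create the logger with a file handler so the run is also written to disk
eag.logger = eag.get_logger(log_level=logging.DEBUG, filename="waterbalans.log")
eag.logger.info("Messages for EAG '%s' now also end up in waterbalans.log", eag.name)
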
30 changes: 25 additions & 5 deletions waterbalans/gaf.py
@@ -1,7 +1,7 @@
"""This file contains the polder class
"""

+import logging
from collections import OrderedDict

import numpy as np
@@ -25,6 +25,9 @@ class Gaf:
"""

def __init__(self, idn=None, name=None, eags=None, series=None):

+self.logger = self.get_logger()

# Basic information
self.idn = idn
self.name = name
@@ -37,7 +40,8 @@ def __init__(self, idn=None, name=None, eags=None, series=None):
if isinstance(e, Eag):
self.eags[e.name] = e
else:
-print("Warning! added Eags must be instance of Eag object.")
+self.logger.warning(
+    "added Eags must be instance of Eag object.")

self.data = pd.DataFrame()
self.parameters = pd.DataFrame()
@@ -47,6 +51,22 @@ def __init__(self, idn=None, name=None, eags=None, series=None):
else:
self.series = series

+    def __repr__(self):
+        return "<GAF object: {0} containing {1} EAGs>".format(self.name, len(self.eags))
+
+    def get_logger(self, log_level=logging.INFO, filename=None):
+
+        logging.basicConfig(format='%(asctime)s | %(funcName)s - %(levelname)s : %(message)s',
+                            level=logging.INFO)
+        logger = logging.getLogger()
+        logger.setLevel(log_level)
+
+        if filename is not None:
+            fhandler = logging.FileHandler(filename=filename, mode='w')
+            logger.addHandler(fhandler)
+
+        return logger
+
def add_eag(self, eag):
self.eags[eag.name] = eag

@@ -86,16 +106,16 @@ def add_timeseries(self, series, name=None, tmin="2000", tmax="2015", freq="D",

if fillna:
if (series.isna().sum() > 0).all():
-print("Filled {0} NaN-values with '{1}' in series {2}.".format(
+self.logger.info("Filled {0} NaN-values with '{1}' in series {2}.".format(
np.int(series.isna().sum()), method, name))
if isinstance(method, str):
series = series.fillna(method=method)
elif isinstance(method, float) or isinstance(method, int):
series = series.fillna(method)

if name in self.series.columns:
-print(
-    "Warning! Series {} already present in EAG, overwriting data!".format(name))
+self.logger.warning(
+    "Series {} already present in EAG, overwriting data!".format(name))

self.series.loc[series.index.intersection(
self.series.index), name] = series.loc[series.index.intersection(self.series.index)].values.squeeze()