From e2a08a599a05a6b0587d99fe2102decec06f3339 Mon Sep 17 00:00:00 2001
From: Max Baak <maxbaak@gmail.com>
Date: Mon, 8 Feb 2021 12:19:46 +0100
Subject: [PATCH] popmon working with hgr v1.0.22

* popmon working with histogrammar (hgr) v1.0.22
* removal of all HistogramContainer code
* all unit tests working
* fix all flake8 errors
* In README.rst, swap the Examples and Documentation sections
* bump up version to 0.3.15

bump up histogrammar version to 1.0.23

[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 README.rst                                    |  12 +-
 examples/flight_delays.py                     |   1 +
 examples/synthetic_data.py                    |   1 +
 popmon/__init__.py                            |   9 +-
 popmon/analysis/apply_func.py                 |  40 +-
 popmon/analysis/comparison/hist_comparer.py   |  26 +-
 popmon/analysis/functions.py                  |  39 +-
 popmon/analysis/hist_numpy.py                 |  95 ++--
 popmon/analysis/profiling/hist_profiler.py    |  44 +-
 popmon/config.py                              |   6 +-
 popmon/decorators/pandas.py                   |   5 +-
 popmon/decorators/spark.py                    |   3 +-
 popmon/hist/filling/__init__.py               |  10 +-
 popmon/hist/filling/histogram_filler_base.py  | 495 ------------------
 popmon/hist/filling/make_histograms.py        | 299 -----------
 popmon/hist/filling/numpy_histogrammar.py     | 107 ----
 popmon/hist/filling/pandas_histogrammar.py    | 264 ----------
 popmon/hist/filling/spark_histogrammar.py     | 251 ---------
 popmon/hist/filling/utils.py                  | 222 --------
 popmon/hist/hist_splitter.py                  |  26 +-
 popmon/hist/hist_utils.py                     | 313 +++++++++++
 popmon/hist/histogram.py                      | 360 -------------
 popmon/hist/patched_histogrammer.py           | 128 -----
 .../notebooks/popmon_tutorial_advanced.ipynb  |   4 +-
 popmon/pipeline/metrics.py                    |   6 +-
 popmon/pipeline/report.py                     |   6 +-
 popmon/stitching/hist_stitcher.py             |  19 +-
 popmon/version.py                             |   4 +-
 popmon/visualization/histogram_section.py     |   2 +-
 requirements.txt                              |   2 +-
 setup.py                                      |   2 +-
 .../analysis/profiling/test_hist_profiler.py  |  10 +-
 tests/popmon/analysis/test_functions.py       |   4 +-
 tests/popmon/analysis/test_hist_numpy.py      | 112 ++--
 tests/popmon/hist/test_histogram.py           |  87 ++-
 tests/popmon/hist/test_numpy_histogrammar.py  |  93 ----
 tests/popmon/hist/test_pandas_histogrammar.py | 231 --------
 tests/popmon/hist/test_spark_histogrammar.py  | 255 ---------
 tests/popmon/pipeline/test_report.py          |  14 +-
 tests/popmon/stats/test_numpy.py              |  10 +-
 .../visualization/test_report_generator.py    |   2 +-
 41 files changed, 582 insertions(+), 3037 deletions(-)
 delete mode 100644 popmon/hist/filling/histogram_filler_base.py
 delete mode 100644 popmon/hist/filling/make_histograms.py
 delete mode 100644 popmon/hist/filling/numpy_histogrammar.py
 delete mode 100644 popmon/hist/filling/pandas_histogrammar.py
 delete mode 100644 popmon/hist/filling/spark_histogrammar.py
 delete mode 100644 popmon/hist/filling/utils.py
 create mode 100644 popmon/hist/hist_utils.py
 delete mode 100644 popmon/hist/histogram.py
 delete mode 100644 popmon/hist/patched_histogrammer.py
 delete mode 100644 tests/popmon/hist/test_numpy_histogrammar.py
 delete mode 100644 tests/popmon/hist/test_pandas_histogrammar.py
 delete mode 100644 tests/popmon/hist/test_spark_histogrammar.py

diff --git a/README.rst b/README.rst
index 57a7271d..de927f57 100644
--- a/README.rst
+++ b/README.rst
@@ -35,18 +35,18 @@ For Spark 2.X compiled against scala 2.11, in the string above simply replace 2.
 
 `January 29, 2021`
 
-Documentation
-=============
-
-The entire `popmon` documentation including tutorials can be found at `read-the-docs <https://popmon.readthedocs.io>`_.
-
-
 Examples
 ========
 
 - `Flight Delays and Cancellations Kaggle data <https://crclz.com/popmon/reports/flight_delays_report.html>`_
 - `Synthetic data (code example below) <https://crclz.com/popmon/reports/test_data_report.html>`_
 
+Documentation
+=============
+
+The entire `popmon` documentation including tutorials can be found at `read-the-docs <https://popmon.readthedocs.io>`_.
+
+
 Notebooks
 =========
 
diff --git a/examples/flight_delays.py b/examples/flight_delays.py
index 657cff06..df628983 100644
--- a/examples/flight_delays.py
+++ b/examples/flight_delays.py
@@ -1,3 +1,4 @@
+# flake8: noqa
 import pandas as pd
 
 import popmon
diff --git a/examples/synthetic_data.py b/examples/synthetic_data.py
index b219a40b..d2f95974 100644
--- a/examples/synthetic_data.py
+++ b/examples/synthetic_data.py
@@ -1,3 +1,4 @@
+# flake8: noqa
 import pandas as pd
 
 import popmon
diff --git a/popmon/__init__.py b/popmon/__init__.py
index e73f9560..6427670d 100644
--- a/popmon/__init__.py
+++ b/popmon/__init__.py
@@ -18,12 +18,17 @@
 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 
+# histogram and report functions
+from histogrammar.dfinterface.make_histograms import (
+    get_bin_specs,
+    get_time_axes,
+    make_histograms,
+)
+
 # flake8: noqa
 # pandas/spark dataframe decorators
 from popmon import decorators
 
-# histogram and report functions
-from .hist.filling import get_bin_specs, get_time_axes, make_histograms
 from .pipeline.metrics import df_stability_metrics, stability_metrics
 from .pipeline.report import df_stability_report, stability_report
 from .stitching import stitch_histograms
diff --git a/popmon/analysis/apply_func.py b/popmon/analysis/apply_func.py
index 339e5032..d2535bca 100644
--- a/popmon/analysis/apply_func.py
+++ b/popmon/analysis/apply_func.py
@@ -283,9 +283,9 @@ def apply_func(feature, selected_metrics, df, arr):
 
     if (
         "entire" in arr
-        and arr["entire"] is not None
-        and arr["entire"] is not False
-        and arr["entire"] != 0
+        and arr["entire"] is not None  # noqa: W503
+        and arr["entire"] is not False  # noqa: W503
+        and arr["entire"] != 0  # noqa: W503
     ):
         obj = func(df, *args, **kwargs)
     else:
@@ -302,48 +302,48 @@ def apply_func(feature, selected_metrics, df, arr):
         obj = {"_".join(df.columns): obj}
     elif (
         isinstance(obj, (list, tuple, np.ndarray))
-        and isinstance(df, pd.DataFrame)
-        and len(df.columns) == len(obj)
+        and isinstance(df, pd.DataFrame)  # noqa: W503
+        and len(df.columns) == len(obj)  # noqa: W503
     ):
         obj = {c: o for c, o in zip(df.columns, obj)}
     elif (
         isinstance(obj, (list, tuple, np.ndarray))
-        and isinstance(df, pd.Series)
-        and len(df.index) == len(obj)
+        and isinstance(df, pd.Series)  # noqa: W503
+        and len(df.index) == len(obj)  # noqa: W503
     ):
         obj = {df.name: pd.Series(data=obj, index=df.index)}
     elif (
         isinstance(obj, (list, tuple, np.ndarray))
-        and isinstance(df, pd.DataFrame)
-        and len(df.index) == len(obj)
+        and isinstance(df, pd.DataFrame)  # noqa: W503
+        and len(df.index) == len(obj)  # noqa: W503
     ):
         obj = {"_".join(df.columns): pd.Series(data=obj, index=df.index)}
     elif (
         isinstance(obj, pd.Series)
-        and isinstance(df, pd.Series)
-        and len(obj) == len(df)
-        and all(obj.index == df.index)
+        and isinstance(df, pd.Series)  # noqa: W503
+        and len(obj) == len(df)  # noqa: W503
+        and all(obj.index == df.index)  # noqa: W503
     ):
         obj = {df.name: obj}
     elif (
         isinstance(obj, pd.Series)
-        and isinstance(df, pd.DataFrame)
-        and len(obj) == len(df)
-        and all(obj.index == df.index)
+        and isinstance(df, pd.DataFrame)  # noqa: W503
+        and len(obj) == len(df)  # noqa: W503
+        and all(obj.index == df.index)  # noqa: W503
     ):
         obj = {"_".join(df.columns): obj}
     elif (
         isinstance(obj, pd.DataFrame)
-        and len(obj.columns) == 1
-        and len(obj.index) != len(df.index)
+        and len(obj.columns) == 1  # noqa: W503
+        and len(obj.index) != len(df.index)  # noqa: W503
     ):
         # e.g. output of normalized_hist_mean_cov: a dataframe with one column, actually a series
         obj = obj[obj.columns[0]].to_dict()
     elif (
         isinstance(obj, pd.DataFrame)
-        and len(obj.columns) == 1
-        and len(obj.index) == len(df.index)
-        and (obj.index != df.index).any()
+        and len(obj.columns) == 1  # noqa: W503
+        and len(obj.index) == len(df.index)  # noqa: W503
+        and (obj.index != df.index).any()  # noqa: W503
     ):
         # e.g. output of normalized_hist_mean_cov: a dataframe with one column, actually a series
         obj = obj[obj.columns[0]].to_dict()
diff --git a/popmon/analysis/comparison/hist_comparer.py b/popmon/analysis/comparison/hist_comparer.py
index 3cdcfc5f..db005a64 100644
--- a/popmon/analysis/comparison/hist_comparer.py
+++ b/popmon/analysis/comparison/hist_comparer.py
@@ -39,7 +39,7 @@
     get_consistent_numpy_entries,
 )
 from ...base import Pipeline
-from ...hist.histogram import HistogramContainer
+from ...hist.hist_utils import COMMON_HIST_TYPES, is_numeric
 from ...stats.numpy import googl_test, ks_prob, ks_test, uu_chi2
 
 
@@ -81,18 +81,18 @@ def hist_compare(row, hist_name1="", hist_name2="", max_res_bound=7.0):
         raise RuntimeError("Need to provide two histogram column names.")
 
     # basic histogram checks
-    hc1 = row[hist_name1]
-    hc2 = row[hist_name2]
-    if not all([isinstance(hc, HistogramContainer) for hc in [hc1, hc2]]):
+    hist1 = row[hist_name1]
+    hist2 = row[hist_name2]
+    if not all([isinstance(hist, COMMON_HIST_TYPES) for hist in [hist1, hist2]]):
         return x
-    if not check_similar_hists([hc1, hc2]):
+    if not check_similar_hists([hist1, hist2]):
         return x
 
     # compare
-    is_num = hc1.is_num
-    if hc1.n_dim == 1:
+    is_num = is_numeric(hist1)
+    if hist1.n_dim == 1:
         if is_num:
-            numpy_1dhists = get_consistent_numpy_1dhists([hc1, hc2])
+            numpy_1dhists = get_consistent_numpy_1dhists([hist1, hist2])
             entries_list = [nphist[0] for nphist in numpy_1dhists]
             # KS-test only properly defined for (ordered) 1D interval variables
             ks_testscore = ks_test(*entries_list)
@@ -101,14 +101,14 @@ def hist_compare(row, hist_name1="", hist_name2="", max_res_bound=7.0):
             x["ks_pvalue"] = ks_pvalue
             x["ks_zscore"] = -norm.ppf(ks_pvalue)
         else:  # categorical
-            entries_list = get_consistent_numpy_entries([hc1, hc2])
+            entries_list = get_consistent_numpy_entries([hist1, hist2])
             # check consistency of bin_labels
-            labels1 = hc1.hist.bin_labels()
-            labels2 = hc2.hist.bin_labels()
+            labels1 = hist1.bin_labels()
+            labels2 = hist2.bin_labels()
             subset = set(labels1) <= set(labels2)
             unknown_labels = int(not subset)
-    elif hc1.n_dim == 2:
-        numpy_2dgrids = get_consistent_numpy_2dgrids([hc1, hc2])
+    elif hist1.n_dim == 2:
+        numpy_2dgrids = get_consistent_numpy_2dgrids([hist1, hist2])
         entries_list = [entry.flatten() for entry in numpy_2dgrids]
 
     # calculate pearson coefficient
diff --git a/popmon/analysis/functions.py b/popmon/analysis/functions.py
index a6ce140c..131140e5 100644
--- a/popmon/analysis/functions.py
+++ b/popmon/analysis/functions.py
@@ -30,7 +30,7 @@
     get_consistent_numpy_entries,
     set_2dgrid,
 )
-from ..hist.histogram import HistogramContainer
+from ..hist.hist_utils import COMMON_HIST_TYPES, is_numeric
 from ..stats.numpy import probability_distribution_mean_covariance
 
 
@@ -311,7 +311,7 @@ def hist_sum(x, hist_name=""):
 
     Usage: df['hists'].apply(hist_sum) ; series.apply(hist_sum)
 
-    :param pd.Series x: pandas series to extract HistogramContainer list from.
+    :param pd.Series x: pandas series to extract histogram list from.
     :param str hist_name: name of column to extract histograms from. needs to be set with axis=1 (optional)
     :return: sum histogram
     """
@@ -331,20 +331,21 @@ def hist_sum(x, hist_name=""):
     o[hist_name] = None
 
     # basic checks
-    all_hc = all([isinstance(hc, HistogramContainer) for hc in hist_list])
-    if not all_hc:
+    all_hist = all([isinstance(hist, COMMON_HIST_TYPES) for hist in hist_list])
+    if not all_hist:
         return o
+
     similar = check_similar_hists(hist_list)
     if not similar:
         return o
 
     # MB FIX: h_sum not initialized correctly in a sum by histogrammar for sparselybin (origin); below it is.
-    # h_sum = np.sum([hc.hist for hc in hist_list])
+    # h_sum = np.sum([hist for hist in hist_list])
 
-    h_sum = hist_list[0].hist.zero()
-    for hc in hist_list:
-        h_sum += hc.hist
-    o[hist_name] = HistogramContainer(h_sum)
+    h_sum = hist_list[0].zero()
+    for hist in hist_list:
+        h_sum += hist
+    o[hist_name] = h_sum
     return o
 
 
@@ -386,7 +387,7 @@ def normalized_hist_mean_cov(x, hist_name=""):
 
     Usage: df['hists'].apply(normalized_hist_mean_cov) ; series.apply(normalized_hist_mean_cov)
 
-    :param pd.Series x: pandas series to extract HistogramContainer list from.
+    :param pd.Series x: pandas series to extract histogram list from.
     :param str hist_name: name of column to extract histograms from. needs to be set with axis=1 (optional)
     :return: mean normalized histogram, covariance probability matrix
     """
@@ -408,8 +409,8 @@ def normalized_hist_mean_cov(x, hist_name=""):
     o[hist_name + "_binning"] = None
 
     # basic checks
-    all_hc = all([isinstance(hc, HistogramContainer) for hc in hist_list])
-    if not all_hc:
+    all_hist = all([isinstance(hist, COMMON_HIST_TYPES) for hist in hist_list])
+    if not all_hist:
         return o
     similar = check_similar_hists(hist_list)
     if not similar:
@@ -470,13 +471,13 @@ def relative_chi_squared(
     if not all(r in row for r in required):
         return x
 
-    hc = row[hist_name]
+    hist = row[hist_name]
     norm_mean = row[hist_name + suffix_mean]
     cov = row[hist_name + suffix_cov]
     binning = row[hist_name + suffix_binning]
 
     # basic checks
-    if not isinstance(hc, HistogramContainer):
+    if not isinstance(hist, COMMON_HIST_TYPES):
         return x
     if any([ho is None for ho in [norm_mean, cov, binning]]):
         return x
@@ -486,15 +487,15 @@ def relative_chi_squared(
     variance = np.diagonal(cov)
 
     # get entries as numpy arrays
-    if hc.n_dim == 1:
+    if hist.n_dim == 1:
         entries = (
-            hc.hist.bin_entries(xvalues=binning)
-            if hc.is_num
-            else hc.hist.bin_entries(labels=binning)
+            hist.bin_entries(xvalues=binning)
+            if is_numeric(hist)
+            else hist.bin_entries(labels=binning)
         )
     else:
         assert len(binning) == 2
-        entries = set_2dgrid(hc.hist, binning[0], binning[1])
+        entries = set_2dgrid(hist, binning[0], binning[1])
         entries = entries.flatten()
 
     # calculation of mean normalized histogram and its covariance matrix of input histogram
diff --git a/popmon/analysis/hist_numpy.py b/popmon/analysis/hist_numpy.py
index 5f674698..f3e88f29 100644
--- a/popmon/analysis/hist_numpy.py
+++ b/popmon/analysis/hist_numpy.py
@@ -20,10 +20,11 @@
 
 import warnings
 
+import histogrammar
 import numpy as np
+from histogrammar.util import get_hist_props
 
-from ..hist.histogram import HistogramContainer, get_hist_props
-from ..hist.patched_histogrammer import histogrammar
+from ..hist.hist_utils import is_numeric
 
 used_hist_types = (histogrammar.Bin, histogrammar.SparselyBin, histogrammar.Categorize)
 
@@ -92,10 +93,10 @@ def set_2dgrid(hist, xkeys, ykeys):
                 continue
             i = xkeys.index(k)
             if hasattr(h, "bins"):
-                for l, g in h.bins.items():
-                    if l not in ykeys:
+                for ll, g in h.bins.items():
+                    if ll not in ykeys:
                         continue
-                    j = ykeys.index(l)
+                    j = ykeys.index(ll)
                     grid[j, i] = g.entries  # sum_entries(g)
             elif hasattr(h, "values"):
                 for j, g in enumerate(h.values):
@@ -104,10 +105,10 @@ def set_2dgrid(hist, xkeys, ykeys):
     elif hasattr(hist, "values"):
         for i, h in enumerate(hist.values):
             if hasattr(h, "bins"):
-                for l, g in h.bins.items():
-                    if l not in ykeys:
+                for ll, g in h.bins.items():
+                    if ll not in ykeys:
                         continue
-                    j = ykeys.index(l)
+                    j = ykeys.index(ll)
                     grid[j, i] = g.entries
             elif hasattr(h, "values"):
                 for j, g in enumerate(h.values):
@@ -140,21 +141,18 @@ def get_2dgrid(hist, get_bin_labels=False):
     return grid
 
 
-def get_consistent_numpy_2dgrids(hc_list=[], get_bin_labels=False):
+def get_consistent_numpy_2dgrids(hist_list=[], get_bin_labels=False):
     """Get list of consistent x,y grids of first two dimensions of (sparse) input histograms
 
-    :param list hc_list: list of input histogrammar histograms
+    :param list hist_list: list of input histogrammar histograms
     :param bool get_bin_labels: if true, return x-keys and y-keys describing binnings of 2d-grid.
     :return: list of consistent x,y grids of first two dimensions of each input histogram in list
     """
     # --- basic checks
-    if len(hc_list) == 0:
+    if len(hist_list) == 0:
         raise ValueError("Input histogram list has zero length.")
-    assert_similar_hists(hc_list)
+    assert_similar_hists(hist_list)
 
-    hist_list = [
-        hc.hist if isinstance(hc, HistogramContainer) else hc for hc in hc_list
-    ]
     xkeys = set()
     ykeys = set()
     for hist in hist_list:
@@ -180,22 +178,19 @@ def get_consistent_numpy_2dgrids(hc_list=[], get_bin_labels=False):
     return grid2d_list
 
 
-def get_consistent_numpy_1dhists(hc_list, get_bin_labels=False):
+def get_consistent_numpy_1dhists(hist_list, get_bin_labels=False):
     """Get list of consistent numpy hists for list of sparse input histograms
 
     Note: a numpy histogram is a union of lists of bin_edges and number of entries
 
-    :param list hc_list: list of input HistogramContainer objects
+    :param list hist_list: list of input histogram objects
     :return: list of consistent 1d numpy hists for list of sparse input histograms
     """
     # --- basic checks
-    if len(hc_list) == 0:
+    if len(hist_list) == 0:
         raise RuntimeError("Input histogram list has zero length.")
-    assert_similar_hists(hc_list)
+    assert_similar_hists(hist_list)
 
-    hist_list = [
-        hc.hist if isinstance(hc, HistogramContainer) else hc for hc in hc_list
-    ]
     low_arr = [hist.low for hist in hist_list if hist.low is not None]
     high_arr = [hist.high for hist in hist_list if hist.high is not None]
 
@@ -231,50 +226,48 @@ def get_consistent_numpy_1dhists(hc_list, get_bin_labels=False):
         return nphist_list
 
 
-def get_consistent_numpy_entries(hc_list, get_bin_labels=False):
+def get_consistent_numpy_entries(hist_list, get_bin_labels=False):
     """Get list of consistent numpy bin_entries for list of 1d input histograms
 
     :param list hist_list: list of input histogrammar histograms
     :return: list of consistent 1d numpy arrays with bin_entries for list of input histograms
     """
     # --- basic checks
-    if len(hc_list) == 0:
+    if len(hist_list) == 0:
         raise RuntimeError("Input histogram list has zero length.")
-    assert_similar_hists(hc_list)
+    assert_similar_hists(hist_list)
 
     # datatype check
     is_num_arr = []
-    for hc in hc_list:
-        is_num_arr.append(hc.is_num)
+    for hist in hist_list:
+        is_num_arr.append(is_numeric(hist))
     all_num = all(is_num_arr)
     all_cat = not any(is_num_arr)
     if not (all_num or all_cat):
         raise TypeError(
-            "Input histograms are mixture of Bin/SparselyBin and Categorize types.".format(
-                n=hc_list[0].hist.n_dim
-            )
+            "Input histograms are mixture of Bin/SparselyBin and Categorize types."
         )
 
     # union of all labels encountered
     labels = set()
-    for hc in hc_list:
-        bin_labels = hc.hist.bin_centers() if all_num else hc.hist.bin_labels()
+    for hist in hist_list:
+        bin_labels = hist.bin_centers() if all_num else hist.bin_labels()
         labels = labels.union(bin_labels)
     labels = sorted(labels)
 
     # PATCH: deal with boolean labels, which get bin_labels() returns as strings
     cat_labels = labels
-    props = get_hist_props(hc_list[0])
+    props = get_hist_props(hist_list[0])
     if props["is_bool"]:
         cat_labels = [lab == "True" for lab in cat_labels]
 
     # collect list of consistent bin_entries
     entries_list = []
-    for hc in hc_list:
+    for hist in hist_list:
         entries = (
-            hc.hist.bin_entries(xvalues=labels)
+            hist.bin_entries(xvalues=labels)
             if all_num
-            else hc.hist.bin_entries(labels=cat_labels)
+            else hist.bin_entries(labels=cat_labels)
         )
         entries_list.append(entries)
 
@@ -301,18 +294,15 @@ def get_contentType(hist):
     return "Count"
 
 
-def check_similar_hists(hc_list, check_type=True, assert_type=used_hist_types):
+def check_similar_hists(hist_list, check_type=True, assert_type=used_hist_types):
     """Check consistent list of input histograms
 
     Check that type and dimension of all histograms in input list are the same.
 
-    :param list hc_list: list of input HistogramContainer objects to check on consistency
+    :param list hist_list: list of input histogram objects to check on consistency
     :param bool check_type: if true, also check type consistency of histograms (besides n-dim and datatype).
     :return: bool indicating if lists are similar
     """
-    hist_list = [
-        hc.hist if isinstance(hc, HistogramContainer) else hc for hc in hc_list
-    ]
     if len(hist_list) < 1:
         return True
     for hist in hist_list:
@@ -414,37 +404,36 @@ def check_similar_hists(hc_list, check_type=True, assert_type=used_hist_types):
                 if hist.num > 0:
                     sub_hist_list.append(hist.values[0])
         # iterate down
-        sub_hc_list = [HistogramContainer(h) for h in sub_hist_list]
-        if not check_similar_hists(sub_hc_list):
+        if not check_similar_hists(sub_hist_list):
             return False
 
     return True
 
 
-def assert_similar_hists(hc_list, check_type=True, assert_type=used_hist_types):
+def assert_similar_hists(hist_list, check_type=True, assert_type=used_hist_types):
     """Assert consistent list of input histograms
 
     Assert that type and dimension of all histograms in input list are the same.
 
-    :param list hc_list: list of input HistogramContainer objects to check on consistency
+    :param list hist_list: list of input histogram objects to check on consistency
     :param bool assert_type: if true, also assert type consistency of histograms (besides n-dim and datatype).
     """
     similar = check_similar_hists(
-        hc_list, check_type=check_type, assert_type=assert_type
+        hist_list, check_type=check_type, assert_type=assert_type
     )
     if not similar:
         raise ValueError("Input histograms are not all similar.")
 
 
-def check_same_hists(hc1, hc2):
+def check_same_hists(hist1, hist2):
     """Check if two hists are the same
 
-    :param hc1: input histogram container 1
-    :param hc2: input histogram container 2
+    :param hist1: input histogram 1
+    :param hist2: input histogram 2
     :return: boolean, true if two histograms are the same
     """
-    same = check_similar_hists([hc1, hc2])
-    same &= hc1.hist.entries == hc2.hist.entries
-    same &= hc1.hist.n_bins == hc2.hist.n_bins
-    same &= hc1.hist.quantity.name == hc2.hist.quantity.name
+    same = check_similar_hists([hist1, hist2])
+    same &= hist1.entries == hist2.entries
+    same &= hist1.n_bins == hist2.n_bins
+    same &= hist1.quantity.name == hist2.quantity.name
     return same
diff --git a/popmon/analysis/profiling/hist_profiler.py b/popmon/analysis/profiling/hist_profiler.py
index d09e5689..86cbc3eb 100644
--- a/popmon/analysis/profiling/hist_profiler.py
+++ b/popmon/analysis/profiling/hist_profiler.py
@@ -26,7 +26,7 @@
 
 from ...analysis.hist_numpy import get_2dgrid
 from ...base import Module
-from ...hist.histogram import sum_entries
+from ...hist.hist_utils import get_bin_centers, is_numeric, is_timestamp, sum_entries
 
 DEFAULT_STATS = {
     "mean": pm_np.mean,
@@ -97,12 +97,12 @@ def __init__(
                 f"No stats function dict is provided. {self.stats_functions.keys()} is set as default"
             )
 
-    def _profile_1d_histogram(self, name, hc):
-        is_num = hc.is_num
-        is_ts = hc.is_ts or name in self.var_timestamp
+    def _profile_1d_histogram(self, name, hist):
+        is_num = is_numeric(hist)
+        is_ts = is_timestamp(hist) or name in self.var_timestamp
 
-        bin_labels = np.array(hc.get_bin_centers()[0])
-        bin_counts = np.array([v.entries for v in hc.get_bin_centers()[1]])
+        bin_labels = np.array(get_bin_centers(hist)[0])
+        bin_counts = np.array([v.entries for v in get_bin_centers(hist)[1]])
 
         if len(bin_counts) == 0:
             self.logger.warning(f'Histogram "{name}" is empty; skipping.')
@@ -114,12 +114,10 @@ def _profile_1d_histogram(self, name, hc):
 
         profile = dict()
         profile["filled"] = bin_counts.sum()
-        profile["nan"] = hc.hist.nanflow.entries if hasattr(hc.hist, "nanflow") else 0
-        profile["overflow"] = (
-            hc.hist.overflow.entries if hasattr(hc.hist, "overflow") else 0
-        )
+        profile["nan"] = hist.nanflow.entries if hasattr(hist, "nanflow") else 0
+        profile["overflow"] = hist.overflow.entries if hasattr(hist, "overflow") else 0
         profile["underflow"] = (
-            hc.hist.underflow.entries if hasattr(hc.hist, "underflow") else 0
+            hist.underflow.entries if hasattr(hist, "underflow") else 0
         )
         profile["count"] = profile["filled"] + profile["nan"]
         profile["distinct"] = len(np.unique(bin_labels))
@@ -147,19 +145,19 @@ def _profile_1d_histogram(self, name, hc):
 
         return profile
 
-    def _profile_2d_histogram(self, name, hc):
-        if hc.n_dim < 2:
+    def _profile_2d_histogram(self, name, hist):
+        if hist.n_dim < 2:
             self.logger.warning(
-                f"Histogram {name} has {hc.n_dim} dimensions (<2); cannot profile. Returning empty."
+                f"Histogram {name} has {hist.n_dim} dimensions (<2); cannot profile. Returning empty."
             )
             return []
         try:
-            grid = get_2dgrid(hc.hist)
+            grid = get_2dgrid(hist)
         except Exception as e:
             raise e
 
         # calc some basic 2d-histogram statistics
-        sume = int(sum_entries(hc.hist))
+        sume = int(sum_entries(hist))
 
         # calculate phik correlation
         try:
@@ -180,7 +178,7 @@ def _profile_hist(self, split, hist_name):
 
         hist0 = split[0][self.hist_col]
         dimension = hist0.n_dim
-        is_num = hist0.is_num
+        is_num = is_numeric(hist0)
 
         # these are the profiled quantities we will monitor
         fields = []
@@ -197,14 +195,14 @@ def _profile_hist(self, split, hist_name):
         # now loop over split-axis, e.g. time index, and profile each sub-hist x:y
         profile_list = []
         for hist_dict in split:
-            index, hc = hist_dict[self.index_col], hist_dict[self.hist_col]
+            index, hist = hist_dict[self.index_col], hist_dict[self.hist_col]
 
-            profile = {self.index_col: index, self.hist_col: hc}
+            profile = {self.index_col: index, self.hist_col: hist}
 
             if dimension == 1:
-                profile.update(self._profile_1d_histogram(hist_name, hc))
+                profile.update(self._profile_1d_histogram(hist_name, hist))
             elif dimension == 2:
-                profile.update(self._profile_2d_histogram(hist_name, hc))
+                profile.update(self._profile_2d_histogram(hist_name, hist))
 
             if sorted(profile.keys()) != sorted(
                 fields + [self.index_col, self.hist_col]
@@ -228,10 +226,10 @@ def transform(self, datastore):
 
         for feature in features[:]:
             df = self.get_datastore_object(data, feature, dtype=pd.DataFrame)
-            hc_split_list = df.reset_index().to_dict("records")
+            hist_split_list = df.reset_index().to_dict("records")
 
             self.logger.debug(f'Profiling histogram "{feature}".')
-            profile_list = self._profile_hist(split=hc_split_list, hist_name=feature)
+            profile_list = self._profile_hist(split=hist_split_list, hist_name=feature)
             if len(profile_list) > 0:
                 profiled[feature] = pd.DataFrame(profile_list).set_index(
                     [self.index_col]
diff --git a/popmon/config.py b/popmon/config.py
index 17a7d480..8a24857f 100644
--- a/popmon/config.py
+++ b/popmon/config.py
@@ -47,11 +47,11 @@
     "chi2_pvalue": "p-value of the chi-squared statistic, comparing each time slot with {ref}",
     "chi2_zscore": "Z-score of the chi-squared statistic, comparing each time slot with {ref}",
     "chi2_max_residual": "The largest absolute normalized residual (|chi|) observed in all bin pairs "
-    + "(one histogram in a time slot and one in {ref})",  # noqa: W504
+    + "(one histogram in a time slot and one in {ref})",  # noqa: W503
     "chi2_spike_count": "The number of normalized residuals of all bin pairs (one histogram in a time"
-    + " slot and one in {ref}) with absolute value bigger than a given threshold (default: 7).",  # noqa: W504
+    + " slot and one in {ref}) with absolute value bigger than a given threshold (default: 7).",  # noqa: W503
     "max_prob_diff": "The largest absolute difference between all bin pairs of two normalized "
-    + "histograms (one histogram in a time slot and one in {ref})",  # noqa: W504
+    + "histograms (one histogram in a time slot and one in {ref})",  # noqa: W503
     "unknown_labels": "Are categories observed in a given time slot that are not present in {ref}?",
 }
 
diff --git a/popmon/decorators/pandas.py b/popmon/decorators/pandas.py
index e766a62c..dc88ed30 100644
--- a/popmon/decorators/pandas.py
+++ b/popmon/decorators/pandas.py
@@ -18,13 +18,14 @@
 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 
+from histogrammar.dfinterface.make_histograms import make_histograms
 from pandas import DataFrame
 
-from ..hist.filling import make_histograms
 from ..pipeline.metrics import df_stability_metrics
 from ..pipeline.report import df_stability_report
 
-# add function to create histogrammar histograms
+# add function to create histogrammar histograms.
+# pm_make_histograms is kept for backward compatibility.
 DataFrame.pm_make_histograms = make_histograms
 
 # add function to create stability report
diff --git a/popmon/decorators/spark.py b/popmon/decorators/spark.py
index 95eff7ba..52a9cd8c 100644
--- a/popmon/decorators/spark.py
+++ b/popmon/decorators/spark.py
@@ -18,7 +18,8 @@
 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 
-from popmon.hist.filling import make_histograms
+from histogrammar.dfinterface.make_histograms import make_histograms
+
 from popmon.pipeline.metrics import df_stability_metrics
 from popmon.pipeline.report import df_stability_report
 
diff --git a/popmon/hist/filling/__init__.py b/popmon/hist/filling/__init__.py
index e23a95f5..333df22b 100644
--- a/popmon/hist/filling/__init__.py
+++ b/popmon/hist/filling/__init__.py
@@ -17,22 +17,22 @@
 # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
+# MB 20210323: histogramming code has been moved to histogrammar v1.0.20+
+#              these imports are kept for backwards compatibility.
 
-from ...hist.filling.make_histograms import (
+from histogrammar.dfinterface.make_histograms import (
     get_bin_specs,
     get_one_time_axis,
     get_time_axes,
     has_one_time_axis,
     make_histograms,
 )
-from ...hist.filling.numpy_histogrammar import NumpyHistogrammar
-from ...hist.filling.pandas_histogrammar import PandasHistogrammar
-from ...hist.filling.spark_histogrammar import SparkHistogrammar
+from histogrammar.dfinterface.pandas_histogrammar import PandasHistogrammar
+from histogrammar.dfinterface.spark_histogrammar import SparkHistogrammar
 
 __all__ = [
     "PandasHistogrammar",
     "SparkHistogrammar",
-    "NumpyHistogrammar",
     "make_histograms",
     "get_time_axes",
     "get_one_time_axis",
diff --git a/popmon/hist/filling/histogram_filler_base.py b/popmon/hist/filling/histogram_filler_base.py
deleted file mode 100644
index 4a4fe570..00000000
--- a/popmon/hist/filling/histogram_filler_base.py
+++ /dev/null
@@ -1,495 +0,0 @@
-"""
-Copyright Eskapade:
-License Apache-2: https://github.com/KaveIO/Eskapade-Core/blob/master/LICENSE
-Reference link:
-https://github.com/KaveIO/Eskapade/blob/master/python/eskapade/analysis/histogram_filling.py
-All modifications copyright ING WBAA.
-"""
-
-import copy
-import logging
-from collections import defaultdict
-
-import histogrammar as hg
-import numpy as np
-import pandas as pd
-
-from ...base import Module
-from ...hist.filling.utils import check_column, check_dtype
-
-
-class HistogramFillerBase(Module):
-    """Base class link to fill histograms.
-
-    Timestamp features are
-    converted to nanoseconds before the binning is applied.
-    Semi-clever auto-binning is applied in case no bin specifications are provided.
-    Final histograms are stored in the datastore.
-    """
-
-    def __init__(
-        self,
-        features=None,
-        binning="unit",
-        bin_specs=None,
-        time_axis="",
-        var_dtype=None,
-        read_key=None,
-        store_key=None,
-        nbins_1d=40,
-        nbins_2d=20,
-        nbins_3d=10,
-        max_nunique=500,
-    ):
-        """Initialize module instance.
-
-        Store and do basic check on the attributes HistogramFillerBase.
-
-        :param list features: colums to pick up from input data. (default is all features)
-            For multi-dimensional histograms, separate the column names with a ":"
-            Example features list is:
-
-            .. code-block:: python
-
-                features = ['x', 'date', 'date:x', 'date:y', 'date:x:y']
-
-        :param str binning: default binning to revert to in case bin_specs not supplied. options are:
-            "unit" or "auto", default is "unit". When using "auto", semi-clever binning is automatically done.
-        :param dict bin_specs: dictionaries used for rebinning numeric or timestamp features
-            Example bin_specs dictionary is:
-
-            .. code-block:: python
-
-                bin_specs = {'x': {'bin_width': 1, 'bin_offset': 0},
-                             'y': {'num': 10, 'low': 0.0, 'high': 2.0},
-                             'x:y': [{}, {'num': 5, 'low': 0.0, 'high': 1.0}]}
-
-            In the bin specs for x:y, x reverts to the 1-dim setting.
-
-        :param str time_axis: name of datetime feature, used as time axis, eg 'date'. if True, will be guessed.
-            If time_axis is set, if no features given, features becomes: ['date:x', 'date:y', 'date:z'] etc.
-        :param dict var_dtype: dictionary with specified datatype per feature (optional)
-        :param str read_key: key of input histogram-dict to read from data store .
-            (only required when calling transform(datastore) as module)
-        :param str store_key: key of output data to store in data store
-            (only required when calling transform(datastore) as module)
-        :param int nbins_1d: auto-binning number of bins for 1d histograms. default is 40.
-        :param int nbins_2d: auto-binning number of bins for 2d histograms. default is 20.
-        :param int nbins_3d: auto-binning number of bins for 3d histograms. default is 10.
-        :param int max_nunique: auto-binning threshold for unique categorical values. default is 500.
-        """
-        super().__init__()
-
-        features = features or []
-        self.features = [check_column(c) for c in features]
-        if not any([binning == opt for opt in ["auto", "unit"]]):
-            raise TypeError('binning should be "auto" or "unit".')
-        self.binning = binning
-        self.bin_specs = bin_specs or {}
-        self.time_axis = time_axis
-        var_dtype = var_dtype or {}
-        self.var_dtype = {k: check_dtype(v) for k, v in var_dtype.items()}
-        self.read_key = read_key
-        self.store_key = store_key
-
-        # several default unit values
-        self._unit_bin_specs = {"bin_width": 1.0, "bin_offset": 0.0}
-        self._unit_timestamp_specs = {
-            "bin_width": pd.Timedelta(days=30).value,
-            "bin_offset": pd.Timestamp("2010-01-04").value,
-        }
-        self._auto_n_bins_1d = nbins_1d
-        self._auto_n_bins_2d = nbins_2d
-        self._auto_n_bins_3d = nbins_3d
-        self._nunique_threshold = max_nunique
-
-        # these get filled during execution
-        self._hists = {}
-
-    def assert_dataframe(self, df):
-        """assert dataframe datatype"""
-        raise NotImplementedError("assert_dataframe not implemented!")
-
-    def get_features(self, df):
-        raise NotImplementedError("get_features not implemented!")
-
-    def get_quantiles(self, df, quantiles, columns):
-        """return dict with quantiles for given columns"""
-        raise NotImplementedError("get_quantiles not implemented!")
-
-    def get_nunique(self, df, columns):
-        """return dict with number of unique entries for given columns"""
-        raise NotImplementedError("get_nunique not implemented!")
-
-    def process_features(self, df, cols_by_type):
-        raise NotImplementedError("process_features not implemented!")
-
-    def fill_histograms(self, idf):
-        raise NotImplementedError("fill_histograms not implemented!")
-
-    def construct_empty_hist(self, features):
-        raise NotImplementedError("construct_empty_hist not implemented!")
-
-    def _auto_n_bins(self, c):
-        """Return number of bins for this histogram
-
-        :param list c: list of columns for this histogram
-        :return: number of bins to use for this histogram
-        """
-        if isinstance(c, str):
-            c = [c]
-        if len(self.time_axis) > 0 and c[0] == self.time_axis:
-            # in case of time-axis, use fine-grained binning
-            # do this by removing first element, decreasing size of c.
-            # note that affects original input c, so copy first
-            c = copy.copy(c)
-            del c[0]
-        if len(c) <= 1:
-            return self._auto_n_bins_1d
-        elif len(c) == 2:
-            return self._auto_n_bins_2d
-        elif len(c) == 3:
-            return self._auto_n_bins_3d
-        else:
-            return self._auto_n_bins_3d
-
-    def _execute(self, df):
-        """
-        _execute() does five things:
-
-        * check presence and data type of requested features
-        * timestamp variables are converted to nanosec (integers)
-        * clever auto-binning is done in case no bin-specs have been provided
-        * do the actual value counting based on categories and created indices
-        * then convert to histograms
-        """
-        df = self.assert_dataframe(df)
-
-        # 1. check presence and data type of requested features
-        # sort features into numerical, timestamp and category based
-        cols_by_type = self.categorize_features(df)
-
-        # 2. assign features to make histograms of (if not already provided)
-        #    and figure out time-axis if provided
-        #    check if all features are present in dataframe
-        self.assign_and_check_features(df, cols_by_type)
-
-        # 3. timestamp variables are converted to ns here
-        idf = self.process_features(df, cols_by_type)
-
-        # 4. complete bin-specs that have not been provided in case of 'auto' binning option
-        if self.binning == "auto":
-            self.auto_complete_bin_specs(idf, cols_by_type)
-
-        # 5. do the actual histogram/counter filling
-        self.logger.info(
-            f"Filling {len(self.features)} specified histograms. {self.binning}-binning."
-        )
-        self.fill_histograms(idf)
-
-        return self._hists
-
-    def assign_and_check_features(self, df, cols_by_type):
-        """auto assign feature to make histograms of and do basic checks on them
-
-        :param df: input dateframe
-        :param cols_by_type: dict of columns classified by type
-        """
-        # user leaves feature selection up to us
-        no_initial_features = len(self.features) == 0
-
-        all_cols = (
-            list(cols_by_type["num"])
-            + list(cols_by_type["dt"])
-            + list(cols_by_type["str"])
-        )
-
-        # 1. assign / figure out a time axis
-        if isinstance(self.time_axis, str) and len(self.time_axis) > 0:
-            # a) specified time axis
-            if self.time_axis not in all_cols:
-                raise RuntimeError(
-                    f'Specified time-axis "{self.time_axis}" not found in dataframe.'
-                )
-        elif isinstance(self.time_axis, bool) and self.time_axis:
-            # b) try to figure out time axis
-            self.time_axis = ""
-            num = len(cols_by_type["dt"])
-            if num == 1:
-                # the obvious choice
-                self.time_axis = list(cols_by_type["dt"])[0]
-                self.logger.info(f'Time-axis automatically set to "{self.time_axis}"')
-            elif num == 0:
-                self.logger.warning(
-                    "No obvious time-axes found to choose from. So not used."
-                )
-            else:
-                self.logger.warning(
-                    f'Found {num} time-axes: {cols_by_type["dt"]}. Set *one* time_axis manually! Now NOT used.'
-                )
-        else:
-            # c) no time axis
-            self.time_axis = ""
-
-        # 2. assign all features to make histograms of, in case not provided by user
-        if no_initial_features:
-            if len(self.time_axis) > 0:
-                # time-axis is selected: make histograms of all columns in dataframe vs time-axis
-                self.features = [
-                    [self.time_axis, c]
-                    for c in sorted(self.get_features(df))
-                    if c != self.time_axis
-                ]
-            else:
-                # make histograms of all columns in dataframe
-                self.features = [[c] for c in sorted(self.get_features(df))]
-
-        # 3. check presence of all features (in case provided by user)
-        all_selected_cols = np.unique([j for i in self.features for j in i])
-        for c in all_selected_cols:
-            if c not in self.get_features(df):
-                raise RuntimeError(f"Requested feature {c} not in dataframe.")
-
-        # 4. check number of unique entries for categorical features
-        #    this can be an expensive call, so avoid if possible. do run however when debugging.
-        if no_initial_features or self.logger.level == logging.DEBUG:
-            str_cols = [c for c in all_selected_cols if c in cols_by_type["str"]]
-            nuniq = self.get_nunique(df, str_cols)
-            huge_cats = []
-            for c in str_cols:
-                if nuniq[c] < self._nunique_threshold:
-                    continue
-                if no_initial_features:
-                    # we're the boss. we're not going to histogram this ...
-                    huge_cats.append(c)
-                else:  # debug mode
-                    self.logger.warning(
-                        f"Column {c} has {nuniq[c]} unique entries (large). Really histogram it?"
-                    )
-            # scrub self.features of huge categories.
-            self.features = [
-                cols
-                for cols in self.features
-                if not any([c in huge_cats for c in cols])
-            ]
-
-    def auto_complete_bin_specs(self, df, cols_by_type):
-        """auto complete the bin-specs that have not been provided
-
-        :param df: input dataframe
-        :param cols_by_type: dict of columns classified by type
-        """
-        # auto-determine binning of numerical and time features for which no bin_specs exist
-        # do this based on range of 5-95% quantiles, so extreme outliers are binned separately
-        # otherwise, the idea is to always reuse 1-dim binning for high n-dim, if those exist.
-        bs_keys = list(self.bin_specs.keys())  # create initial unchanging list of keys
-        all_selected_cols = np.unique([j for i in self.features for j in i])
-        cols = list(cols_by_type["num"]) + list(cols_by_type["dt"])
-        num_cols = [c for c in all_selected_cols if c in cols and c not in bs_keys]
-
-        # quantiles for bin specs
-        int_cols = [c for c in num_cols if c in cols_by_type["int"]]
-        quantiles_i = self.get_quantiles(df, quantiles=[0.0, 1.0], columns=int_cols)
-        float_cols = [c for c in num_cols if c not in cols_by_type["int"]]
-        quantiles_f = self.get_quantiles(df, quantiles=[0.05, 0.95], columns=float_cols)
-
-        for cols in self.features:
-            n = ":".join(cols)
-            if len(cols) == 1 and n not in num_cols:
-                continue
-            if n in bs_keys:
-                # already provided; will pick that one up
-                continue
-            # get default number of bins for n-dim histogram
-            n_bins = self._auto_n_bins(cols)
-            specs = []
-            for idx, c in enumerate(cols):
-                if c not in num_cols or c in bs_keys:
-                    # skip categorical; revert to what is already provided by user at 1dim-level
-                    specs.append({})
-                    continue
-
-                if c in float_cols:
-                    q = quantiles_f[c]
-                    # by default, n_bins covers range 5-95% quantiles + we add 10%
-                    # basically this gives a nice plot when plotted
-                    # specs for Bin and Sparselybin histograms
-                    if q[1] == q[0]:
-                        # in case of highly imbalanced data it can happen that q05=q95. If so use min and max instead.
-                        q = (self.get_quantiles(df, quantiles=[0.0, 1.0], columns=[c]))[
-                            c
-                        ]
-                    qdiff = (q[1] - q[0]) * (1.0 / 0.9) if q[1] > q[0] else 1.0
-                    bin_width = qdiff / float(n_bins)
-                    bin_offset = q[0] - qdiff * 0.05
-                    low = q[0] - qdiff * 0.05
-                    high = q[1] + qdiff * 0.05
-                elif c in int_cols:
-                    # for ints use bins around integer values
-                    low = quantiles_i[c][0]
-                    high = quantiles_i[c][1]
-                    bin_width = np.max((np.round((high - low) / float(n_bins)), 1.0))
-                    bin_offset = low = np.floor(low - 0.5) + 0.5
-                    n_bins = int((high - low) // bin_width) + int(
-                        (high - low) % bin_width > 0.0
-                    )
-                    high = low + n_bins * bin_width
-
-                if c == self.time_axis and idx == 0:
-                    # time axis is always sparselybin (unbound)
-                    specs.append({"bin_width": bin_width, "bin_offset": bin_offset})
-                elif len(cols) >= 3:
-                    # always binned histogram for high n-dim histograms, avoid potentially exploding histograms
-                    specs.append({"num": n_bins, "low": low, "high": high})
-                else:
-                    # sparse allowed for low dimensional histograms (1 and 2 dim)
-                    specs.append({"bin_width": bin_width, "bin_offset": bin_offset})
-            if len(cols) == 1:
-                specs = specs[0]
-            self.bin_specs[n] = specs
-
-    def get_data_type(self, df, col):
-        """Get data type of dataframe column.
-
-        :param df: input data frame
-        :param str col: column
-        """
-        if col not in self.get_features(df):
-            raise KeyError(f'column "{col:s}" not in input dataframe')
-        return df[col].dtype
-
-    def categorize_features(self, df):
-        """Categorize features of dataframe by data type.
-
-        :param df: input (pandas) data frame
-        """
-        # check presence and data type of requested features
-        # sort features into numerical, timestamp and category based
-        cols_by_type = defaultdict(set)
-
-        features = (
-            self.features if self.features else [[c] for c in self.get_features(df)]
-        )
-
-        for col_list in features:
-            for col in col_list:
-
-                dt = check_dtype(self.get_data_type(df, col))
-
-                if col not in self.var_dtype:
-                    self.var_dtype[col] = dt
-
-                if np.issubdtype(dt, np.integer):
-                    colset = cols_by_type["int"]
-                    if col not in colset:
-                        colset.add(col)
-                if np.issubdtype(dt, np.number):
-                    colset = cols_by_type["num"]
-                    if col not in colset:
-                        colset.add(col)
-                elif np.issubdtype(dt, np.datetime64):
-                    colset = cols_by_type["dt"]
-                    if col not in colset:
-                        colset.add(col)
-                else:
-                    colset = cols_by_type["str"]
-                    if col not in colset:
-                        colset.add(col)
-
-                self.logger.debug(
-                    'Data type of column "{col}" is "{type}".'.format(
-                        col=col, type=self.var_dtype[col]
-                    )
-                )
-        return cols_by_type
-
-    def var_bin_specs(self, c, idx=0):
-        """Determine bin_specs to use for variable c.
-
-        :param list c: list of variables, or string variable
-        :param int idx: index of the variable in c, for which to return the bin specs. default is 0.
-        :return: selected bin_specs of variable
-        """
-        if isinstance(c, str):
-            c = [c]
-        n = ":".join(c)
-
-        # determine default bin specs
-        dt = np.dtype(self.var_dtype[c[idx]])
-        is_timestamp = isinstance(dt.type(), np.datetime64)
-        default = (
-            self._unit_bin_specs if not is_timestamp else self._unit_timestamp_specs
-        )
-
-        # get bin specs
-        if n in self.bin_specs and len(c) > 1 and len(c) == len(self.bin_specs[n]):
-            result = self.bin_specs[n][idx]
-            if not result:
-                result = self.bin_specs.get(c[idx], default)
-        else:
-            result = self.bin_specs.get(c[idx], default)
-        return result
-
-    def get_histograms(self, input_df):
-        """Handy function to directly get dict of histograms corresponding to input dataframe.
-
-        :param input_df: spark/pandas input dataframe
-        :return: dict of histograms
-        """
-        return self._execute(input_df)
-
-    def get_features_specs(self):
-        """Return bin specifications used to generate histograms
-
-        Can then be passed on to other histogram filler to get identical histograms.
-        """
-        features = [":".join(c) for c in self.features]  # rejoin substrings
-        return features, self.bin_specs, self.var_dtype, self.time_axis
-
-    def transform(self, datastore):
-        """Transform function called when used as module in a pipeline
-
-        :param dict datastore: input datastore
-        :return: datastore
-        """
-        if not isinstance(self.read_key, str) and len(self.read_key) > 0:
-            raise ValueError("read_key has not been properly set.")
-        if not isinstance(self.store_key, str) and len(self.store_key) > 0:
-            raise ValueError("store_key has not been properly set.")
-        if self.read_key not in datastore:
-            raise KeyError("read_key not found in datastore")
-
-        df = datastore[self.read_key]
-        hists = self.get_histograms(df)
-        datastore[self.store_key] = hists
-        return datastore
-
-    def get_hist_bin(self, hist, features, quant, col, dt):
-        is_number = np.issubdtype(dt, np.number)
-        is_timestamp = np.issubdtype(dt, np.datetime64)
-
-        if is_number or is_timestamp:
-            # numbers and timestamps are put in a sparse binned histogram
-            specs = self.var_bin_specs(features, features.index(col))
-            if "bin_width" in specs:
-                hist = hg.SparselyBin(
-                    binWidth=specs["bin_width"],
-                    origin=specs.get("bin_offset", 0),
-                    quantity=quant,
-                    value=hist,
-                )
-            elif "num" in specs and "low" in specs and "high" in specs:
-                hist = hg.Bin(
-                    num=specs["num"],
-                    low=specs["low"],
-                    high=specs["high"],
-                    quantity=quant,
-                    value=hist,
-                )
-            else:
-                raise RuntimeError("Do not know how to interpret bin specifications.")
-        else:
-            # string and booleans are treated as categories
-            hist = hg.Categorize(quantity=quant, value=hist)
-
-        return hist
diff --git a/popmon/hist/filling/make_histograms.py b/popmon/hist/filling/make_histograms.py
deleted file mode 100644
index 650d7d7f..00000000
--- a/popmon/hist/filling/make_histograms.py
+++ /dev/null
@@ -1,299 +0,0 @@
-# Copyright (c) 2020 ING Wholesale Banking Advanced Analytics
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of
-# this software and associated documentation files (the "Software"), to deal in
-# the Software without restriction, including without limitation the rights to
-# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-# the Software, and to permit persons to whom the Software is furnished to do so,
-# subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
-# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-
-import copy
-import logging
-
-import histogrammar
-import numpy as np
-import pandas as pd
-
-from ...hist.filling.pandas_histogrammar import PandasHistogrammar
-from ...hist.filling.spark_histogrammar import SparkHistogrammar
-from ...hist.filling.utils import check_dtype
-
-logger = logging.getLogger()
-
-
-def make_histograms(
-    df,
-    features=None,
-    binning="auto",
-    bin_specs=None,
-    time_axis="",
-    time_width=None,
-    time_offset=0,
-    var_dtype=None,
-    ret_specs=False,
-    nbins_1d=40,
-    nbins_2d=20,
-    nbins_3d=10,
-    max_nunique=500,
-):
-    """Create histograms from pandas or spark dataframe.
-
-    :param df: input pandas or spark dataframe to create histograms of.
-    :param list features: columns to pick up from input data. (default is all features)
-        For multi-dimensional histograms, separate the column names with a ":". An example features list is:
-
-        .. code-block:: python
-
-            features = ['x', 'date', 'date:x', 'date:y', 'date:x:y']
-
-    :param str binning: default binning to revert to in case bin_specs not supplied. options are:
-        "unit" or "auto", default is "auto". When using "auto", semi-clever binning is automatically done.
-    :param dict bin_specs: dictionaries used for rebinning numeric or timestamp features. An example bin_specs
-        dictionary is:
-
-        .. code-block:: python
-
-            bin_specs = {'x': {'bin_width': 1, 'bin_offset': 0},
-                         'y': {'num': 10, 'low': 0.0, 'high': 2.0},
-                         'x:y': [{}, {'num': 5, 'low': 0.0, 'high': 1.0}]}
-
-        In the bin specs for x:y, x is not provided (here) and reverts to the 1-dim setting. The 'bin_width',
-        'bin_offset' notation makes an open-ended histogram (for that feature) with given bin width and offset.
-        The notation 'num', 'low', 'high' gives a fixed range histogram from 'low' to 'high' with 'num'
-        number of bins.
-    :param str time_axis: name of datetime feature, used as time axis, eg 'date'. if True, will be guessed.
-        If time_axis is set, if no features given, features becomes: ['date:x', 'date:y', 'date:z'] etc.
-    :param time_width: bin width of time_axis. str or number (ns). note: bin_specs takes precedence. (optional)
-
-        .. code-block:: text
-
-            Examples: '1w', 3600e9 (number of ns),
-                      anything understood by pd.Timedelta(time_width).value
-
-    :param time_offset: bin offset of time_axis. str or number (ns). note: bin_specs takes precedence. (optional)
-
-        .. code-block:: text
-
-            Examples: '1-1-2020', 0 (number of ns since 1-1-1970),
-                      anything parsed by pd.Timestamp(time_offset).value
-
-    :param dict var_dtype: dictionary with specified datatype per feature (optional)
-    :param bool ret_specs: if true, also return features, bin_specs, var_dtype, time_axis used for filling histograms.
-    :param int nbins_1d: auto-binning number of bins for 1d histograms. default is 40.
-    :param int nbins_2d: auto-binning number of bins for 2d histograms. default is 20.
-    :param int nbins_3d: auto-binning number of bins for 3d histograms. default is 10.
-    :param int max_nunique: auto-binning threshold for unique categorical values. default is 500.
-    :return: dict of created histogrammar histograms
-    """
-    # basic checks on presence of time_axis
-    if (not isinstance(time_axis, (str, bool))) or (
-        isinstance(time_axis, bool) and not time_axis
-    ):
-        raise TypeError("time_axis needs to be a string, or a bool set to True")
-    if (
-        isinstance(time_axis, str)
-        and len(time_axis) > 0
-        and time_axis not in df.columns
-    ):
-        raise ValueError(f'time_axis "{time_axis}" not found in columns of dataframe.')
-    if isinstance(time_axis, bool):
-        time_axes = get_time_axes(df)
-        num = len(time_axes)
-        if num == 1:
-            time_axis = time_axes[0]
-            logger.info(f'Time-axis automatically set to "{time_axis}"')
-        elif num == 0:
-            raise RuntimeError(
-                "No obvious time-axes found. Cannot generate stability report."
-            )
-        else:
-            raise RuntimeError(
-                f"Found {num} time-axes: {time_axes}. Set *one* time_axis manually!"
-            )
-
-    # if time_axis present, interpret time_width and time_offset
-    if (
-        isinstance(time_axis, str)
-        and len(time_axis) > 0
-        and isinstance(time_width, (str, int, float))
-        and isinstance(time_offset, (str, int, float))
-    ):
-        if not isinstance(bin_specs, (type(None), dict)):
-            raise RuntimeError("bin_specs object is not a dictionary")
-        bin_specs = copy.copy(bin_specs) if isinstance(bin_specs, dict) else {}
-        if time_axis in bin_specs:
-            raise RuntimeError(
-                f'time-axis "{time_axis}" already found in binning specifications.'
-            )
-        # convert time width and offset to nanoseconds
-        time_specs = {
-            "bin_width": float(pd.Timedelta(time_width).value),
-            "bin_offset": float(pd.Timestamp(time_offset).value),
-        }
-        bin_specs[time_axis] = time_specs
-
-    cls = PandasHistogrammar if isinstance(df, pd.DataFrame) else SparkHistogrammar
-    hist_filler = cls(
-        features=features,
-        binning=binning,
-        bin_specs=bin_specs,
-        time_axis=time_axis,
-        var_dtype=var_dtype,
-        nbins_1d=nbins_1d,
-        nbins_2d=nbins_2d,
-        nbins_3d=nbins_3d,
-        max_nunique=max_nunique,
-    )
-    hists = hist_filler.get_histograms(df)
-
-    if ret_specs:
-        features, binning, var_dtype, time_axis = hist_filler.get_features_specs()
-        return hists, features, binning, time_axis, var_dtype
-
-    return hists
-
-
-def get_data_type(df, col):
-    """Get data type of a column of pandas or spark dataframe.
-
-    :param df: input data frame (pandas or spark)
-    :param str col: column
-    """
-    if col not in df.columns:
-        raise KeyError(f'Column "{col:s}" not in input dataframe.')
-    dt = dict(df.dtypes)[col]
-
-    if hasattr(dt, "type"):
-        # convert pandas types, such as pd.Int64, into numpy types
-        dt = type(dt.type())
-
-    try:
-        # spark conversions to numpy or python equivalent
-        if dt == "string":
-            dt = "str"
-        elif dt == "timestamp":
-            dt = np.datetime64
-        elif dt == "boolean":
-            dt = bool
-        elif dt == "bigint":
-            dt = np.int64
-    except TypeError:
-        pass
-
-    return np.dtype(dt)
-
-
-def get_time_axes(df):
-    """Return all time-axis columns of a dataframe
-
-    :param df: input dataframe (pandas or spark)
-    :return: list of time-axis columns
-    """
-    return [
-        c
-        for c in df.columns
-        if np.issubdtype(check_dtype(get_data_type(df, c)), np.datetime64)
-    ]
-
-
-def has_one_time_axis(df):
-    """Return boolean if one time-axis column in dataframe
-
-    :param df: input dataframe (pandas or spark)
-    :return: boolean if one time-axis column
-    """
-    dt_cols = get_time_axes(df)
-    return len(dt_cols) == 1
-
-
-def get_one_time_axis(df):
-    """Return time-axis if one time-axis column in dataframe
-
-    :param df: input dataframe (pandas or spark)
-    :return: one time-axis column, else empty string
-    """
-    dt_cols = get_time_axes(df)
-    return dt_cols[0] if len(dt_cols) == 1 else ""
-
-
-def _get_bin_specs(h):
-    """Get histogram bin specifications
-
-    :param h: input histogrammar histogram
-    :return: list with bin_specs of all dimensions of the histogram
-    :rtype: list
-    """
-    bin_specs = []
-    if isinstance(h, histogrammar.Count):
-        return bin_specs
-
-    if isinstance(h, histogrammar.Categorize):
-        bin_specs.append({})
-    elif isinstance(h, histogrammar.Bin):
-        bin_specs.append(dict(num=h.num, low=h.low, high=h.high))
-    elif isinstance(h, histogrammar.SparselyBin):
-        bin_specs.append(dict(bin_width=h.binWidth, bin_offset=h.origin))
-
-    # histogram may have a sub-histogram. Extract it and recurse
-    if hasattr(h, "bins"):
-        hist = list(h.bins.values())[0] if h.bins else histogrammar.Count()
-    elif hasattr(h, "values"):
-        hist = h.values[0] if h.values else histogrammar.Count()
-    else:
-        hist = histogrammar.Count()
-    return bin_specs + _get_bin_specs(hist)
-
-
-def _match_first_key(skip_first_axis=None, feature=""):
-    """Helper function to match and remove skip_first_axis from feature
-
-    :param skip_first_axis: True or string. if set, ignore first axis of input histogram(s)
-    :param feature: input feature
-    :return: match and (rest of) feature
-    """
-    assert isinstance(feature, str)
-    karr = feature.split(":")
-    begin = karr[0]
-    rest_key = ":".join(karr[1:])
-    if isinstance(skip_first_axis, bool):
-        return skip_first_axis, rest_key if skip_first_axis else feature
-    elif isinstance(skip_first_axis, str) and len(skip_first_axis) > 0:
-        match = begin == skip_first_axis
-        return match, rest_key if match else feature
-    return False, feature
-
-
-def get_bin_specs(hd, skip_first_axis=False):
-    """Get histogram bin specifications
-
-    :param hd: input histogrammar histogram (or dict of input histograms)
-    :param skip_first_axis: bool or string of first axis. if set, ignore first axis of input histogram(s)
-    :return: list (or dict with lists) with bin_specs of all dimensions of the histogram
-    :rtype: list (or dict)
-    """
-    if isinstance(hd, dict):
-        bin_specs = {}
-        for key, h in hd.items():
-            bs = _get_bin_specs(h)
-            match, rest_key = _match_first_key(skip_first_axis, key)
-            bs = bs[1:] if match else bs
-            bs = bs[0] if len(bs) == 1 else bs
-            bin_specs[rest_key] = bs
-    else:
-        bs = _get_bin_specs(hd)
-        match, _ = _match_first_key(skip_first_axis)
-        bs = bs[1:] if match else bs
-        bs = bs[0] if len(bs) == 1 else bs
-        bin_specs = bs
-    return bin_specs
diff --git a/popmon/hist/filling/numpy_histogrammar.py b/popmon/hist/filling/numpy_histogrammar.py
deleted file mode 100644
index ad21e77a..00000000
--- a/popmon/hist/filling/numpy_histogrammar.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# Copyright (c) 2020 ING Wholesale Banking Advanced Analytics
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of
-# this software and associated documentation files (the "Software"), to deal in
-# the Software without restriction, including without limitation the rights to
-# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-# the Software, and to permit persons to whom the Software is furnished to do so,
-# subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
-# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-
-import numpy as np
-import pandas as pd
-
-from ...hist.filling.pandas_histogrammar import PandasHistogrammar
-
-
-class NumpyHistogrammar(PandasHistogrammar):
-    """Fill histogrammar histograms.
-
-    Algorithm to fill histogrammar style bin, sparse-bin and category histograms.
-
-    Timestamp features are converted to nanoseconds before
-    the binning is applied. Final histograms are stored in the datastore.
-    """
-
-    def __init__(
-        self,
-        features=None,
-        binning="unit",
-        bin_specs=None,
-        time_axis="",
-        var_dtype=None,
-        read_key=None,
-        store_key=None,
-        nbins_1d=40,
-        nbins_2d=20,
-        nbins_3d=10,
-        max_nunique=500,
-    ):
-        """Initialize module instance.
-
-        Store and do basic check on the attributes HistogramFillerBase.
-
-        :param list features: colums to pick up from input data. (default is all features)
-            For multi-dimensional histograms, separate the column names with a :
-
-            Example features list is:
-
-            .. code-block:: python
-
-                features = ['x', 'date', 'date:x', 'date:y', 'date:x:y']
-
-        :param str binning: default binning to revert to in case bin_specs not supplied. options are:
-            "unit" or "auto", default is "unit". When using "auto", semi-clever binning is automatically done.
-        :param dict bin_specs: dictionaries used for rebinning numeric or timestamp features
-
-            Example bin_specs dictionary is:
-
-            .. code-block:: python
-
-                bin_specs = {'x': {'bin_width': 1, 'bin_offset': 0},
-                             'y': {'num': 10, 'low': 0.0, 'high': 2.0},
-                             'x:y': [{}, {'num': 5, 'low': 0.0, 'high': 1.0}]}
-
-            In the bin specs for x:y, x reverts to the 1-dim setting.
-
-        :param str time_axis: name of datetime feature, used as time axis, eg 'date'. if True, will be guessed.
-            If time_axis is set, if no features given, features becomes: ['date:x', 'date:y', 'date:z'] etc.
-        :param dict var_dtype: dictionary with specified datatype per feature (optional)
-        :param str read_key: key of input histogram-dict to read from data store .
-            (only required when calling transform(datastore) as module)
-        :param str store_key: key of output data to store in data store
-            (only required when calling transform(datastore) as module)
-        :param int nbins_1d: auto-binning number of bins for 1d histograms. default is 40.
-        :param int nbins_2d: auto-binning number of bins for 2d histograms. default is 20.
-        :param int nbins_3d: auto-binning number of bins for 3d histograms. default is 10.
-        :param int max_nunique: auto-binning threshold for unique categorical values. default is 500.
-        """
-        PandasHistogrammar.__init__(
-            self,
-            features,
-            binning,
-            bin_specs,
-            time_axis,
-            var_dtype,
-            read_key,
-            store_key,
-            nbins_1d,
-            nbins_2d,
-            nbins_3d,
-            max_nunique,
-        )
-
-    def _execute(self, df):
-        if not isinstance(df, np.ndarray):
-            raise TypeError("retrieved object not of type np.ndarray")
-        return super()._execute(pd.DataFrame(df))
diff --git a/popmon/hist/filling/pandas_histogrammar.py b/popmon/hist/filling/pandas_histogrammar.py
deleted file mode 100644
index 0a2718ea..00000000
--- a/popmon/hist/filling/pandas_histogrammar.py
+++ /dev/null
@@ -1,264 +0,0 @@
-"""
-Copyright Eskapade:
-License Apache-2: https://github.com/KaveIO/Eskapade-Core/blob/master/LICENSE
-Reference link:
-https://github.com/KaveIO/Eskapade/blob/master/python/eskapade/analysis/links/hist_filler.py
-All modifications copyright ING WBAA.
-"""
-
-import contextlib
-import multiprocessing
-
-import histogrammar as hg
-import joblib
-import numpy as np
-import pandas as pd
-from joblib import Parallel, delayed
-from tqdm import tqdm
-
-from ...hist.filling import utils
-from ...hist.filling.histogram_filler_base import HistogramFillerBase
-
-
-class PandasHistogrammar(HistogramFillerBase):
-    """Fill histogrammar histograms.
-
-    Algorithm to fill histogrammar style bin, sparse-bin and category histograms.
-    Timestamp features are converted to nanoseconds before
-    the binning is applied. Final histograms are stored in the datastore.
-    """
-
-    def __init__(
-        self,
-        features=None,
-        binning="unit",
-        bin_specs=None,
-        time_axis="",
-        var_dtype=None,
-        read_key=None,
-        store_key=None,
-        nbins_1d=40,
-        nbins_2d=20,
-        nbins_3d=10,
-        max_nunique=500,
-    ):
-        """Initialize module instance.
-
-        Store and do basic check on the attributes HistogramFillerBase.
-
-        :param list features: columns to pick up from input data. (default is all features)
-            For multi-dimensional histograms, separate the column names with a :
-
-            Example features list is:
-
-            .. code-block:: python
-
-                features = ['x', 'date', 'date:x', 'date:y', 'date:x:y']
-
-        :param str binning: default binning to revert to in case bin_specs not supplied. options are:
-            "unit" or "auto", default is "unit". When using "auto", semi-clever binning is automatically done.
-        :param dict bin_specs: dictionaries used for rebinning numeric or timestamp features
-
-            Example bin_specs dictionary is:
-
-            .. code-block:: python
-
-                bin_specs = {'x': {'bin_width': 1, 'bin_offset': 0},
-                             'y': {'num': 10, 'low': 0.0, 'high': 2.0},
-                             'x:y': [{}, {'num': 5, 'low': 0.0, 'high': 1.0}]}
-
-            In the bin specs for x:y, x reverts to the 1-dim setting.
-
-        :param str time_axis: name of datetime feature, used as time axis, eg 'date'. if True, will be guessed.
-        :param dict var_dtype: dictionary with specified datatype per feature (optional)
-        :param str read_key: key of input histogram-dict to read from data store .
-            (only required when calling transform(datastore) as module)
-        :param str store_key: key of output data to store in data store
-            (only required when calling transform(datastore) as module)
-        :param int nbins_1d: auto-binning number of bins for 1d histograms. default is 40.
-        :param int nbins_2d: auto-binning number of bins for 2d histograms. default is 20.
-        :param int nbins_3d: auto-binning number of bins for 3d histograms. default is 10.
-        :param int max_nunique: auto-binning threshold for unique categorical values. default is 500.
-        """
-        HistogramFillerBase.__init__(
-            self,
-            features,
-            binning,
-            bin_specs,
-            time_axis,
-            var_dtype,
-            read_key,
-            store_key,
-            nbins_1d,
-            nbins_2d,
-            nbins_3d,
-            max_nunique,
-        )
-
-    def assert_dataframe(self, df):
-        """Check that input data is a filled pandas data frame.
-
-        :param df: input (pandas) data frame
-        """
-        if not isinstance(df, pd.DataFrame):
-            raise TypeError(f"retrieved object not of type {pd.DataFrame}")
-        if df.shape[0] == 0:
-            raise RuntimeError("data is empty")
-        return df
-
-    def get_features(self, df):
-        """Get columns of (pandas) dataframe
-
-        :param df: input pandas dataframe
-        """
-        return df.columns.tolist()
-
-    def get_quantiles(self, df, quantiles=[0.05, 0.95], columns=[]):
-        """return dict with quantiles for given columns
-
-        :param df: input pandas data frame
-        :param quantiles: list of quantiles. default is [0.05, 0.95]
-        :param columns: columns to select. default is all.
-        """
-        if len(columns) == 0:
-            return {}
-        qdf = df[columns].quantile(quantiles)
-        qd = {c: qdf[c].values.tolist() for c in columns}
-        return qd
-
-    def get_nunique(self, df, columns=[]):
-        """return dict with number of unique entries for given columns
-
-        :param df: input pandas data frame
-        :param columns: columns to select (optional)
-        """
-        if not columns:
-            columns = df.columns
-        return df[columns].nunique().to_dict()
-
-    def process_features(self, df, cols_by_type):
-        """Process features before histogram filling.
-
-        Specifically, convert timestamp features to integers
-
-        :param df: input (pandas) data frame
-        :param cols_by_type: dictionary of column sets for each type
-        :returns: output (pandas) data frame with converted timestamp features
-        :rtype: pandas DataFrame
-        """
-        # timestamp variables are converted to ns here
-        # make temp df for value counting (used below)
-        idf = df[list(cols_by_type["num"]) + list(cols_by_type["str"])].copy()
-        for col in cols_by_type["dt"]:
-            self.logger.debug(
-                'Converting column "{col}" of type "{type}" to nanosec.'.format(
-                    col=col, type=self.var_dtype[col]
-                )
-            )
-            idf[col] = df[col].apply(utils.to_ns)
-        return idf
-
-    def fill_histograms(self, idf):
-        """Fill the histograms
-
-        :param idf: converted input dataframe
-        """
-        # construct empty histograms if needed
-        for cols in self.features:
-            name = ":".join(cols)
-            if name not in self._hists:
-                # create an (empty) histogram of right type
-                self._hists[name] = self.construct_empty_hist(cols)
-
-        # parallel histogram filling with working progress bar
-        num_cores = multiprocessing.cpu_count()
-        with tqdm_joblib(
-            tqdm(total=len(self.features), ncols=100)
-        ) as progress_bar:  # noqa: F841
-            res = Parallel(n_jobs=num_cores)(
-                delayed(_fill_histogram)(
-                    idf=idf[c], hist=self._hists[":".join(c)], features=c
-                )
-                for c in self.features
-            )
-            # update dictionary
-            for name, hist in res:
-                self._hists[name] = hist
-
-    def construct_empty_hist(self, features):
-        """Create an (empty) histogram of right type.
-
-        Create a multi-dim histogram by iterating through the features in
-        reverse order and passing a single-dim hist as input to the next
-        column.
-
-        :param list features: histogram features
-        :return: created histogram
-        :rtype: histogrammar.Count
-        """
-        hist = hg.Count()
-
-        # create a multi-dim histogram by iterating through the features
-        # in reverse order and passing a single-dim hist as input
-        # to the next column
-        revcols = list(reversed(features))
-        for idx, col in enumerate(revcols):
-            # histogram type depends on the data type
-            dt = self.var_dtype[col]
-
-            # processing function, e.g. only accept boolians during filling
-            f = utils.QUANTITY[dt]
-            if len(features) == 1:
-                # df[col] is a pd.series
-                quant = lambda x, fnc=f: fnc(x)  # noqa
-            else:
-                # df[features] is a pd.Dataframe
-                # fix column to col
-                quant = lambda x, fnc=f, clm=col: fnc(x[clm])  # noqa
-
-            hist = self.get_hist_bin(hist, features, quant, col, dt)
-
-        return hist
-
-
-def _fill_histogram(idf, hist, features):
-    """Fill input histogram with column(s) of input dataframe.
-
-    Separate function call for parallellization.
-
-    :param idf: input data frame used for filling histogram
-    :param hist: empty histogrammar histogram about to be filled
-    :param list features: histogram column(s)
-    """
-    name = ":".join(features)
-    clm = features[0] if len(features) == 1 else features
-    # do the actual filling
-    hist.fill.numpy(idf[clm])
-    return name, hist
-
-
-# tqdm working with joblib
-@contextlib.contextmanager
-def tqdm_joblib(tqdm_object):
-    """Context manager to patch joblib to report into tqdm progress bar given as argument
-
-    From: https://stackoverflow.com/questions/24983493/tracking-progress-of-joblib-parallel-execution?rq=1
-    """
-
-    class TqdmBatchCompletionCallback:
-        def __init__(self, time, index, parallel):
-            self.index = index
-            self.parallel = parallel
-
-        def __call__(self, index):
-            tqdm_object.update()
-            if self.parallel._original_iterator is not None:
-                self.parallel.dispatch_next()
-
-    old_batch_callback = joblib.parallel.BatchCompletionCallBack
-    joblib.parallel.BatchCompletionCallBack = TqdmBatchCompletionCallback
-    try:
-        yield tqdm_object
-    finally:
-        joblib.parallel.BatchCompletionCallBack = old_batch_callback
-        tqdm_object.close()
diff --git a/popmon/hist/filling/spark_histogrammar.py b/popmon/hist/filling/spark_histogrammar.py
deleted file mode 100644
index ad13add6..00000000
--- a/popmon/hist/filling/spark_histogrammar.py
+++ /dev/null
@@ -1,251 +0,0 @@
-"""
-Copyright Eskapade:
-License Apache-2: https://github.com/KaveIO/Eskapade-Core/blob/master/LICENSE
-Reference link:
-https://github.com/KaveIO/Eskapade-Spark/blob/master/python/eskapadespark/links/spark_histogrammar_filler.py
-All modifications copyright ING WBAA.
-"""
-
-import histogrammar as hg
-import numpy as np
-from tqdm import tqdm
-
-from ...hist.filling.histogram_filler_base import HistogramFillerBase
-
-try:
-    from pyspark.sql import DataFrame
-    from pyspark.sql.functions import approxCountDistinct
-    from pyspark.sql.functions import col as sparkcol
-except (ModuleNotFoundError, AttributeError):
-    pass
-
-
-class SparkHistogrammar(HistogramFillerBase):
-    """Fill histogrammar histograms with Spark.
-
-    Algorithm to fill histogrammar style bin, sparse-bin and category histograms
-    with Spark. Timestamp features are converted to nanoseconds before the binning
-    is applied. Final histograms are stored in the datastore.
-    """
-
-    def __init__(
-        self,
-        features=None,
-        binning="unit",
-        bin_specs=None,
-        time_axis="",
-        var_dtype=None,
-        read_key=None,
-        store_key=None,
-        nbins_1d=40,
-        nbins_2d=20,
-        nbins_3d=10,
-        max_nunique=500,
-    ):
-        """Initialize module instance.
-
-        Store and do basic check on the attributes HistogramFillerBase.
-
-        :param list features: colums to pick up from input data. (default is all features)
-            For multi-dimensional histograms, separate the column names with a :
-
-            Example features list is:
-
-            .. code-block:: python
-
-                features = ['x', 'date', 'date:x', 'date:y', 'date:x:y']
-
-        :param str binning: default binning to revert to in case bin_specs not supplied. options are:
-            "unit" or "auto", default is "unit". When using "auto", semi-clever binning is automatically done.
-        :param dict bin_specs: dictionaries used for rebinning numeric or timestamp features
-
-            Example bin_specs dictionary is:
-
-            .. code-block:: python
-
-                bin_specs = {'x': {'bin_width': 1, 'bin_offset': 0},
-                             'y': {'num': 10, 'low': 0.0, 'high': 2.0},
-                             'x:y': [{}, {'num': 5, 'low': 0.0, 'high': 1.0}]}
-
-            In the bin specs for x:y, x reverts to the 1-dim setting.
-
-        :param str time_axis: name of datetime feature, used as time axis, eg 'date'. if True, will be guessed.
-            If time_axis is set, if no features given, features becomes: ['date:x', 'date:y', 'date:z'] etc.
-        :param dict var_dtype: dictionary with specified datatype per feature (optional)
-        :param str read_key: key of input histogram-dict to read from data store .
-            (only required when calling transform(datastore) as module)
-        :param str store_key: key of output data to store in data store
-            (only required when calling transform(datastore) as module)
-        :param int nbins_1d: auto-binning number of bins for 1d histograms. default is 40.
-        :param int nbins_2d: auto-binning number of bins for 2d histograms. default is 20.
-        :param int nbins_3d: auto-binning number of bins for 3d histograms. default is 10.
-        :param int max_nunique: auto-binning threshold for unique categorical values. default is 500.
-        """
-        HistogramFillerBase.__init__(
-            self,
-            features,
-            binning,
-            bin_specs,
-            time_axis,
-            var_dtype,
-            read_key,
-            store_key,
-            nbins_1d,
-            nbins_2d,
-            nbins_3d,
-            max_nunique,
-        )
-        self._unit_timestamp_specs = {
-            k: float(self._unit_timestamp_specs[k])
-            for i, k in enumerate(self._unit_timestamp_specs)
-        }
-
-    def assert_dataframe(self, df):
-        """Check that input data is a filled spark data frame.
-
-        :param df: input (spark) data frame
-        """
-        if not isinstance(df, DataFrame):
-            raise TypeError("retrieved object not of type Spark DataFrame")
-        assert not len(df.head(1)) == 0, "input dataframe is empty"
-        return df
-
-    def get_features(self, df):
-        """Get columns of dataframe
-
-        :param df: input spark dataframe
-        """
-        return df.columns
-
-    def get_quantiles(self, df, quantiles=[0.05, 0.95], columns=[]):
-        """return dict with quantiles for given columns
-
-        :param df: input (spark) data frame
-        :param quantiles: list of quantiles. default is [0.05, 0.95]
-        :param columns: columns to select. default is all.
-        """
-        if len(columns) == 0:
-            return {}
-        qsl = df.approxQuantile(columns, quantiles, 0.25)
-        qd = {c: qs for c, qs in zip(columns, qsl)}
-        return qd
-
-    def get_nunique(self, df, columns=[]):
-        """return dict with number of unique entries for given columns
-
-        :param df: input (spark) data frame
-        :param columns: columns to select (optional)
-        """
-        if not columns:
-            columns = df.columns
-        qdf = df.agg(*(approxCountDistinct(sparkcol(c)).alias(c) for c in columns))
-        return qdf.toPandas().T[0].to_dict()
-
-    def get_data_type(self, df, col):
-        """Get data type of dataframe column.
-
-        :param df: input data frame
-        :param str col: column
-        """
-        if col not in df.columns:
-            raise KeyError(f'Column "{col:s}" not in input dataframe.')
-        dt = dict(df.dtypes)[col]
-        # spark conversions to numpy or python equivalent
-        if dt == "string":
-            dt = "str"
-        elif dt in ["timestamp", "date"]:
-            dt = np.datetime64
-        elif dt == "boolean":
-            dt = bool
-        elif dt == "bigint":
-            dt = np.int64
-
-        return np.dtype(dt)
-
-    def process_features(self, df, cols_by_type):
-        """Process features before histogram filling.
-
-        Specifically, in this case convert timestamp features to nanoseconds
-
-        :param df: input data frame
-        :return: output data frame with converted timestamp features
-        :rtype: DataFrame
-        """
-        # make alias df for value counting (used below)
-        idf = df.alias("")
-
-        # timestamp variables are converted here to ns since 1970-1-1
-        # histogrammar does not yet support long integers, so convert timestamps to float
-        # epoch = (sparkcol("ts").cast("bigint") * 1000000000).cast("bigint")
-        for col in cols_by_type["dt"]:
-            self.logger.debug(
-                'Converting column "{col}" of type "{type}" to nanosec.'.format(
-                    col=col, type=self.var_dtype[col]
-                )
-            )
-
-            # first cast to timestamp (in case column is stored as date)
-            to_ns = sparkcol(col).cast("timestamp").cast("float") * 1e9
-            idf = idf.withColumn(col, to_ns)
-
-        return idf
-
-    def construct_empty_hist(self, df, features):
-        """Create an (empty) histogram of right type.
-
-        Create a multi-dim histogram by iterating through the features in
-        reverse order and passing a single-dim hist as input to the next
-        column.
-
-        :param df: input dataframe
-        :param list features: histogram features
-        :return: created histogram
-        :rtype: histogrammar.Count
-        """
-        hist = hg.Count()
-
-        # create a multi-dim histogram by iterating through
-        # the features in reverse order and passing a single-dim hist
-        # as input to the next column
-        revcols = list(reversed(features))
-        for idx, col in enumerate(revcols):
-            # histogram type depends on the data type
-            dt = self.var_dtype[col]
-            quant = df[col]
-
-            hist = self.get_hist_bin(hist, features, quant, col, dt)
-
-        return hist
-
-    def fill_histograms(self, idf):
-        """Fill the histograms
-
-        :param idf: input data frame used for filling histogram
-        """
-        for cols in tqdm(self.features, ncols=100):
-            self.logger.debug(
-                'Processing feature "{cols}".'.format(cols=":".join(cols))
-            )
-            self.fill_histogram(idf, cols)
-
-    def fill_histogram(self, idf, features):
-        """Fill input histogram with column(s) of input dataframe.
-
-        :param idf: input data frame used for filling histogram
-        :param list features: histogram column(s)
-        """
-        name = ":".join(features)
-        if name not in self._hists:
-            # create an (empty) histogram of right type
-            self._hists[name] = self.construct_empty_hist(idf, features)
-        hist = self._hists[name]
-
-        # do the actual filling
-        hist.fill.sparksql(idf)
-        self._hists[name] = hist
-
-    def _execute(self, df):
-        df.persist()
-        hists = super()._execute(df)
-        df.unpersist()
-        return hists
diff --git a/popmon/hist/filling/utils.py b/popmon/hist/filling/utils.py
deleted file mode 100644
index 4cc5eeef..00000000
--- a/popmon/hist/filling/utils.py
+++ /dev/null
@@ -1,222 +0,0 @@
-# Copyright (c) 2020 ING Wholesale Banking Advanced Analytics
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of
-# this software and associated documentation files (the "Software"), to deal in
-# the Software without restriction, including without limitation the rights to
-# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-# the Software, and to permit persons to whom the Software is furnished to do so,
-# subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
-# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-
-import numpy as np
-import pandas as pd
-
-NUM_NS_DAY = 24 * 3600 * int(1e9)
-
-
-def check_column(col, sep=":"):
-    """Convert input column string to list of columns
-
-    :param col: input string
-    :param sep: default ":"
-    :return: list of columns
-    """
-    if isinstance(col, str):
-        col = col.split(sep)
-    elif not isinstance(col, list):
-        raise TypeError(f'Columns "{col}" needs to be a string or list of strings')
-    return col
-
-
-def check_dtype(dtype):
-    """Convert datatype to consistent numpy datatype
-
-    :param dtype: input datatype
-    :rtype: numpy.dtype.type
-    """
-    try:
-        if hasattr(dtype, "type"):
-            # this converts pandas types, such as pd.Int64, into numpy types
-            dtype = type(dtype.type())
-        dtype = np.dtype(dtype).type
-        if dtype in {np.str_, np.string_, np.object_}:
-            dtype = np.dtype(str).type
-    except BaseException:
-        raise RuntimeError(f'unknown assigned datatype "{dtype}"')
-    return dtype
-
-
-def to_ns(x):
-    """Convert input timestamps to nanoseconds (integers).
-
-    :param x: value to be converted
-    :returns: converted value
-    :rtype: int
-    """
-    if pd.isnull(x):
-        return 0
-    try:
-        return pd.to_datetime(x).value
-    except Exception:
-        if hasattr(x, "__str__"):
-            return pd.to_datetime(str(x)).value
-    return 0
-
-
-def to_str(val):
-    """Convert input to (array of) string(s).
-
-    :param val: value to be converted
-    :returns: converted value
-    :rtype: str or np.ndarray
-    """
-    if isinstance(val, str):
-        return val
-    elif hasattr(val, "__iter__"):
-        return np.asarray(
-            list(
-                map(
-                    lambda s: s
-                    if isinstance(s, str)
-                    else str(s)
-                    if hasattr(s, "__str__")
-                    else "",
-                    val,
-                )
-            )
-        )
-
-    elif hasattr(val, "__str__"):
-        return str(val)
-
-    return ""
-
-
-def only_str(val):
-    """Pass input value or array only if it is a string.
-
-    :param val: value to be evaluated
-    :returns: evaluated value
-    :rtype: str or np.ndarray
-    """
-    if isinstance(val, str):
-        return val
-    elif hasattr(val, "__iter__"):
-        return np.asarray([s if isinstance(s, str) else "None" for s in val])
-    return "None"
-
-
-def only_bool(val):
-    """Pass input value or array only if it is a bool.
-
-    :param val: value to be evaluated
-    :returns: evaluated value
-    :rtype: np.bool or np.ndarray
-    """
-    if isinstance(val, (np.bool_, bool)):
-        return val
-    elif hasattr(val, "__iter__") and not isinstance(val, str):
-        return np.asarray(
-            [s if isinstance(s, (np.bool_, bool)) else np.nan for s in val]
-        )
-    return np.nan
-
-
-def only_int(val):
-    """Pass input val value or array only if it is an integer.
-
-    :param val: value to be evaluated
-    :returns: evaluated value
-    :rtype: np.int64 or np.ndarray
-    """
-    if isinstance(val, (np.int64, int)):
-        return val
-    elif hasattr(val, "__iter__") and not isinstance(val, str):
-        return np.asarray(
-            [s if isinstance(s, (np.int64, int)) else np.nan for s in val]
-        )
-    return np.nan
-
-
-def only_float(val):
-    """Pass input val value or array only if it is a float.
-
-    :param val: value to be evaluated
-    :returns: evaluated value
-    :rtype: np.float64 or np.ndarray
-    """
-    if isinstance(val, (np.float64, float)):
-        return val
-    elif hasattr(val, "__iter__") and not isinstance(val, str):
-        return np.asarray(
-            [s if isinstance(s, (np.float64, float)) else np.nan for s in val]
-        )
-    return np.nan
-
-
-QUANTITY = {
-    str: only_str,
-    np.str_: only_str,
-    int: only_int,
-    np.int64: only_int,
-    np.int32: only_int,
-    bool: only_bool,
-    np.bool_: only_bool,
-    float: only_float,
-    np.float64: only_float,
-    np.datetime64: only_int,
-}
-
-
-def value_to_bin_index(val, **kwargs):
-    """Convert value to bin index.
-
-    Convert a numeric or timestamp column to an integer bin index.
-
-    :param bin_width: bin_width value needed to convert column
-        to an integer bin index
-    :param bin_offset: bin_offset value needed to convert column
-        to an integer bin index
-    """
-    try:
-        # NOTE this notation also works for timestamps
-        bin_width = kwargs.get("bin_width", 1)
-        bin_offset = kwargs.get("bin_offset", 0)
-        bin_index = int(np.floor((val - bin_offset) / bin_width))
-        return bin_index
-    except BaseException:
-        pass
-    return val
-
-
-def value_to_bin_center(val, **kwargs):
-    """Convert value to bin center.
-
-    Convert a numeric or timestamp column to a common bin center value.
-
-    :param bin_width: bin_width value needed to convert column
-        to a common bin center value
-    :param bin_offset: bin_offset value needed to convert column
-        to a common bin center value
-    """
-    try:
-        # NOTE this notation also works for timestamps, and does not change the
-        # unit
-        bin_width = kwargs.get("bin_width", 1)
-        bin_offset = kwargs.get("bin_offset", 0)
-        bin_index = int(np.floor((val - bin_offset) / bin_width))
-        obj_type = type(bin_width)
-        return bin_offset + obj_type((bin_index + 0.5) * bin_width)
-    except BaseException:
-        pass
-    return val
diff --git a/popmon/hist/hist_splitter.py b/popmon/hist/hist_splitter.py
index cab521d5..663eb305 100644
--- a/popmon/hist/hist_splitter.py
+++ b/popmon/hist/hist_splitter.py
@@ -21,7 +21,11 @@
 import pandas as pd
 
 from ..base import Module
-from ..hist.histogram import HistogramContainer
+from ..hist.hist_utils import (
+    get_histogram,
+    is_timestamp,
+    split_hist_along_first_dimension,
+)
 
 
 class HistSplitter(Module):
@@ -87,8 +91,7 @@ def update_divided(self, divided, split, yname):
             divided.update(split)
         else:
             divided[yname] = [
-                {self.index_col: k, self.hist_col: HistogramContainer(h)}
-                for k, h in split.items()
+                {self.index_col: k, self.hist_col: h} for k, h in split.items()
             ]
         return divided
 
@@ -106,32 +109,31 @@ def transform(self, datastore):
         # if so requested split selected histograms along first axis, and then divide
         for feature in features[:]:
             self.logger.debug(f'Now splitting histogram "{feature}"')
-            hc = HistogramContainer(data[feature])
-            if hc.n_dim <= 1:
+            hist = get_histogram(data[feature])
+            if hist.n_dim <= 1:
                 self.logger.debug(
                     f'Histogram "{feature}" does not have two or more dimensions, nothing to split; skipping.'
                 )
                 continue
 
             cols = feature.split(":")
-            if len(cols) != hc.n_dim:
+            if len(cols) != hist.n_dim:
                 self.logger.error(
-                    f'Dimension of histogram "{feature}" not consistent: {hc.n_dim} vs {len(cols)}; skipping.'
+                    f'Dimension of histogram "{feature}" not consistent: {hist.n_dim} vs {len(cols)}; skipping.'
                 )
                 continue
 
             xname, yname = cols[0], ":".join(cols[1:])  # 'time:x:y' -> 'time', 'x:y'
             if yname in divided:
-                self.logger.debug(
-                    f'HistogramContainer "{yname}" already divided; skipping.'
-                )
+                self.logger.debug(f'Histogram "{yname}" already divided; skipping.')
                 continue
 
             # if requested split selected histograms along first axis. e.g. time:x:y is split along time
             # then check if sub-hists of x:y can be further projected. eg. x:y is projected on x and y as well.
             # datatype properties
-            is_ts = hc.is_ts or xname in self.var_timestamp
-            split = hc.split_hist_along_first_dimension(
+            is_ts = is_timestamp(hist) or xname in self.var_timestamp
+            split = split_hist_along_first_dimension(
+                hist=hist,
                 short_keys=self.short_keys,
                 convert_time_index=is_ts,
                 xname=xname,
diff --git a/popmon/hist/hist_utils.py b/popmon/hist/hist_utils.py
new file mode 100644
index 00000000..2ab97ccf
--- /dev/null
+++ b/popmon/hist/hist_utils.py
@@ -0,0 +1,313 @@
+# Copyright (c) 2020 ING Wholesale Banking Advanced Analytics
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+import histogrammar
+import numpy as np
+import pandas as pd
+from histogrammar.util import get_hist_props
+
+COMMON_HIST_TYPES = (
+    histogrammar.Categorize,
+    histogrammar.Bin,
+    histogrammar.SparselyBin,
+    histogrammar.specialized.CategorizeHistogramMethods,
+    histogrammar.specialized.HistogramMethods,
+    histogrammar.specialized.SparselyHistogramMethods,
+    histogrammar.specialized.CategorizeHistogramMethods,
+    histogrammar.specialized.TwoDimensionallyHistogramMethods,
+    histogrammar.specialized.SparselyTwoDimensionallyHistogramMethods,
+)
+
+HG_FACTORY = histogrammar.Factory()
+
+
+def sum_entries(hist, default=True):
+    """Recursively get sum of entries of histogram
+
+    hist.entries is sometimes reported as zero; in that case the entries are summed recursively over all sub-counters.
+
+    :param hist: input histogrammar histogram
+    :param bool default: if false, skip hist.entries and sum the bin contents, excluding nan, overflow and underflow.
+    :return: total sum of entries of histogram
+    :rtype: int
+    """
+    if default:
+        entries = hist.entries
+        if entries > 0:
+            return entries
+
+    # double check number of entries, sometimes not well set
+    sume = 0
+    if hasattr(hist, "bins"):
+        # loop over all counters and integrate over y (=j)
+        for i in hist.bins:
+            bi = hist.bins[i]
+            sume += sum_entries(bi)
+    elif hasattr(hist, "values"):
+        # loop over all counters and integrate over y (=j)
+        for i, bi in enumerate(hist.values):
+            sume += sum_entries(bi)
+    elif hasattr(hist, "entries"):
+        # only count histogrammar.Count() objects
+        sume += hist.entries
+    return sume
+
+
+def project_on_x(hist):
+    """Project n-dim histogram onto x-axis
+
+    :param hist: input histogrammar histogram
+    :return: on x-axis projected histogram (1d)
+    """
+    # basic check: projecting on itself
+    if hasattr(hist, "n_dim") and hist.n_dim <= 1:
+        return hist
+    # basic checks on contents
+    if hasattr(hist, "bins"):
+        if len(hist.bins) == 0:
+            return hist
+    elif hasattr(hist, "values"):
+        if len(hist.values) == 0:
+            return hist
+    else:
+        return hist
+
+    # make empty clone
+    # note: cannot use h_x = hist.zero() here, because it copies the n-dim structure, which breaks hist.toJsonString()
+    if isinstance(hist, histogrammar.Bin):
+        h_x = histogrammar.Bin(
+            num=hist.num,
+            low=hist.low,
+            high=hist.high,
+            quantity=hist.quantity,
+        )
+    elif isinstance(hist, histogrammar.SparselyBin):
+        h_x = histogrammar.SparselyBin(
+            binWidth=hist.binWidth,
+            origin=hist.origin,
+            quantity=hist.quantity,
+        )
+    elif isinstance(hist, histogrammar.Categorize):
+        h_x = histogrammar.Categorize(quantity=hist.quantity)
+    else:
+        raise RuntimeError("unknown historgram type. cannot get zero copy.")
+
+    if hasattr(hist, "bins"):
+        for key, bi in hist.bins.items():
+            h_x.bins[key] = histogrammar.Count.ed(sum_entries(bi))
+    elif hasattr(hist, "values"):
+        for i, bi in enumerate(hist.values):
+            h_x.values[i] = histogrammar.Count.ed(sum_entries(bi))
+
+    return h_x
+
+
+def sum_over_x(hist):
+    """Integrate histogram over first dimension
+
+    :param hist: input histogrammar histogram
+    :return: integrated histogram
+    """
+    # basic check: nothing to do?
+    if hasattr(hist, "n_dim") and hist.n_dim == 0:
+        return hist
+    if hasattr(hist, "n_dim") and hist.n_dim == 1:
+        return histogrammar.Count.ed(sum_entries(hist))
+
+    # n_dim >= 2 from now on
+    # basic checks on contents
+    if hasattr(hist, "bins"):
+        if len(hist.bins) == 0:
+            return hist
+    elif hasattr(hist, "values"):
+        if len(hist.values) == 0:
+            return hist
+    else:
+        return hist
+
+    # n_dim >= 2 and we have contents; here we sum over it.
+    h_proj = None
+    if hasattr(hist, "bins"):
+        h_proj = list(hist.bins.values())[0].zero()
+        # loop over all counters and integrate over x (=i)
+        for bi in hist.bins.values():
+            h_proj += bi
+    elif hasattr(hist, "values"):
+        h_proj = hist.values[0].zero()
+        # loop over all counters and integrate
+        for bi in hist.values:
+            h_proj += bi
+
+    return h_proj
+
+
+def project_split2dhist_on_axis(splitdict, axis="x"):
+    """Project a split 2d-histogram onto one axis
+
+    Project a 2d hist that's been split with function split_hist_along_first_dimension
+    onto x or y axis.
+
+    :param dict splitdict: input split histogram to be projected.
+    :param str axis: name of axis to project on, should be x or y. default is x.
+
+    :return: dictionary of projected sub-histograms, with the same keys as the input splitdict
+    :rtype: dict
+    """
+    if not isinstance(splitdict, dict):
+        raise TypeError(
+            "splitdict: {wt}, type should be a dictionary.".format(wt=type(splitdict))
+        )
+    if axis not in ["x", "y"]:
+        raise ValueError(f"axis: {axis}, can only be x or y.")
+
+    hdict = dict()
+
+    for key, hxy in splitdict.items():
+        h = project_on_x(hxy) if axis == "x" else sum_over_x(hxy)
+        hdict[key] = h
+
+    return hdict
+
+
+def get_histogram(hist_obj):
+    """
+    Parse input and convert to histogrammar object
+
+    :param hist_obj: input histogrammar object. Can also be a corresponding json object or str.
+    :return: histogrammar histogram
+    """
+    hist = None
+    if isinstance(hist_obj, COMMON_HIST_TYPES):
+        hist = hist_obj
+    elif isinstance(hist_obj, str):
+        hist = HG_FACTORY.fromJsonString(hist_obj)
+    elif isinstance(hist_obj, dict):
+        hist = HG_FACTORY.fromJson(hist_obj)
+    if hist is None:
+        raise ValueError("Please provide histogram object as input.")
+    return hist
+
+
+def is_timestamp(hist):
+    props = get_hist_props(hist)
+    return props["is_ts"]
+
+
+def is_numeric(hist):
+    props = get_hist_props(hist)
+    return props["is_num"]
+
+
+def sparse_bin_centers_x(hist):
+    """Get x-axis bin centers of sparse histogram"""
+    keys = sorted(hist.bins.keys())
+    if hist.minBin is None or hist.maxBin is None:
+        # number of bins is set to 1.
+        centers = np.array([hist.origin + 0.5 * hist.binWidth])
+    else:
+        centers = np.array([hist.origin + (i + 0.5) * hist.binWidth for i in keys])
+
+    values = [hist.bins[key] for key in keys]
+    return centers, values
+
+
+def get_bin_centers(hist):
+    """Get bin centers or labels of histogram"""
+    if isinstance(hist, histogrammar.Bin):  # Bin
+        centers, values = hist.bin_centers(), hist.values
+    elif isinstance(hist, histogrammar.SparselyBin):
+        centers, values = sparse_bin_centers_x(hist)
+    else:  # categorize
+        centers, values = hist.bin_labels(), hist.values
+    return centers, values
+
+
+def split_hist_along_first_dimension(
+    hist,
+    xname="x",
+    yname="y",
+    short_keys=True,
+    convert_time_index=True,
+    filter_empty_split_hists=True,
+):
+    """Split (multi-dimensional) hist into sub-hists along x-axis
+
+    Function to split a (multi-dimensional) histogram into sub-histograms
+    along the first dimension encountered.
+
+    :param str xname: name of x-axis. default is x.
+    :param str yname: name of y-axis. default is y.
+    :param bool short_keys: if false, use long descriptive dict keys.
+    :param bool convert_time_index: if first dimension is a datetime, convert to pandas timestamp. default is true.
+    :param bool filter_empty_split_hists: filter out empty sub-histograms after splitting. default is True.
+    :returns: dictionary of sub-histograms, keyed by x-axis bin center or label (with axis names if short_keys is false)
+    :rtype: dict
+    """
+    hdict = dict()
+
+    # nothing special to do
+    if hist.n_dim == 0:
+        hdict["dummy"] = hist
+        return hdict
+
+    centers, values = get_bin_centers(hist)
+
+    # MB 20191004: this happens rarely, but, in Histogrammar, if a multi-dim histogram contains *only*
+    #   nans, overflows, or underflows for x, its sub-dimensional histograms (y, z, etc) do not get filled
+    #   and/or are not created. For sparselybin histograms this breaks the event-count, and the evaluation of n-dim and
+    #   datatype, so that splitting the histogram along the x-axis gives mutually inconsistent sub-histograms.
+    #   In this step we filter out any such empty sub-histograms, to ensure that
+    #   all left-over sub-histograms are consistent with each other.
+    if filter_empty_split_hists:
+        centers, values = _filter_empty_split_hists(centers, values)
+
+    for name, val in zip(centers, values):
+        name = _edit_name(hist, name, xname, yname, convert_time_index, short_keys)
+        hdict[name] = val
+
+    return hdict
+
+
+def _filter_empty_split_hists(centers, values):
+    """Filter empty split histograms from input centers and values
+
+    :param list centers: input center values list
+    :param list values: input values list
+    :return: filtered centers and values lists
+    """
+    cc = []
+    vv = []
+    for c, v in zip(centers, values):
+        # ignore nan, overflow and underflow counters in total event count
+        entries = sum_entries(v, default=False)
+        if entries > 0:
+            cc.append(c)
+            vv.append(v)
+    return cc, vv
+
+
+def _edit_name(hist, axis_name, xname, yname, convert_time_index, short_keys):
+    if convert_time_index and is_timestamp(hist):
+        axis_name = pd.Timestamp(axis_name)
+    if not short_keys:
+        axis_name = f"{xname}={axis_name}"
+        if hist.n_dim >= 2:
+            axis_name = f"{yname}[{axis_name}]"
+    return axis_name
diff --git a/popmon/hist/histogram.py b/popmon/hist/histogram.py
deleted file mode 100644
index d612c84f..00000000
--- a/popmon/hist/histogram.py
+++ /dev/null
@@ -1,360 +0,0 @@
-# Copyright (c) 2020 ING Wholesale Banking Advanced Analytics
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of
-# this software and associated documentation files (the "Software"), to deal in
-# the Software without restriction, including without limitation the rights to
-# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-# the Software, and to permit persons to whom the Software is furnished to do so,
-# subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
-# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-
-import numpy as np
-import pandas as pd
-
-from ..hist.patched_histogrammer import COMMON_HIST_TYPES, histogrammar
-
-HG_FACTORY = histogrammar.Factory()
-
-
-def sum_entries(hist_data, default=True):
-    """Recursively get sum of entries of histogram
-
-    Sometimes hist.entries gives zero as answer? This function always works though.
-
-    :param hist_data: input histogrammar histogram
-    :param bool default: if false, do not use default HG method for evaluating entries, but exclude nans, of, uf.
-    :return: total sum of entries of histogram
-    :rtype: int
-    """
-    if default:
-        entries = hist_data.entries
-        if entries > 0:
-            return entries
-
-    # double check number of entries, sometimes not well set
-    sume = 0
-    if hasattr(hist_data, "bins"):
-        # loop over all counters and integrate over y (=j)
-        for i in hist_data.bins:
-            bi = hist_data.bins[i]
-            sume += sum_entries(bi)
-    elif hasattr(hist_data, "values"):
-        # loop over all counters and integrate over y (=j)
-        for i, bi in enumerate(hist_data.values):
-            sume += sum_entries(bi)
-    elif hasattr(hist_data, "entries"):
-        # only count histogrammar.Count() objects
-        sume += hist_data.entries
-    return sume
-
-
-def project_on_x(hist_data):
-    """Project n-dim histogram onto x-axis
-
-    :param hist_data: input histogrammar histogram
-    :return: on x-axis projected histogram (1d)
-    """
-    # basic check: projecting on itself
-    if hasattr(hist_data, "n_dim") and hist_data.n_dim <= 1:
-        return hist_data
-    # basic checks on contents
-    if hasattr(hist_data, "bins"):
-        if len(hist_data.bins) == 0:
-            return hist_data
-    elif hasattr(hist_data, "values"):
-        if len(hist_data.values) == 0:
-            return hist_data
-    else:
-        return hist_data
-
-    # make empty clone
-    # note: cannot do: h_x = hist.zero(), b/c it copies n-dim structure, which screws up hist.toJsonString()
-    if isinstance(hist_data, histogrammar.Bin):
-        h_x = histogrammar.Bin(
-            num=hist_data.num,
-            low=hist_data.low,
-            high=hist_data.high,
-            quantity=hist_data.quantity,
-        )
-    elif isinstance(hist_data, histogrammar.SparselyBin):
-        h_x = histogrammar.SparselyBin(
-            binWidth=hist_data.binWidth,
-            origin=hist_data.origin,
-            quantity=hist_data.quantity,
-        )
-    elif isinstance(hist_data, histogrammar.Categorize):
-        h_x = histogrammar.Categorize(quantity=hist_data.quantity)
-    else:
-        raise RuntimeError("unknown historgram type. cannot get zero copy.")
-
-    if hasattr(hist_data, "bins"):
-        for key, bi in hist_data.bins.items():
-            h_x.bins[key] = histogrammar.Count.ed(sum_entries(bi))
-    elif hasattr(hist_data, "values"):
-        for i, bi in enumerate(hist_data.values):
-            h_x.values[i] = histogrammar.Count.ed(sum_entries(bi))
-
-    return h_x
-
-
-def sum_over_x(hist_data):
-    """Integrate histogram over first dimension
-
-    :param hist_data: input histogrammar histogram
-    :return: integrated histogram
-    """
-    # basic check: nothing to do?
-    if hasattr(hist_data, "n_dim") and hist_data.n_dim == 0:
-        return hist_data
-    if hasattr(hist_data, "n_dim") and hist_data.n_dim == 1:
-        return histogrammar.Count.ed(sum_entries(hist_data))
-
-    # n_dim >= 2 from now on
-    # basic checks on contents
-    if hasattr(hist_data, "bins"):
-        if len(hist_data.bins) == 0:
-            return hist_data
-    elif hasattr(hist_data, "values"):
-        if len(hist_data.values) == 0:
-            return hist_data
-    else:
-        return hist_data
-
-    # n_dim >= 2 and we have contents; here we sum over it.
-    h_proj = None
-    if hasattr(hist_data, "bins"):
-        h_proj = list(hist_data.bins.values())[0].zero()
-        # loop over all counters and integrate over x (=i)
-        for bi in hist_data.bins.values():
-            h_proj += bi
-    elif hasattr(hist_data, "values"):
-        h_proj = hist_data.values[0].zero()
-        # loop over all counters and integrate
-        for bi in hist_data.values:
-            h_proj += bi
-
-    return h_proj
-
-
-def project_split2dhist_on_axis(splitdict, axis="x"):
-    """Project a split 2d-histogram onto one axis
-
-    Project a 2d hist that's been split with function split_hist_along_first_dimension
-    onto x or y axis.
-
-    :param dict splitdict: input split histogram to be projected.
-    :param str axis: name of axis to project on, should be x or y. default is x.
-
-    :return: sorted dictionary of sub-histograms, with as keys the x-axis name and bin-number
-    :rtype: SortedDict
-    """
-    if not isinstance(splitdict, dict):
-        raise TypeError(
-            "splitdict: {wt}, type should be a dictionary.".format(wt=type(splitdict))
-        )
-    if axis not in ["x", "y"]:
-        raise ValueError(f"axis: {axis}, can only be x or y.")
-
-    hdict = dict()
-
-    for key, hxy in splitdict.items():
-        h = project_on_x(hxy) if axis == "x" else sum_over_x(hxy)
-        hdict[key] = h
-
-    return hdict
-
-
-class HistogramContainer:
-    """Wrapper class around histogrammar histograms with several utility functions."""
-
-    def __init__(self, hist_obj):
-        """Initialization
-
-        :param hist_obj: input histogrammar object. Can also be a corresponding json object or str.
-        """
-        self.hist = None
-        if isinstance(hist_obj, HistogramContainer):
-            self.hist = hist_obj.hist
-        elif isinstance(hist_obj, COMMON_HIST_TYPES):
-            self.hist = hist_obj
-        elif isinstance(hist_obj, str):
-            self.hist = HG_FACTORY.fromJsonString(hist_obj)
-        elif isinstance(hist_obj, dict):
-            self.hist = HG_FACTORY.fromJson(hist_obj)
-        if self.hist is None:
-            raise ValueError(
-                "Please provide histogram or histogram container as input."
-            )
-
-        self.is_list = isinstance(self.hist.datatype, list)
-        var_type = self.hist.datatype if not self.is_list else self.hist.datatype[0]
-        self.npdtype = np.dtype(var_type)
-
-        # determine data-type categories
-        self.is_int = np.issubdtype(self.npdtype, np.integer)
-        self.is_ts = np.issubdtype(self.npdtype, np.datetime64)
-        self.is_num = self.is_ts or np.issubdtype(self.npdtype, np.number)
-        self.n_dim = self.hist.n_dim
-        self.entries = self.hist.entries
-
-    def __repr__(self):
-        return f"HistogramContainer(dtype={self.npdtype}, n_dims={self.n_dim})"
-
-    def __str__(self):
-        return repr(self)
-
-    def _edit_name(self, axis_name, xname, yname, convert_time_index, short_keys):
-        if convert_time_index and self.is_ts:
-            axis_name = pd.Timestamp(axis_name)
-        if not short_keys:
-            axis_name = f"{xname}={axis_name}"
-            if self.n_dim >= 2:
-                axis_name = f"{yname}[{axis_name}]"
-        return axis_name
-
-    def sparse_bin_centers_x(self):
-        """Get x-axis bin centers of sparse histogram"""
-        keys = sorted(self.hist.bins.keys())
-        if self.hist.minBin is None or self.hist.maxBin is None:
-            # number of bins is set to 1.
-            centers = np.array([self.hist.origin + 0.5 * self.hist.binWidth])
-        else:
-            centers = np.array(
-                [self.hist.origin + (i + 0.5) * self.hist.binWidth for i in keys]
-            )
-
-        values = [self.hist.bins[key] for key in keys]
-        return centers, values
-
-    def get_bin_centers(self):
-        """Get bin centers or labels of histogram"""
-        if isinstance(self.hist, histogrammar.Bin):  # Bin
-            centers, values = self.hist.bin_centers(), self.hist.values
-        elif isinstance(self.hist, histogrammar.SparselyBin):
-            centers, values = self.sparse_bin_centers_x()
-        else:  # categorize
-            centers, values = self.hist.bin_labels(), self.hist.values
-        return centers, values
-
-    def split_hist_along_first_dimension(
-        self,
-        xname="x",
-        yname="y",
-        short_keys=True,
-        convert_time_index=True,
-        filter_empty_split_hists=True,
-    ):
-        """Split (multi-dimensional) hist into sub-hists along x-axis
-
-        Function to split a (multi-dimensional) histogram into sub-histograms
-        along the first dimension encountered.
-
-        :param str xname: name of x-axis. default is x.
-        :param str yname: name of y-axis. default is y.
-        :param bool short_keys: if false, use long descriptive dict keys.
-        :param bool convert_time_index: if first dimension is a datetime, convert to pandas timestamp. default is true.
-        :param bool filter_empty_split_hists: filter out empty sub-histograms after splitting. default is True.
-        :returns: sorted dictionary of sub-histograms, with as keys the x-axis name and bin-number
-        :rtype: SortedDict
-        """
-        hdict = dict()
-
-        # nothing special to do
-        if self.n_dim == 0:
-            hdict["dummy"] = self.hist
-            return hdict
-
-        centers, values = self.get_bin_centers()
-
-        # MB 20191004: this happens rarely, but, in Histogrammar, if a multi-dim histogram contains *only*
-        #   nans, overflows, or underflows for x, its sub-dimensional histograms (y, z, etc) do not get filled
-        #   and/or are created. For sparselybin histograms this screws up the event-count, and evaluation of n-dim and
-        #   datatype, so that the comparison of split-histograms along the x-axis gives inconsistent histograms.
-        #   In this step we filter out any such empty sub-histograms, to ensure that
-        #   all left-over sub-histograms are consistent with each other.
-        if filter_empty_split_hists:
-            centers, values = self._filter_empty_split_hists(centers, values)
-
-        for name, val in zip(centers, values):
-            name = self._edit_name(name, xname, yname, convert_time_index, short_keys)
-            hdict[name] = val
-
-        return hdict
-
-    def _filter_empty_split_hists(self, centers, values):
-        """Filter empty split histograms from input centers and values
-
-        :param list centers: input center values list
-        :param list values: input values list
-        :return: filtered centers and values lists
-        """
-        cc = []
-        vv = []
-        for c, v in zip(centers, values):
-            # ignore nan, overflow and underflow counters in total event count
-            entries = sum_entries(v, default=False)
-            if entries > 0:
-                cc.append(c)
-                vv.append(v)
-        return cc, vv
-
-
-def get_hist_props(hist):
-    """Get histogram datatype properties.
-
-    :param hist: input histogram
-    :returns dict: Column properties
-    """
-    hist = hist.hist if isinstance(hist, HistogramContainer) else hist
-
-    var_type = (
-        hist.datatype if not isinstance(hist.datatype, list) else hist.datatype[0]
-    )
-    npdtype = np.dtype(var_type)
-
-    # determine data-type categories
-    is_int = isinstance(npdtype.type(), np.integer)
-    is_ts = isinstance(npdtype.type(), np.datetime64)
-    is_num = is_ts or isinstance(npdtype.type(), np.number)
-    is_bool = isinstance(npdtype.type(), np.bool_)
-
-    return dict(
-        dtype=npdtype, is_num=is_num, is_int=is_int, is_ts=is_ts, is_bool=is_bool
-    )
-
-
-def dumper(obj):
-    """Utility function to convert objects to json
-
-    From: https://stackoverflow.com/questions/3768895/how-to-make-a-class-json-serializable
-    E.g. use to convert dict of histogrammar objects to json
-
-    Use as:
-
-    .. code-block:: python
-
-        js = json.dumps(hists, default=dumper)
-        with open(filename, 'w') as f:
-            json.dump(hists, f, default=dumper)
-
-    :param obj: input object
-    :return: output json object
-    """
-    if hasattr(obj, "toJSON"):
-        return obj.toJSON()
-    elif hasattr(obj, "toJson"):
-        return obj.toJson()
-    elif hasattr(obj, "__dict__"):
-        return obj.__dict__
-    else:
-        raise RuntimeError(f"Do not know how to serialize object type {type(obj)}")
diff --git a/popmon/hist/patched_histogrammer.py b/popmon/hist/patched_histogrammer.py
deleted file mode 100644
index 5d9eb002..00000000
--- a/popmon/hist/patched_histogrammer.py
+++ /dev/null
@@ -1,128 +0,0 @@
-# Copyright (c) 2020 ING Wholesale Banking Advanced Analytics
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of
-# this software and associated documentation files (the "Software"), to deal in
-# the Software without restriction, including without limitation the rights to
-# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-# the Software, and to permit persons to whom the Software is furnished to do so,
-# subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
-# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-
-import histogrammar
-import numpy as np
-
-# large numbers (time in ns since 1970) used to determine if float corresponds to a timestamp
-DATE_LOW = 5e16  # 1971-08-02 16:53:20 in nanosec
-DATE_HIGH = 9.9e18  # 2260-1-1 in nanosec
-
-COMMON_HIST_TYPES = (
-    histogrammar.Categorize,
-    histogrammar.Bin,
-    histogrammar.SparselyBin,
-    histogrammar.specialized.CategorizeHistogramMethods,
-    histogrammar.specialized.HistogramMethods,
-    histogrammar.specialized.SparselyHistogramMethods,
-    histogrammar.specialized.CategorizeHistogramMethods,
-    histogrammar.specialized.TwoDimensionallyHistogramMethods,
-    histogrammar.specialized.SparselyTwoDimensionallyHistogramMethods,
-)
-
-
-def get_datatype(cls):
-    """Get histogrammar histogram datatype(s) of its axes
-
-    Return data type of the variable represented by the histogram.  If not
-    already set, will determine datatype automatically.
-
-    :returns: list with datatypes of all dimenensions of the histogram
-    :rtype: list
-    """
-    datatype = []
-    if isinstance(cls, histogrammar.Count):
-        return datatype
-    if isinstance(cls, histogrammar.Categorize):
-        if len(cls.bins) > 0:
-            dt = type(list(cls.bins.keys())[0])
-            dt = np.dtype(dt).type
-            if (dt is np.str_) or (dt is np.string_) or (dt is np.object_):
-                dt = str
-            datatype = [dt]
-    elif isinstance(cls, (histogrammar.Bin, histogrammar.SparselyBin)):
-        datatype = [np.number]
-        bin_centers = cls.bin_centers()
-        if len(bin_centers) > 0:
-            dt = type(bin_centers[-1])
-            dt = np.dtype(dt).type
-            datatype = [dt]
-            # HACK: making an educated guess for timestamp
-            # timestamp is in ns since 1970, so a huge number.
-            is_ts = DATE_LOW < bin_centers[-1] < DATE_HIGH
-            if is_ts:
-                datatype = [np.datetime64]
-    # histogram may have a subhistogram. Extract it and recurse
-    if hasattr(cls, "bins"):
-        hist = list(cls.bins.values())[0] if cls.bins else histogrammar.Count()
-    elif hasattr(cls, "values"):
-        hist = cls.values[0] if cls.values else histogrammar.Count()
-    else:
-        hist = histogrammar.Count()
-    return datatype + get_datatype(hist)
-
-
-@property
-def datatype(self):  # noqa
-    """Data type of histogram variable.
-
-    Return data type of the variable represented by the histogram.  If not
-    already set, will determine datatype automatically.
-
-    :returns: data type
-    :rtype: type or list(type)
-    """
-    # making an educated guess to determine data-type categories
-    if not hasattr(self, "_datatype"):
-        datatype = get_datatype(self)
-        if isinstance(datatype, list):
-            if len(datatype) == 1:
-                return datatype[0]
-            elif len(datatype) == 0:
-                return type(None)
-        return datatype
-
-    if isinstance(self._datatype, list):
-        if len(self._datatype) == 1:
-            return self._datatype[0]
-        elif len(self._datatype) == 0:
-            return type(None)
-    return self._datatype
-
-
-@datatype.setter
-def datatype(self, dt):
-    """Set data type of histogram variable.
-
-    Set data type of the variable represented by the histogram.
-
-    :param type dt: type of the variable represented by the histogram
-    :raises RunTimeError: if datatype has already been set, it will not overwritten
-    """
-    if hasattr(self, "_datatype"):
-        raise RuntimeError("datatype already set")
-    self._datatype = dt
-
-
-# --- we decorate here
-histogrammar.Bin.datatype = datatype
-histogrammar.SparselyBin.datatype = datatype
-histogrammar.Categorize.datatype = datatype
-histogrammar.Count.datatype = datatype
diff --git a/popmon/notebooks/popmon_tutorial_advanced.ipynb b/popmon/notebooks/popmon_tutorial_advanced.ipynb
index 315af09f..d8457de6 100644
--- a/popmon/notebooks/popmon_tutorial_advanced.ipynb
+++ b/popmon/notebooks/popmon_tutorial_advanced.ipynb
@@ -290,7 +290,7 @@
    "outputs": [],
    "source": [
     "split_hist = split_hists.query(\"date == '2015-07-05 12:00:00'\")\n",
-    "split_hist.histogram[0].hist.plot.matplotlib()"
+    "split_hist.histogram[0].plot.matplotlib()"
    ]
   },
   {
@@ -306,7 +306,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "split_hist.histogram_ref[0].hist.plot.matplotlib()"
+    "split_hist.histogram_ref[0].plot.matplotlib()"
    ]
   },
   {
diff --git a/popmon/pipeline/metrics.py b/popmon/pipeline/metrics.py
index 51abb3e4..61590864 100644
--- a/popmon/pipeline/metrics.py
+++ b/popmon/pipeline/metrics.py
@@ -21,8 +21,12 @@
 import logging
 
 import pandas as pd
+from histogrammar.dfinterface.make_histograms import (
+    get_bin_specs,
+    get_time_axes,
+    make_histograms,
+)
 
-from ..hist.filling.make_histograms import get_bin_specs, get_time_axes, make_histograms
 from ..pipeline.metrics_pipelines import (
     metrics_expanding_reference,
     metrics_external_reference,
diff --git a/popmon/pipeline/report.py b/popmon/pipeline/report.py
index aeec0e12..4d4e16f2 100644
--- a/popmon/pipeline/report.py
+++ b/popmon/pipeline/report.py
@@ -21,10 +21,14 @@
 import logging
 
 import pandas as pd
+from histogrammar.dfinterface.make_histograms import (
+    get_bin_specs,
+    get_time_axes,
+    make_histograms,
+)
 
 from ..base import Module
 from ..config import config
-from ..hist.filling.make_histograms import get_bin_specs, get_time_axes, make_histograms
 from ..pipeline.report_pipelines import (
     ReportPipe,
     expanding_reference,
diff --git a/popmon/stitching/hist_stitcher.py b/popmon/stitching/hist_stitcher.py
index 2548f794..77d88d11 100644
--- a/popmon/stitching/hist_stitcher.py
+++ b/popmon/stitching/hist_stitcher.py
@@ -23,7 +23,6 @@
 
 from ..analysis.hist_numpy import assert_similar_hists
 from ..base import Module
-from ..hist.histogram import HistogramContainer
 
 
 class HistStitcher(Module):
@@ -233,7 +232,7 @@ def stitch_histograms(
                 if feature not in features_basis:
                     continue
                 self.logger.debug(f'Now inserting into histogram "{feature}"')
-                hist_list = [HistogramContainer(hd[key]) for hd in hists_delta]
+                hist_list = [hd[key] for hd in hists_delta]
                 stitched[feature] = self._insert_hists(
                     hists_basis[feature], hist_list, time_bin_idx, mode
                 )
@@ -258,7 +257,7 @@ def stitch_histograms(
             return hists_basis
         for feature in features_overlap:
             self.logger.debug(f'Now stitching histograms "{feature}"')
-            hist_list = [HistogramContainer(hd[feature]) for hd in hists_list]
+            hist_list = [hd[feature] for hd in hists_list]
             stitched[feature] = self._stitch_by_update(mode, hist_list)
         # add basis hists without any overlap
         for feature in features_basis:
@@ -279,10 +278,7 @@ def _find_max_time_bin_index(self, hists_basis, features_basis, time_axis):
         assert len(features_basis) > 0
         assert all([f.startswith(time_axis) for f in features_basis])
 
-        hist_list = [
-            h.hist if isinstance(h, HistogramContainer) else h
-            for h in hists_basis.values()
-        ]
+        hist_list = list(hists_basis.values())
 
         all_sparse = all([isinstance(h, hg.SparselyBin) for h in hist_list])
         all_cat = (
@@ -341,14 +337,10 @@ def _insert_hists(self, hbasis, hdelta_list, time_bin_idx, mode):
             raise TypeError("time_bin_idxs should be an (ordered) string or integer.")
 
         # consistency checks on histogram definitions
-        hbasis = hbasis.hist if isinstance(hbasis, HistogramContainer) else hbasis
         if not hasattr(hbasis, "bins"):
             raise RuntimeError(
                 "basis histogram does not have bins attribute. cannot insert."
             )
-        hdelta_list = [
-            hd.hist if isinstance(hd, HistogramContainer) else hd for hd in hdelta_list
-        ]
         if len(hbasis.bins) > 0:
             hbk0 = list(hbasis.bins.values())[0]
             assert_similar_hists([hbk0] + hdelta_list)
@@ -396,7 +388,6 @@ def _create_hist_with_time_axis(self, hist, time_bin_idx):
             raise TypeError(
                 "time_bin_idx not set. should be an (ordered) string or integer."
             )
-        hist = hist.hist if isinstance(hist, HistogramContainer) else hist
 
         ht = (
             hg.SparselyBin(binWidth=1.0, origin=0.0, quantity=lambda x: x)
@@ -419,10 +410,6 @@ def _stitch_by_update(self, mode, hist_list):
         :param list hist_list: list of input histogrammar histograms
         :return: list of consistent 1d numpy arrays with bin_entries for list of input histograms
         """
-        hist_list = [
-            hc.hist if isinstance(hc, HistogramContainer) else hc for hc in hist_list
-        ]
-
         # --- basic checks
         if len(hist_list) == 0:
             raise RuntimeError("Input histogram list has zero length.")
diff --git a/popmon/version.py b/popmon/version.py
index 17e7c99f..eee51c0b 100644
--- a/popmon/version.py
+++ b/popmon/version.py
@@ -1,6 +1,6 @@
 """THIS FILE IS AUTO-GENERATED BY SETUP.PY."""
 
 name = "popmon"
-version = "0.3.14"
-full_version = "0.3.14"
+version = "0.3.15"
+full_version = "0.3.15"
 release = True
diff --git a/popmon/visualization/histogram_section.py b/popmon/visualization/histogram_section.py
index c710f824..5d5e60ef 100644
--- a/popmon/visualization/histogram_section.py
+++ b/popmon/visualization/histogram_section.py
@@ -21,6 +21,7 @@
 import multiprocessing
 
 import pandas as pd
+from histogrammar.util import get_hist_props
 from joblib import Parallel, delayed
 from tqdm import tqdm
 
@@ -31,7 +32,6 @@
 )
 from ..base import Module
 from ..config import get_stat_description
-from ..hist.histogram import get_hist_props
 from ..visualization.utils import plot_overlay_1d_histogram_b64
 
 
diff --git a/requirements.txt b/requirements.txt
index 786ad260..fae04f6c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 numpy>=1.18.0
 pandas>=0.25.1
 scipy>=1.5.2
-histogrammar==1.0.12
+histogrammar>=1.0.23
 phik
 jinja2
 tqdm
diff --git a/setup.py b/setup.py
index af573b38..983945f6 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@
 
 MAJOR = 0
 REVISION = 3
-PATCH = 14
+PATCH = 15
 DEV = False
 # NOTE: also update version at: README.rst
 
diff --git a/tests/popmon/analysis/profiling/test_hist_profiler.py b/tests/popmon/analysis/profiling/test_hist_profiler.py
index 659b2ae8..3e3f6c0f 100644
--- a/tests/popmon/analysis/profiling/test_hist_profiler.py
+++ b/tests/popmon/analysis/profiling/test_hist_profiler.py
@@ -3,7 +3,7 @@
 import pandas as pd
 
 from popmon.analysis.profiling.hist_profiler import HistProfiler
-from popmon.hist.histogram import HistogramContainer
+from popmon.hist.hist_utils import get_bin_centers
 
 
 def test_profile_hist1d():
@@ -17,9 +17,7 @@ def test_profile_hist1d():
     for i in range(split_len):
         h = hg.Bin(num_bins, 0, 1, lambda x: x)
         h.fill.numpy(np.random.uniform(0, 1, num_entries))
-        split.append(
-            {"date": pd.Timestamp("2019 - 1 - 1"), hist_name: HistogramContainer(h)}
-        )
+        split.append({"date": pd.Timestamp("2019 - 1 - 1"), hist_name: h})
 
     hp = HistProfiler(
         read_key="dummy_input",
@@ -32,5 +30,5 @@ def test_profile_hist1d():
 
     assert len(profiles) == split_len
     assert "p95" in profiles[0]
-    assert profiles[1]["max"] == np.max(split[1][hist_name].get_bin_centers()[0])
-    assert len(profiles[0][hist_name].hist.bin_entries()) == num_bins
+    assert profiles[1]["max"] == np.max(get_bin_centers(split[1][hist_name])[0])
+    assert len(profiles[0][hist_name].bin_entries()) == num_bins
diff --git a/tests/popmon/analysis/test_functions.py b/tests/popmon/analysis/test_functions.py
index 75eaaade..71ff0b7d 100644
--- a/tests/popmon/analysis/test_functions.py
+++ b/tests/popmon/analysis/test_functions.py
@@ -78,7 +78,7 @@ def test_expanding_hist():
 
     df = datastore["output_hist"]["num_employees"]
     h = df["histogram_sum"].values[-1]
-    bin_entries = h.hist.bin_entries()
+    bin_entries = h.bin_entries()
 
     check = np.array(
         [
@@ -238,7 +238,7 @@ def test_rolling_hist():
 
     df = datastore["output_hist"]["num_employees"]
     h = df["histogram_sum"].values[-2]
-    bin_entries = h.hist.bin_entries()
+    bin_entries = h.bin_entries()
 
     check = np.array(
         [
diff --git a/tests/popmon/analysis/test_hist_numpy.py b/tests/popmon/analysis/test_hist_numpy.py
index 4a113406..ba929151 100644
--- a/tests/popmon/analysis/test_hist_numpy.py
+++ b/tests/popmon/analysis/test_hist_numpy.py
@@ -1,3 +1,4 @@
+import histogrammar as hg
 import numpy as np
 import pandas as pd
 import pytest
@@ -13,8 +14,6 @@
     prepare_2dgrid,
     set_2dgrid,
 )
-from popmon.hist.histogram import HistogramContainer
-from popmon.hist.patched_histogrammer import histogrammar as hg
 
 
 def to_ns(x):
@@ -50,11 +49,7 @@ def get_test_histograms1():
     hist2.fill.numpy(df)
     hist3.fill.numpy(df)
 
-    hc1 = HistogramContainer(hist1)
-    hc2 = HistogramContainer(hist2)
-    hc3 = HistogramContainer(hist3)
-
-    return df, hc1, hc2, hc3
+    return df, hist1, hist2, hist3
 
 
 def get_test_histograms2():
@@ -75,20 +70,12 @@ def get_test_histograms2():
     hist3.fill.numpy(df)
     hist4.fill.numpy(df)
 
-    hc1 = HistogramContainer(hist1)
-    hc2 = HistogramContainer(hist2)
-    hc3 = HistogramContainer(hist3)
-    hc4 = HistogramContainer(hist4)
-
-    return df, hc1, hc2, hc3, hc4
+    return df, hist1, hist2, hist3, hist4
 
 
 def test_histogram():
     """Test the dummy histogram we're working with below"""
-    df, hc1, hc2, hc3 = get_test_histograms1()
-    hist1 = hc1.hist
-    hist2 = hc2.hist
-    hist3 = hc3.hist
+    df, hist1, hist2, hist3 = get_test_histograms1()
 
     assert hist1.entries == 5
     assert hist1.n_dim == 1
@@ -105,10 +92,7 @@ def test_histogram():
 
 def test_get_contentType():
     """Test getting type of a histogram"""
-    df, hc1, hc2, hc3 = get_test_histograms1()
-    hist1 = hc1.hist
-    hist2 = hc2.hist
-    hist3 = hc3.hist
+    df, hist1, hist2, hist3 = get_test_histograms1()
 
     assert get_contentType(hist1) == "Categorize"
     assert get_contentType(hist2) == "Bin"
@@ -149,10 +133,7 @@ def test_prepare_2dgrid():
 @pytest.mark.filterwarnings("ignore:Input histogram only has")
 def test_set_2dgrid():
     """Test setting the grid for extraction of number of entries for 2d hists"""
-    df, hc1, hc2, hc3 = get_test_histograms1()
-    hist1 = hc1.hist
-    hist2 = hc2.hist
-    hist3 = hc3.hist
+    df, hist1, hist2, hist3 = get_test_histograms1()
 
     xkeys1, ykeys1 = prepare_2dgrid(hist1)
     xkeys2, ykeys2 = prepare_2dgrid(hist2)
@@ -180,10 +161,7 @@ def test_set_2dgrid():
 @pytest.mark.filterwarnings("ignore:Input histogram only has")
 def test_get_2dgrid():
     """Test extraction of number of entries for 2d hists"""
-    df, hc1, hc2, hc3 = get_test_histograms1()
-    hist1 = hc1.hist
-    hist2 = hc2.hist
-    hist3 = hc3.hist
+    df, hist1, hist2, hist3 = get_test_histograms1()
 
     grid1 = get_2dgrid(hist1)
     grid2 = get_2dgrid(hist2)
@@ -232,17 +210,13 @@ def test_get_consistent_numpy_2dgrids():
     hist1.fill.numpy(df1)
     hist2.fill.numpy(df2)
 
-    hc0 = HistogramContainer(hist0)
-    hc1 = HistogramContainer(hist1)
-    hc2 = HistogramContainer(hist2)
-
     args = [""]
     try:
-        get_consistent_numpy_2dgrids([hc0, hc0])
+        get_consistent_numpy_2dgrids([hist0, hist0])
     except ValueError as e:
         args = e.args
 
-    grid2d_list = get_consistent_numpy_2dgrids([hc1, hc2])
+    grid2d_list = get_consistent_numpy_2dgrids([hist1, hist2])
 
     g1 = np.asarray(
         [
@@ -297,11 +271,12 @@ def test_get_consistent_numpy_1dhists():
     hist1.fill.numpy(df1)
     hist2.fill.numpy(df2)
 
-    hc1 = HistogramContainer(hist1)
-    hc2 = HistogramContainer(hist2)
-
-    nphist1, nphist2 = get_consistent_numpy_1dhists([hc1, hc2], get_bin_labels=False)
-    nphist_list, centers = get_consistent_numpy_1dhists([hc1, hc2], get_bin_labels=True)
+    nphist1, nphist2 = get_consistent_numpy_1dhists(
+        [hist1, hist2], get_bin_labels=False
+    )
+    nphist_list, centers = get_consistent_numpy_1dhists(
+        [hist1, hist2], get_bin_labels=True
+    )
 
     entries1 = [1.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0]
     entries2 = [0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0]
@@ -339,18 +314,14 @@ def test_get_consistent_numpy_entries():
     )
 
     # building 1d-, 2d-, and 3d-histogram (iteratively)
-    hist0 = HistogramContainer(hg.Categorize(unit("C")))
-    hist1 = HistogramContainer(hg.Categorize(unit("C")))
-    hist2 = HistogramContainer(
-        hg.SparselyBin(origin=0.0, binWidth=1.0, quantity=unit("A"))
-    )
-    hist3 = HistogramContainer(
-        hg.SparselyBin(origin=0.0, binWidth=1.0, quantity=unit("A"))
-    )
+    hist0 = hg.Categorize(unit("C"))
+    hist1 = hg.Categorize(unit("C"))
+    hist2 = hg.SparselyBin(origin=0.0, binWidth=1.0, quantity=unit("A"))
+    hist3 = hg.SparselyBin(origin=0.0, binWidth=1.0, quantity=unit("A"))
 
     # fill them
     for hist, df in zip([hist0, hist1, hist2, hist3], [df1, df2, df1, df2]):
-        hist.hist.fill.numpy(df)
+        hist.fill.numpy(df)
 
     e0, e1 = get_consistent_numpy_entries([hist0, hist1], get_bin_labels=False)
     _, labels01 = get_consistent_numpy_entries([hist0, hist1], get_bin_labels=True)
@@ -407,19 +378,12 @@ def test_check_similar_hists():
     for hist in [hist0, hist1, hist2, hist3, hist4, hist5]:
         hist.fill.numpy(df)
 
-    hc0 = HistogramContainer(hist0)
-    hc1 = HistogramContainer(hist1)
-    hc2 = HistogramContainer(hist2)
-    hc3 = HistogramContainer(hist3)
-    hc4 = HistogramContainer(hist4)
-    hc5 = HistogramContainer(hist5)
-
-    for hc in [hc0, hc1, hc2, hc3, hc4, hc5]:
-        assert check_similar_hists([hc, hc])
+    for hist in [hist0, hist1, hist2, hist3, hist4, hist5]:
+        assert check_similar_hists([hist, hist])
 
-    assert not check_similar_hists([hc0, hc1])
-    assert not check_similar_hists([hc2, hc3])
-    assert not check_similar_hists([hc4, hc5])
+    assert not check_similar_hists([hist0, hist1])
+    assert not check_similar_hists([hist2, hist3])
+    assert not check_similar_hists([hist4, hist5])
 
 
 @pytest.mark.filterwarnings("ignore:Input histograms have inconsistent")
@@ -455,32 +419,25 @@ def test_assert_similar_hists():
     for hist in [hist0, hist1, hist2, hist3, hist4, hist5]:
         hist.fill.numpy(df)
 
-    hc0 = HistogramContainer(hist0)
-    hc1 = HistogramContainer(hist1)
-    hc2 = HistogramContainer(hist2)
-    hc3 = HistogramContainer(hist3)
-    hc4 = HistogramContainer(hist4)
-    hc5 = HistogramContainer(hist5)
-
-    for hc in [hc0, hc1, hc2, hc3, hc4, hc5]:
-        assert check_similar_hists([hc, hc])
+    for hist in [hist0, hist1, hist2, hist3, hist4, hist5]:
+        assert check_similar_hists([hist, hist])
 
     args01 = [""]
     args23 = [""]
     args45 = [""]
 
     try:
-        assert_similar_hists([hc0, hc1])
+        assert_similar_hists([hist0, hist1])
     except ValueError as e:
         args01 = e.args
 
     try:
-        assert_similar_hists([hc2, hc3])
+        assert_similar_hists([hist2, hist3])
     except ValueError as e:
         args23 = e.args
 
     try:
-        assert_similar_hists([hc4, hc5])
+        assert_similar_hists([hist4, hist5])
     except ValueError as e:
         args45 = e.args
 
@@ -491,11 +448,8 @@ def test_assert_similar_hists():
 
 def test_datatype():
     """Test datatypes assigned to histograms"""
-    df, hc1, hc2, hc3 = get_test_histograms1()
-    hist1 = hc1.hist
-    hist2 = hc2.hist
-    hist3 = hc3.hist
+    df, hist1, hist2, hist3 = get_test_histograms1()
 
     assert hist1.datatype == str
-    np.testing.assert_array_equal(hist2.datatype, [np.float64, str])
-    np.testing.assert_array_equal(hist3.datatype, [np.datetime64, np.float64, str])
+    np.testing.assert_array_equal(hist2.datatype, [np.number, str])
+    np.testing.assert_array_equal(hist3.datatype, [np.datetime64, np.number, str])
diff --git a/tests/popmon/hist/test_histogram.py b/tests/popmon/hist/test_histogram.py
index 0d674771..213b4f8d 100644
--- a/tests/popmon/hist/test_histogram.py
+++ b/tests/popmon/hist/test_histogram.py
@@ -1,14 +1,17 @@
+import histogrammar as hg
 import numpy as np
 import pandas as pd
 
-from popmon.hist.histogram import (
-    HistogramContainer,
+from popmon.hist.hist_utils import (
+    is_numeric,
+    is_timestamp,
     project_on_x,
     project_split2dhist_on_axis,
+    sparse_bin_centers_x,
+    split_hist_along_first_dimension,
     sum_entries,
     sum_over_x,
 )
-from popmon.hist.patched_histogrammer import histogrammar as hg
 
 
 def get_test_data():
@@ -58,23 +61,18 @@ def test_histogrammar():
 def test_histogram_attributes():
     hist1, hist2, hist3 = get_histograms()
 
-    hist_obj1 = HistogramContainer(hist1)
-    hist_obj2 = HistogramContainer(hist2)
-    hist_obj3 = HistogramContainer(hist3)
-
-    assert hist_obj1.is_num is False
-    assert hist_obj1.is_ts is False
-    assert hist_obj2.is_num is True
-    assert hist_obj2.is_ts is False
-    assert hist_obj3.is_num is True
-    assert hist_obj3.is_ts is True
+    assert is_numeric(hist1) is False
+    assert is_timestamp(hist1) is False
+    assert is_numeric(hist2) is True
+    assert is_timestamp(hist2) is False
+    assert is_numeric(hist3) is True
+    assert is_timestamp(hist3) is True
 
 
 def test_sparse_bin_centers_x():
     hist1, hist2, hist3 = get_histograms()
 
-    hist_obj3 = HistogramContainer(hist3)
-    centers3, values3 = hist_obj3.sparse_bin_centers_x()
+    centers3, values3 = sparse_bin_centers_x(hist3)
 
     np.testing.assert_array_equal(
         centers3, [1.2308112e18, 1.2308976e18, 1.2311568e18, 1.2312432e18, 1.2313296e18]
@@ -83,18 +81,15 @@ def test_sparse_bin_centers_x():
 
 def test_split_hist_along_first_dimension():
     hist1, hist2, hist3 = get_histograms()
-    hist_obj1 = HistogramContainer(hist1)
-    hist_obj2 = HistogramContainer(hist2)
-    hist_obj3 = HistogramContainer(hist3)
 
-    split3a = hist_obj3.split_hist_along_first_dimension(
-        xname="x", yname="y", short_keys=True, convert_time_index=True
+    split3a = split_hist_along_first_dimension(
+        hist=hist3, xname="x", yname="y", short_keys=True, convert_time_index=True
     )
-    split3b = hist_obj3.split_hist_along_first_dimension(
-        xname="x", yname="y", short_keys=True, convert_time_index=False
+    split3b = split_hist_along_first_dimension(
+        hist=hist3, xname="x", yname="y", short_keys=True, convert_time_index=False
     )
-    split3c = hist_obj3.split_hist_along_first_dimension(
-        xname="x", yname="y", short_keys=False, convert_time_index=True
+    split3c = split_hist_along_first_dimension(
+        hist=hist3, xname="x", yname="y", short_keys=False, convert_time_index=True
     )
 
     keys3a = list(split3a.keys())
@@ -121,14 +116,14 @@ def test_split_hist_along_first_dimension():
     np.testing.assert_array_equal(keys3b, check3b)
     np.testing.assert_array_equal(keys3c, check3c)
 
-    split2a = hist_obj2.split_hist_along_first_dimension(
-        xname="x", yname="y", short_keys=True, convert_time_index=True
+    split2a = split_hist_along_first_dimension(
+        hist=hist2, xname="x", yname="y", short_keys=True, convert_time_index=True
     )
-    split2b = hist_obj2.split_hist_along_first_dimension(
-        xname="x", yname="y", short_keys=True, convert_time_index=False
+    split2b = split_hist_along_first_dimension(
+        hist=hist2, xname="x", yname="y", short_keys=True, convert_time_index=False
     )
-    split2c = hist_obj2.split_hist_along_first_dimension(
-        xname="x", yname="y", short_keys=False, convert_time_index=False
+    split2c = split_hist_along_first_dimension(
+        hist=hist2, xname="x", yname="y", short_keys=False, convert_time_index=False
     )
 
     keys2a = list(split2a.keys())
@@ -143,14 +138,14 @@ def test_split_hist_along_first_dimension():
     np.testing.assert_array_equal(keys2b, check2b)
     np.testing.assert_array_equal(keys2c, check2c)
 
-    split1a = hist_obj1.split_hist_along_first_dimension(
-        xname="x", yname="y", short_keys=True, convert_time_index=True
+    split1a = split_hist_along_first_dimension(
+        hist=hist1, xname="x", yname="y", short_keys=True, convert_time_index=True
     )
-    split1b = hist_obj1.split_hist_along_first_dimension(
-        xname="x", yname="y", short_keys=True, convert_time_index=False
+    split1b = split_hist_along_first_dimension(
+        hist=hist1, xname="x", yname="y", short_keys=True, convert_time_index=False
     )
-    split1c = hist_obj1.split_hist_along_first_dimension(
-        xname="x", yname="y", short_keys=False, convert_time_index=False
+    split1c = split_hist_along_first_dimension(
+        hist=hist1, xname="x", yname="y", short_keys=False, convert_time_index=False
     )
 
     keys1a = list(split1a.keys())
@@ -284,17 +279,17 @@ def test_project_split2dhist_on_axis():
         hist.fill.numpy(df)
 
     # split along date axis
-    splitAC = HistogramContainer(histDAC).split_hist_along_first_dimension(
-        xname="x", yname="y", short_keys=True, convert_time_index=True
+    splitAC = split_hist_along_first_dimension(
+        hist=histDAC, xname="x", yname="y", short_keys=True, convert_time_index=True
     )
-    splitCA = HistogramContainer(histDCA).split_hist_along_first_dimension(
-        xname="x", yname="y", short_keys=True, convert_time_index=True
+    splitCA = split_hist_along_first_dimension(
+        hist=histDCA, xname="x", yname="y", short_keys=True, convert_time_index=True
     )
-    splitA0 = HistogramContainer(histDA).split_hist_along_first_dimension(
-        xname="x", yname="y", short_keys=True, convert_time_index=True
+    splitA0 = split_hist_along_first_dimension(
+        hist=histDA, xname="x", yname="y", short_keys=True, convert_time_index=True
     )
-    splitC0 = HistogramContainer(histDC).split_hist_along_first_dimension(
-        xname="x", yname="y", short_keys=True, convert_time_index=True
+    splitC0 = split_hist_along_first_dimension(
+        hist=histDC, xname="x", yname="y", short_keys=True, convert_time_index=True
     )
 
     splitA1 = project_split2dhist_on_axis(splitAC, "x")
@@ -348,5 +343,5 @@ def test_datatype():
 
     assert isinstance(None, hist0.datatype)
     assert hist1.datatype == str
-    np.testing.assert_array_equal(hist2.datatype, [np.float64, str])
-    np.testing.assert_array_equal(hist3.datatype, [np.datetime64, np.float64, str])
+    np.testing.assert_array_equal(hist2.datatype, [np.number, str])
+    np.testing.assert_array_equal(hist3.datatype, [np.datetime64, np.number, str])
diff --git a/tests/popmon/hist/test_numpy_histogrammar.py b/tests/popmon/hist/test_numpy_histogrammar.py
deleted file mode 100644
index 5e4409c3..00000000
--- a/tests/popmon/hist/test_numpy_histogrammar.py
+++ /dev/null
@@ -1,93 +0,0 @@
-#!/usr/bin/env python3
-
-import pytest
-
-from popmon.base import Pipeline
-from popmon.hist.filling import NumpyHistogrammar
-
-
-def test_assert_dataframe():
-    pandas_filler = NumpyHistogrammar(
-        features=["age", "fruit", "latitude", ["longitude", "active"]]
-    )
-    with pytest.raises(TypeError):
-        pandas_filler.assert_dataframe("coconut")
-
-
-def test_get_histograms():
-
-    np_array = pytest.test_df.to_records(index=False)
-
-    np_filler = NumpyHistogrammar(
-        features=[
-            "date",
-            "isActive",
-            "age",
-            "eyeColor",
-            "gender",
-            "company",
-            "latitude",
-            "longitude",
-            ["isActive", "age"],
-            ["latitude", "longitude"],
-        ],
-        bin_specs={
-            "longitude": {"bin_width": 5, "bin_offset": 0},
-            "latitude": {"bin_width": 5, "bin_offset": 0},
-        },
-    )
-    current_hists = np_filler.get_histograms(np_array)
-
-    assert current_hists["age"].toJson() == pytest.age
-    assert current_hists["company"].toJson() == pytest.company
-    assert current_hists["date"].toJson() == pytest.date
-    assert current_hists["eyeColor"].toJson() == pytest.eyesColor
-    assert current_hists["gender"].toJson() == pytest.gender
-    assert current_hists["isActive"].toJson() == pytest.isActive
-    assert current_hists["isActive:age"].toJson() == pytest.isActive_age
-    assert current_hists["latitude"].toJson() == pytest.latitude
-    assert current_hists["longitude"].toJson() == pytest.longitude
-    assert current_hists["latitude:longitude"].toJson() == pytest.latitude_longitude
-
-
-def test_get_histograms_module():
-
-    np_filler = NumpyHistogrammar(
-        features=[
-            "date",
-            "isActive",
-            "age",
-            "eyeColor",
-            "gender",
-            "company",
-            "latitude",
-            "longitude",
-            ["isActive", "age"],
-            ["latitude", "longitude"],
-        ],
-        bin_specs={
-            "longitude": {"bin_width": 5, "bin_offset": 0},
-            "latitude": {"bin_width": 5, "bin_offset": 0},
-        },
-        read_key="input",
-        store_key="output",
-    )
-
-    pipeline = Pipeline(modules=[np_filler])
-    datastore = pipeline.transform(
-        datastore={"input": pytest.test_df.to_records(index=False)}
-    )
-
-    assert "output" in datastore
-    current_hists = datastore["output"]
-
-    assert current_hists["age"].toJson() == pytest.age
-    assert current_hists["company"].toJson() == pytest.company
-    assert current_hists["date"].toJson() == pytest.date
-    assert current_hists["eyeColor"].toJson() == pytest.eyesColor
-    assert current_hists["gender"].toJson() == pytest.gender
-    assert current_hists["isActive"].toJson() == pytest.isActive
-    assert current_hists["isActive:age"].toJson() == pytest.isActive_age
-    assert current_hists["latitude"].toJson() == pytest.latitude
-    assert current_hists["longitude"].toJson() == pytest.longitude
-    assert current_hists["latitude:longitude"].toJson() == pytest.latitude_longitude
diff --git a/tests/popmon/hist/test_pandas_histogrammar.py b/tests/popmon/hist/test_pandas_histogrammar.py
deleted file mode 100644
index eaa7f28d..00000000
--- a/tests/popmon/hist/test_pandas_histogrammar.py
+++ /dev/null
@@ -1,231 +0,0 @@
-#!/usr/bin/env python3
-
-import numpy as np
-import pytest
-
-from popmon.base import Pipeline
-from popmon.hist.filling import (
-    PandasHistogrammar,
-    get_bin_specs,
-    get_time_axes,
-    make_histograms,
-)
-
-
-def test_get_histograms():
-
-    pandas_filler = PandasHistogrammar(
-        features=[
-            "date",
-            "isActive",
-            "age",
-            "eyeColor",
-            "gender",
-            "company",
-            "latitude",
-            "longitude",
-            ["isActive", "age"],
-            ["latitude", "longitude"],
-        ],
-        bin_specs={
-            "longitude": {"bin_width": 5, "bin_offset": 0},
-            "latitude": {"bin_width": 5, "bin_offset": 0},
-        },
-    )
-    current_hists = pandas_filler.get_histograms(pytest.test_df)
-
-    assert current_hists["age"].toJson() == pytest.age
-    assert current_hists["company"].toJson() == pytest.company
-    assert current_hists["date"].toJson() == pytest.date
-    assert current_hists["eyeColor"].toJson() == pytest.eyesColor
-    assert current_hists["gender"].toJson() == pytest.gender
-    assert current_hists["isActive"].toJson() == pytest.isActive
-    assert current_hists["isActive:age"].toJson() == pytest.isActive_age
-    assert current_hists["latitude"].toJson() == pytest.latitude
-    assert current_hists["longitude"].toJson() == pytest.longitude
-    assert current_hists["latitude:longitude"].toJson() == pytest.latitude_longitude
-
-
-def test_make_histograms():
-
-    features = [
-        "date",
-        "isActive",
-        "age",
-        "eyeColor",
-        "gender",
-        "company",
-        "latitude",
-        "longitude",
-        ["isActive", "age"],
-        ["latitude", "longitude"],
-        "transaction",
-    ]
-    bin_specs = {
-        "transaction": {"num": 100, "low": -2000, "high": 2000},
-        "longitude": {"bin_width": 5, "bin_offset": 0},
-        "latitude": {"bin_width": 5, "bin_offset": 0},
-    }
-
-    current_hists = make_histograms(
-        pytest.test_df, features=features, binning="unit", bin_specs=bin_specs
-    )
-
-    assert current_hists["age"].toJson() == pytest.age
-    assert current_hists["company"].toJson() == pytest.company
-    assert current_hists["date"].toJson() == pytest.date
-    assert current_hists["eyeColor"].toJson() == pytest.eyesColor
-    assert current_hists["gender"].toJson() == pytest.gender
-    assert current_hists["isActive"].toJson() == pytest.isActive
-    assert current_hists["isActive:age"].toJson() == pytest.isActive_age
-    assert current_hists["latitude"].toJson() == pytest.latitude
-    assert current_hists["longitude"].toJson() == pytest.longitude
-    assert current_hists["latitude:longitude"].toJson() == pytest.latitude_longitude
-    assert current_hists["transaction"].toJson() == pytest.transaction
-
-
-def test_make_histograms_no_time_axis():
-
-    hists, features, bin_specs, time_axis, var_dtype = make_histograms(
-        pytest.test_df, time_axis="", ret_specs=True
-    )
-
-    assert len(hists) == 21
-    assert len(features) == 21
-    assert len(bin_specs) == 6
-    assert len(var_dtype) == 21
-    assert time_axis == ""
-    assert "date" in hists
-    h = hists["date"]
-    assert h.binWidth == 751582381944448.0
-    for cols in features:
-        cols = cols.split(":")
-        assert len(cols) == 1
-    for f, bs in bin_specs.items():
-        assert isinstance(bs, dict)
-    assert "age" in bin_specs
-    dateage = bin_specs["age"]
-    assert dateage["bin_width"] == 2.0
-    assert dateage["bin_offset"] == 9.5
-
-
-def test_make_histograms_with_time_axis():
-
-    hists, features, bin_specs, time_axis, var_dtype = make_histograms(
-        pytest.test_df, time_axis=True, ret_specs=True
-    )
-
-    assert len(hists) == 20
-    assert len(features) == 20
-    assert len(bin_specs) == 20
-    assert len(var_dtype) == 21
-    assert time_axis == "date"
-    assert "date:age" in hists
-    h = hists["date:age"]
-    assert h.binWidth == 751582381944448.0
-    for cols in features:
-        cols = cols.split(":")
-        assert len(cols) == 2 and cols[0] == "date"
-    for f, bs in bin_specs.items():
-        assert len(bs) == 2
-    assert "date:age" in bin_specs
-    dateage = bin_specs["date:age"]
-    assert dateage[0]["bin_width"] == 751582381944448.0
-    assert dateage[1]["bin_width"] == 2.0
-    assert dateage[1]["bin_offset"] == 9.5
-
-    # test get_bin_specs 1
-    bin_specs = get_bin_specs(hists)
-    assert "date:age" in bin_specs
-    dateage = bin_specs["date:age"]
-    assert dateage[0]["bin_width"] == 751582381944448.0
-    assert dateage[1]["bin_width"] == 2.0
-    assert dateage[1]["bin_offset"] == 9.5
-
-    # test get_bin_specs 2
-    bin_specs = get_bin_specs(hists, skip_first_axis=True)
-    assert "age" in bin_specs
-    age = bin_specs["age"]
-    assert age["bin_width"] == 2.0
-    assert age["bin_offset"] == 9.5
-
-    # test get_bin_specs 3
-    bin_specs = get_bin_specs(hists["date:age"])
-    assert bin_specs[0]["bin_width"] == 751582381944448.0
-    assert bin_specs[1]["bin_width"] == 2.0
-    assert bin_specs[1]["bin_offset"] == 9.5
-
-    # test get_bin_specs 4
-    bin_specs = get_bin_specs(hists["date:age"], skip_first_axis=True)
-    assert bin_specs["bin_width"] == 2.0
-    assert bin_specs["bin_offset"] == 9.5
-
-
-def test_make_histograms_unit_binning():
-
-    hists, features, bin_specs, time_axis, var_dtype = make_histograms(
-        pytest.test_df, binning="unit", time_axis="", ret_specs=True
-    )
-
-    assert len(hists) == 21
-    assert len(features) == 21
-    assert len(bin_specs) == 0
-    assert len(var_dtype) == 21
-    assert time_axis == ""
-    assert "date" in hists
-    h = hists["date"]
-    assert h.binWidth == 2592000000000000
-    for cols in features:
-        cols = cols.split(":")
-        assert len(cols) == 1
-    for f, bs in bin_specs.items():
-        assert isinstance(bs, dict)
-    assert "age" in hists
-    h = hists["age"]
-    assert h.binWidth == 1.0
-    assert h.origin == 0.0
-
-
-def test_get_histograms_module():
-
-    pandas_filler = PandasHistogrammar(
-        features=[
-            "date",
-            "isActive",
-            "age",
-            "eyeColor",
-            "gender",
-            "company",
-            "latitude",
-            "longitude",
-            ["isActive", "age"],
-            ["latitude", "longitude"],
-        ],
-        bin_specs={
-            "longitude": {"bin_width": 5, "bin_offset": 0},
-            "latitude": {"bin_width": 5, "bin_offset": 0},
-        },
-        read_key="input",
-        store_key="output",
-    )
-
-    pipeline = Pipeline(modules=[pandas_filler])
-    datastore = pipeline.transform(datastore={"input": pytest.test_df})
-
-    assert "output" in datastore
-    current_hists = datastore["output"]
-    assert current_hists["age"].toJson() == pytest.age
-    assert current_hists["company"].toJson() == pytest.company
-    assert current_hists["date"].toJson() == pytest.date
-    assert current_hists["eyeColor"].toJson() == pytest.eyesColor
-    assert current_hists["gender"].toJson() == pytest.gender
-    assert current_hists["isActive"].toJson() == pytest.isActive
-    assert current_hists["isActive:age"].toJson() == pytest.isActive_age
-    assert current_hists["latitude"].toJson() == pytest.latitude
-    assert current_hists["longitude"].toJson() == pytest.longitude
-    assert current_hists["latitude:longitude"].toJson() == pytest.latitude_longitude
-
-
-def test_get_time_axes():
-    time_axes = get_time_axes(pytest.test_df)
-    np.testing.assert_array_equal(time_axes, ["date"])
diff --git a/tests/popmon/hist/test_spark_histogrammar.py b/tests/popmon/hist/test_spark_histogrammar.py
deleted file mode 100644
index 714362f7..00000000
--- a/tests/popmon/hist/test_spark_histogrammar.py
+++ /dev/null
@@ -1,255 +0,0 @@
-from os.path import abspath, dirname, join
-
-import pandas as pd
-import pytest
-
-# from popmon.hist.filling import make_histograms
-from popmon.base import Pipeline
-from popmon.hist.filling import SparkHistogrammar
-
-try:
-    from pyspark.sql import SparkSession
-
-    spark_found = True
-except (ModuleNotFoundError, AttributeError):
-    spark_found = False
-
-
-def get_spark():
-    if not spark_found:
-        return None
-
-    current_path = dirname(abspath(__file__))
-
-    hist_spark_jar = join(current_path, "jars/histogrammar-sparksql_2.11-1.0.11.jar")
-    hist_jar = join(current_path, "jars/histogrammar_2.11-1.0.11.jar")
-
-    spark = (
-        SparkSession.builder.master("local")
-        .appName("popmon-pytest")
-        .config("spark.jars", f"{hist_spark_jar},{hist_jar}")
-        .config("spark.sql.execution.arrow.enabled", "false")
-        .config("spark.sql.session.timeZone", "GMT")
-        .getOrCreate()
-    )
-    return spark
-
-
-@pytest.fixture
-def spark_co():
-    """
-    :return: Spark configuration
-    """
-    spark = get_spark()
-    return spark
-
-
-@pytest.mark.spark
-@pytest.mark.skipif(not spark_found, reason="spark not found")
-@pytest.mark.filterwarnings(
-    "ignore:createDataFrame attempted Arrow optimization because"
-)
-def test_get_histograms(spark_co):
-    pytest.age["data"]["name"] = "b'age'"
-    pytest.company["data"]["name"] = "b'company'"
-    pytest.eyesColor["data"]["name"] = "b'eyeColor'"
-    pytest.gender["data"]["name"] = "b'gender'"
-    pytest.isActive["data"]["name"] = "b'isActive'"
-    pytest.latitude["data"]["name"] = "b'latitude'"
-    pytest.longitude["data"]["name"] = "b'longitude'"
-    pytest.transaction["data"]["name"] = "b'transaction'"
-
-    pytest.latitude_longitude["data"]["name"] = "b'latitude:longitude'"
-    pytest.latitude_longitude["data"]["bins:name"] = "unit_func"
-
-    spark = spark_co
-
-    spark_df = spark.createDataFrame(pytest.test_df)
-
-    spark_filler = SparkHistogrammar(
-        features=[
-            "date",
-            "isActive",
-            "age",
-            "eyeColor",
-            "gender",
-            "company",
-            "latitude",
-            "longitude",
-            ["isActive", "age"],
-            ["latitude", "longitude"],
-            "transaction",
-        ],
-        bin_specs={
-            "transaction": {"num": 100, "low": -2000, "high": 2000},
-            "longitude": {"bin_width": 5.0, "bin_offset": 0.0},
-            "latitude": {"bin_width": 5.0, "bin_offset": 0.0},
-        },
-        read_key="input",
-        store_key="output",
-    )
-
-    # test get_histograms() function call
-    current_hists = spark_filler.get_histograms(spark_df)
-    # current_hists = make_histograms(spark_df, features, bin_specs)
-    assert current_hists["age"].toJson() == pytest.age
-    assert current_hists["company"].toJson() == pytest.company
-    assert current_hists["eyeColor"].toJson() == pytest.eyesColor
-    assert current_hists["gender"].toJson() == pytest.gender
-    assert current_hists["latitude"].toJson() == pytest.latitude
-    assert current_hists["longitude"].toJson() == pytest.longitude
-    assert current_hists["transaction"].toJson() == pytest.transaction
-
-    # import json
-    # with open('tests/popmon/hist/resource/transaction.json', 'w') as outfile:
-    #     json.dump(current_hists["transaction"].toJson(), outfile, indent=4)
-
-
-@pytest.mark.spark
-@pytest.mark.skipif(not spark_found, reason="spark not found")
-@pytest.mark.filterwarnings(
-    "ignore:createDataFrame attempted Arrow optimization because"
-)
-def test_get_histograms_module(spark_co):
-    pytest.age["data"]["name"] = "b'age'"
-    pytest.company["data"]["name"] = "b'company'"
-    pytest.eyesColor["data"]["name"] = "b'eyeColor'"
-    pytest.gender["data"]["name"] = "b'gender'"
-    pytest.isActive["data"]["name"] = "b'isActive'"
-    pytest.latitude["data"]["name"] = "b'latitude'"
-    pytest.longitude["data"]["name"] = "b'longitude'"
-
-    pytest.latitude_longitude["data"]["name"] = "b'latitude:longitude'"
-    pytest.latitude_longitude["data"]["bins:name"] = "unit_func"
-
-    spark = spark_co
-
-    spark_df = spark.createDataFrame(pytest.test_df)
-
-    spark_filler = SparkHistogrammar(
-        features=[
-            "date",
-            "isActive",
-            "age",
-            "eyeColor",
-            "gender",
-            "company",
-            "latitude",
-            "longitude",
-            ["isActive", "age"],
-            ["latitude", "longitude"],
-        ],
-        bin_specs={
-            "longitude": {"bin_width": 5.0, "bin_offset": 0.0},
-            "latitude": {"bin_width": 5.0, "bin_offset": 0.0},
-        },
-        read_key="input",
-        store_key="output",
-    )
-
-    # test transform() function call
-    pipeline = Pipeline(modules=[spark_filler])
-    datastore = pipeline.transform(datastore={"input": spark_df})
-
-    assert "output" in datastore
-    current_hists = datastore["output"]
-    assert current_hists["age"].toJson() == pytest.age
-    assert current_hists["company"].toJson() == pytest.company
-    assert current_hists["eyeColor"].toJson() == pytest.eyesColor
-    assert current_hists["gender"].toJson() == pytest.gender
-    assert current_hists["latitude"].toJson() == pytest.latitude
-    assert current_hists["longitude"].toJson() == pytest.longitude
-    # assert current_hists['date'].toJson() == pytest.date
-    # assert current_hists['isActive'].toJson() == pytest.isActive
-    # assert current_hists['isActive:age'].toJson() == pytest.isActive_age
-    # assert current_hists['latitude:longitude'].toJson() == pytest.latitude_longitude
-
-
-@pytest.mark.spark
-@pytest.mark.skipif(not spark_found, reason="spark not found")
-@pytest.mark.filterwarnings(
-    "ignore:createDataFrame attempted Arrow optimization because"
-)
-def test_get_histograms_timestamp(spark_co):
-    from pyspark.sql.functions import to_timestamp
-
-    spark = spark_co
-
-    data_date = [
-        "2018-12-10 00:00:00",
-        "2018-12-10 00:00:00",
-        "2018-12-10 00:00:00",
-        "2018-12-10 00:00:00",
-        "2018-12-10 00:00:00",
-        "2018-12-17 00:00:00",
-        "2018-12-17 00:00:00",
-        "2018-12-17 00:00:00",
-        "2018-12-17 00:00:00",
-        "2018-12-19 00:00:00",
-    ]
-
-    df = pd.DataFrame(data_date, columns=["dt"])
-    sdf = spark.createDataFrame(df).withColumn(
-        "dt", to_timestamp("dt", "yyyy-MM-dd HH:mm:ss")
-    )
-    expected = {
-        "data": {
-            "binWidth": 2592000000000000.0,
-            "bins": {"108": 9.0, "109": 1.0},
-            "bins:type": "Count",
-            "entries": 10.0,
-            "name": "b'dt'",
-            "nanflow": 0.0,
-            "nanflow:type": "Count",
-            "origin": 1.2625632e18,
-        },
-        "type": "SparselyBin",
-        "version": "1.0",
-    }
-    filler = SparkHistogrammar(features=["dt"])
-    current_hists = filler.get_histograms(sdf)
-    assert current_hists["dt"].toJson() == expected
-
-
-@pytest.mark.spark
-@pytest.mark.skipif(not spark_found, reason="spark not found")
-@pytest.mark.filterwarnings(
-    "ignore:createDataFrame attempted Arrow optimization because"
-)
-def test_get_histograms_date(spark_co):
-    from pyspark.sql.functions import to_date
-
-    spark = spark_co
-
-    data_date = [
-        "2018-12-10",
-        "2018-12-10",
-        "2018-12-10",
-        "2018-12-10",
-        "2018-12-10",
-        "2018-12-17",
-        "2018-12-17",
-        "2018-12-17",
-        "2018-12-17",
-        "2018-12-19",
-    ]
-
-    df = pd.DataFrame(data_date, columns=["dt"])
-    sdf = spark.createDataFrame(df).withColumn("dt", to_date("dt", "yyyy-MM-dd"))
-    expected = {
-        "data": {
-            "binWidth": 2592000000000000.0,
-            "bins": {"108": 9.0, "109": 1.0},
-            "bins:type": "Count",
-            "entries": 10.0,
-            "name": "b'dt'",
-            "nanflow": 0.0,
-            "nanflow:type": "Count",
-            "origin": 1.2625632e18,
-        },
-        "type": "SparselyBin",
-        "version": "1.0",
-    }
-    filler = SparkHistogrammar(features=["dt"])
-    current_hists = filler.get_histograms(sdf)
-    assert current_hists["dt"].toJson() == expected
diff --git a/tests/popmon/pipeline/test_report.py b/tests/popmon/pipeline/test_report.py
index c033d558..a2efe8d8 100644
--- a/tests/popmon/pipeline/test_report.py
+++ b/tests/popmon/pipeline/test_report.py
@@ -3,7 +3,7 @@
 
 from popmon import resources
 from popmon.base import Pipeline
-from popmon.hist.filling.make_histograms import get_bin_specs
+from popmon.hist.filling import get_bin_specs
 from popmon.io import JsonReader
 from popmon.pipeline.report import df_stability_report, stability_report
 
@@ -74,14 +74,10 @@ def test_df_stability_report_self():
     hists = datastore["hists"]
     bin_specs = get_bin_specs(hists)
 
-    assert pd.Timedelta(time_width).value == bin_specs["date:eyeColor"][0]["bin_width"]
-    assert (
-        pd.Timestamp(time_offset).value == bin_specs["date:eyeColor"][0]["bin_offset"]
-    )
-    assert pd.Timedelta(time_width).value == bin_specs["date:latitude"][0]["bin_width"]
-    assert (
-        pd.Timestamp(time_offset).value == bin_specs["date:latitude"][0]["bin_offset"]
-    )
+    assert pd.Timedelta(time_width).value == bin_specs["date:eyeColor"][0]["binWidth"]
+    assert pd.Timestamp(time_offset).value == bin_specs["date:eyeColor"][0]["origin"]
+    assert pd.Timedelta(time_width).value == bin_specs["date:latitude"][0]["binWidth"]
+    assert pd.Timestamp(time_offset).value == bin_specs["date:latitude"][0]["origin"]
 
 
 def test_df_stability_report_external():
diff --git a/tests/popmon/stats/test_numpy.py b/tests/popmon/stats/test_numpy.py
index ca92a2ba..3cf15ff7 100644
--- a/tests/popmon/stats/test_numpy.py
+++ b/tests/popmon/stats/test_numpy.py
@@ -197,15 +197,15 @@ def test_statistics_1():
     def get_quantiles(q):
         _quantiles = np.zeros((3, 6))
         for i in range(a.shape[0]):
-            for l in range(a.shape[3]):
-                isort = np.argsort(_values[i, l])
-                v = _values[i, l][isort]
-                u = _weights[i, l][isort]
+            for ll in range(a.shape[3]):
+                isort = np.argsort(_values[i, ll])
+                v = _values[i, ll][isort]
+                u = _weights[i, ll][isort]
                 U = u.cumsum()
                 r = (U - 0.5 * u) / U[-1]
                 for m in range(1, len(u)):
                     if r[m - 1] <= q and r[m] > q:
-                        _quantiles[i, l] = v[m - 1] + (q - r[m - 1]) / (
+                        _quantiles[i, ll] = v[m - 1] + (q - r[m - 1]) / (
                             r[m] - r[m - 1]
                         ) * (v[m] - v[m - 1])
                         break
diff --git a/tests/popmon/visualization/test_report_generator.py b/tests/popmon/visualization/test_report_generator.py
index 75ffa93e..ddbd4d34 100644
--- a/tests/popmon/visualization/test_report_generator.py
+++ b/tests/popmon/visualization/test_report_generator.py
@@ -53,5 +53,5 @@ def test_report_generator():
     assert "final_report" in datastore
     assert (
         isinstance(datastore["final_report"], str)
-        and len(datastore["final_report"]) > 0
+        and len(datastore["final_report"]) > 0  # noqa: W503
     )