From 1c25fe481da0eb7faa99677cd9db34ac43d503f2 Mon Sep 17 00:00:00 2001
From: Riccardo Magliocchetti
Date: Mon, 26 Jun 2017 17:53:45 +0200
Subject: [PATCH] Add support for column specific fillna to viz

Fix #3029
---
 superset/connectors/base/models.py |  4 ++++
 superset/viz.py                    | 19 ++++++++++++++++++-
 tests/core_tests.py                | 11 +++++++++++
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/superset/connectors/base/models.py b/superset/connectors/base/models.py
index 14cac09aba78c..bf66c606842e3 100644
--- a/superset/connectors/base/models.py
+++ b/superset/connectors/base/models.py
@@ -64,6 +64,10 @@ def uid(self):
     def column_names(self):
         return sorted([c.column_name for c in self.columns])
 
+    @property
+    def columns_types(self):
+        return {c.column_name: c.type for c in self.columns}
+
     @property
     def main_dttm_col(self):
         return "timestamp"
diff --git a/superset/viz.py b/superset/viz.py
index 47eb7a920a9fc..1fe7571e2ce18 100644
--- a/superset/viz.py
+++ b/superset/viz.py
@@ -43,6 +43,7 @@ class BaseViz(object):
     verbose_name = "Base Viz"
     credits = ""
     is_timeseries = False
+    default_fillna = 0
 
     def __init__(self, datasource, form_data):
         if not datasource:
@@ -61,6 +62,21 @@ def __init__(self, datasource, form_data):
         self.status = None
         self.error_message = None
 
+    def get_fillna_for_type(self, col_type):
+        """Returns the value for use as filler for a specific Column.type"""
+        if col_type:
+            if col_type == 'TEXT' or col_type.startswith('VARCHAR'):
+                return ' NULL'
+        return self.default_fillna
+
+    def get_fillna_for_columns(self, columns=None):
+        """Returns a dict or scalar that can be passed to DataFrame.fillna"""
+        if columns is None:
+            return self.default_fillna
+        columns_types = self.datasource.columns_types
+        fillna = {c: self.get_fillna_for_type(columns_types.get(c)) for c in columns}
+        return fillna
+
     def get_df(self, query_obj=None):
         """Returns a pandas dataframe based on the query object"""
         if not query_obj:
@@ -102,7 +118,8 @@ def get_df(self, query_obj=None):
                 if self.datasource.offset:
                     df[DTTM_ALIAS] += timedelta(hours=self.datasource.offset)
             df.replace([np.inf, -np.inf], np.nan)
-            df = df.fillna(0)
+            fillna = self.get_fillna_for_columns(df.columns)
+            df = df.fillna(fillna)
         return df
 
     def get_extra_filters(self):
diff --git a/tests/core_tests.py b/tests/core_tests.py
index f79dec91fa277..e68dea96ad231 100644
--- a/tests/core_tests.py
+++ b/tests/core_tests.py
@@ -767,5 +767,16 @@ def test_slice_query_endpoint(self):
         assert 'language' in resp
         self.logout();
 
+    def test_viz_get_fillna_for_columns(self):
+        slc = self.get_slice("Girls", db.session)
+        q = slc.viz.query_obj()
+        results = slc.viz.datasource.query(q)
+        fillna_columns = slc.viz.get_fillna_for_columns(results.df.columns)
+        self.assertDictEqual(
+            fillna_columns,
+            {'name': ' NULL', 'sum__num': 0}
+        )
+
+
 if __name__ == '__main__':
     unittest.main()