From 57990bfd835936726077964ff84f232704915bc0 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Mon, 23 May 2016 13:06:35 -0700 Subject: [PATCH] Implementing druid's regex filters (#501) * Implementing druid's regex filters * Debugging * Debuggin' --- caravel/forms.py | 55 ++++++++++++++++++++++++--------------------- caravel/models.py | 8 +++++-- caravel/views.py | 17 -------------- setup.py | 2 +- tests/core_tests.py | 1 + 5 files changed, 37 insertions(+), 46 deletions(-) diff --git a/caravel/forms.py b/caravel/forms.py index 5c4e1a70bf921..4c1b718756980 100644 --- a/caravel/forms.py +++ b/caravel/forms.py @@ -642,20 +642,6 @@ class QueryForm(OmgWtForm): collapsed_fieldsets = HiddenField() viz_type = self.field_dict.get('viz_type') - filter_cols = viz.datasource.filterable_column_names or [''] - for i in range(10): - setattr(QueryForm, 'flt_col_' + str(i), SelectField( - 'Filter 1', - default=filter_cols[0], - choices=self.choicify(filter_cols))) - setattr(QueryForm, 'flt_op_' + str(i), SelectField( - 'Filter 1', - default='in', - choices=self.choicify(['in', 'not in']))) - setattr( - QueryForm, 'flt_eq_' + str(i), - TextField("Super", default='')) - for field in viz.flat_form_fields(): setattr(QueryForm, field, self.field_dict[field]) @@ -663,8 +649,11 @@ def add_to_form(attrs): for attr in attrs: setattr(QueryForm, attr, self.field_dict[attr]) + filter_choices = self.choicify(['in', 'not in']) # datasource type specific form elements - if viz.datasource.__class__.__name__ == 'SqlaTable': + datasource_classname = viz.datasource.__class__.__name__ + time_fields = None + if datasource_classname == 'SqlaTable': QueryForm.fieldsets += ({ 'label': 'SQL', 'fields': ['where', 'having'], @@ -675,8 +664,6 @@ def add_to_form(attrs): add_to_form(('where', 'having')) grains = viz.datasource.database.grains() - if not viz.datasource.any_dttm_col: - return QueryForm if grains: time_fields = ('granularity_sqla', 'time_grain_sqla') self.field_dict['time_grain_sqla'] = SelectField( @@ -695,19 +682,35 @@ def add_to_form(attrs): else: time_fields = 'granularity_sqla' add_to_form((time_fields, )) - else: + elif datasource_classname == 'DruidDatasource': time_fields = ('granularity', 'druid_time_origin') add_to_form(('granularity', 'druid_time_origin')) field_css_classes['granularity'] = ['form-control', 'select2_freeform'] field_css_classes['druid_time_origin'] = ['form-control', 'select2_freeform'] + filter_choices = self.choicify(['in', 'not in', 'regex']) add_to_form(('since', 'until')) - QueryForm.fieldsets = ({ - 'label': 'Time', - 'fields': ( - time_fields, - ('since', 'until'), - ), - 'description': "Time related form attributes", - },) + tuple(QueryForm.fieldsets) + filter_cols = viz.datasource.filterable_column_names or [''] + for i in range(10): + setattr(QueryForm, 'flt_col_' + str(i), SelectField( + 'Filter 1', + default=filter_cols[0], + choices=self.choicify(filter_cols))) + setattr(QueryForm, 'flt_op_' + str(i), SelectField( + 'Filter 1', + default='in', + choices=filter_choices)) + setattr( + QueryForm, 'flt_eq_' + str(i), + TextField("Super", default='')) + + if time_fields: + QueryForm.fieldsets = ({ + 'label': 'Time', + 'fields': ( + time_fields, + ('since', 'until'), + ), + 'description': "Time related form attributes", + },) + tuple(QueryForm.fieldsets) return QueryForm diff --git a/caravel/models.py b/caravel/models.py index 7f216ed45d25a..ffb076f6518b8 100644 --- a/caravel/models.py +++ b/caravel/models.py @@ -1171,6 +1171,8 @@ def recursive_get_fields(_conf): cond = Dimension(col) == eq if op == 'not in': cond = ~cond + elif op == 'regex': + cond = Filter(type="regex", pattern=eq, dimension=col) if filters: filters = Filter(type="and", fields=[ Filter.build_filter(cond), @@ -1201,7 +1203,8 @@ def recursive_get_fields(_conf): } client.groupby(**pre_qry) query_str += "// Two phase query\n// Phase 1\n" - query_str += json.dumps(client.query_dict, indent=2) + "\n" + query_str += json.dumps( + client.query_builder.last_query.query_dict, indent=2) + "\n" query_str += "//\nPhase 2 (built based on phase one's results)\n" df = client.export_pandas() if df is not None and not df.empty: @@ -1237,7 +1240,8 @@ def recursive_get_fields(_conf): }], } client.groupby(**qry) - query_str += json.dumps(client.query_dict, indent=2) + query_str += json.dumps( + client.query_builder.last_query.query_dict, indent=2) df = client.export_pandas() if df is None or df.size == 0: raise Exception(_("No data was returned.")) diff --git a/caravel/views.py b/caravel/views.py index 749282cbfd968..7c6da2155d5b3 100644 --- a/caravel/views.py +++ b/caravel/views.py @@ -24,7 +24,6 @@ from flask.ext.babelpkg import lazy_gettext as _ from flask_appbuilder.models.sqla.filters import BaseFilter -from pydruid.client import doublesum from sqlalchemy import create_engine, select, text from sqlalchemy.sql.expression import TextAsFrom from werkzeug.routing import BaseConverter @@ -1019,22 +1018,6 @@ def refresh_datasources(self): session.commit() return redirect("/druiddatasourcemodelview/list/") - @expose("/autocomplete///") - def autocomplete(self, datasource, column): - """used for filter autocomplete""" - client = utils.get_pydruid_client() - top = client.topn( - datasource=datasource, - granularity='all', - intervals='2013-10-04/2020-10-10', - aggregations={"count": doublesum("count")}, - dimension=column, - metric='count', - threshold=1000, - ) - values = sorted([d[column] for d in top[0]['result']]) - return json.dumps(values) - @app.errorhandler(500) def show_traceback(self): if config.get("SHOW_STACKTRACE"): diff --git a/setup.py b/setup.py index 9a96d4841a2f4..75061e6a5dc23 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ 'markdown>=2.6.2, <3.0.0', 'pandas==0.18.0', 'parsedatetime==2.0.0', - 'pydruid>=0.2.2, <0.3', + 'pydruid>=0.2.3, <0.4', 'python-dateutil>=2.4.2, <3.0.0', 'requests>=2.7.0, <3.0.0', 'sqlalchemy>=1.0.12, <2.0.0', diff --git a/tests/core_tests.py b/tests/core_tests.py index 3b66dc3209ba7..a6528af9a2fa8 100644 --- a/tests/core_tests.py +++ b/tests/core_tests.py @@ -319,6 +319,7 @@ def test_client(self, PyDruid): df = pd.DataFrame(nres) instance.export_pandas.return_value = df instance.query_dict = {} + instance.query_builder.last_query.query_dict = {} resp = self.client.get('/caravel/explore/druid/1/?viz_type=table&granularity=one+day&druid_time_origin=&since=7+days+ago&until=now&row_limit=5000&include_search=false&metrics=count&groupby=name&flt_col_0=dim1&flt_op_0=in&flt_eq_0=&slice_id=&slice_name=&collapsed_fieldsets=&action=&datasource_name=test_datasource&datasource_id=1&datasource_type=druid&previous_viz_type=table&json=true&force=true') print('-'*300) print(resp.data.decode('utf-8'))