Skip to content

Commit

Permalink
Implementing druid's regex filters (#501)
Browse files Browse the repository at this point in the history
* Implementing druid's regex filters

* Debugging

* Debuggin'
  • Loading branch information
mistercrunch committed May 23, 2016
1 parent 29f5ace commit 57990bf
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 46 deletions.
55 changes: 29 additions & 26 deletions caravel/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,29 +642,18 @@ class QueryForm(OmgWtForm):
collapsed_fieldsets = HiddenField()
viz_type = self.field_dict.get('viz_type')

filter_cols = viz.datasource.filterable_column_names or ['']
for i in range(10):
setattr(QueryForm, 'flt_col_' + str(i), SelectField(
'Filter 1',
default=filter_cols[0],
choices=self.choicify(filter_cols)))
setattr(QueryForm, 'flt_op_' + str(i), SelectField(
'Filter 1',
default='in',
choices=self.choicify(['in', 'not in'])))
setattr(
QueryForm, 'flt_eq_' + str(i),
TextField("Super", default=''))

for field in viz.flat_form_fields():
setattr(QueryForm, field, self.field_dict[field])

def add_to_form(attrs):
for attr in attrs:
setattr(QueryForm, attr, self.field_dict[attr])

filter_choices = self.choicify(['in', 'not in'])
# datasource type specific form elements
if viz.datasource.__class__.__name__ == 'SqlaTable':
datasource_classname = viz.datasource.__class__.__name__
time_fields = None
if datasource_classname == 'SqlaTable':
QueryForm.fieldsets += ({
'label': 'SQL',
'fields': ['where', 'having'],
Expand All @@ -675,8 +664,6 @@ def add_to_form(attrs):
add_to_form(('where', 'having'))
grains = viz.datasource.database.grains()

if not viz.datasource.any_dttm_col:
return QueryForm
if grains:
time_fields = ('granularity_sqla', 'time_grain_sqla')
self.field_dict['time_grain_sqla'] = SelectField(
Expand All @@ -695,19 +682,35 @@ def add_to_form(attrs):
else:
time_fields = 'granularity_sqla'
add_to_form((time_fields, ))
else:
elif datasource_classname == 'DruidDatasource':
time_fields = ('granularity', 'druid_time_origin')
add_to_form(('granularity', 'druid_time_origin'))
field_css_classes['granularity'] = ['form-control', 'select2_freeform']
field_css_classes['druid_time_origin'] = ['form-control', 'select2_freeform']
filter_choices = self.choicify(['in', 'not in', 'regex'])
add_to_form(('since', 'until'))

QueryForm.fieldsets = ({
'label': 'Time',
'fields': (
time_fields,
('since', 'until'),
),
'description': "Time related form attributes",
},) + tuple(QueryForm.fieldsets)
filter_cols = viz.datasource.filterable_column_names or ['']
for i in range(10):
setattr(QueryForm, 'flt_col_' + str(i), SelectField(
'Filter 1',
default=filter_cols[0],
choices=self.choicify(filter_cols)))
setattr(QueryForm, 'flt_op_' + str(i), SelectField(
'Filter 1',
default='in',
choices=filter_choices))
setattr(
QueryForm, 'flt_eq_' + str(i),
TextField("Super", default=''))

if time_fields:
QueryForm.fieldsets = ({
'label': 'Time',
'fields': (
time_fields,
('since', 'until'),
),
'description': "Time related form attributes",
},) + tuple(QueryForm.fieldsets)
return QueryForm
8 changes: 6 additions & 2 deletions caravel/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1171,6 +1171,8 @@ def recursive_get_fields(_conf):
cond = Dimension(col) == eq
if op == 'not in':
cond = ~cond
elif op == 'regex':
cond = Filter(type="regex", pattern=eq, dimension=col)
if filters:
filters = Filter(type="and", fields=[
Filter.build_filter(cond),
Expand Down Expand Up @@ -1201,7 +1203,8 @@ def recursive_get_fields(_conf):
}
client.groupby(**pre_qry)
query_str += "// Two phase query\n// Phase 1\n"
query_str += json.dumps(client.query_dict, indent=2) + "\n"
query_str += json.dumps(
client.query_builder.last_query.query_dict, indent=2) + "\n"
query_str += "//\nPhase 2 (built based on phase one's results)\n"
df = client.export_pandas()
if df is not None and not df.empty:
Expand Down Expand Up @@ -1237,7 +1240,8 @@ def recursive_get_fields(_conf):
}],
}
client.groupby(**qry)
query_str += json.dumps(client.query_dict, indent=2)
query_str += json.dumps(
client.query_builder.last_query.query_dict, indent=2)
df = client.export_pandas()
if df is None or df.size == 0:
raise Exception(_("No data was returned."))
Expand Down
17 changes: 0 additions & 17 deletions caravel/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
from flask.ext.babelpkg import lazy_gettext as _
from flask_appbuilder.models.sqla.filters import BaseFilter

from pydruid.client import doublesum
from sqlalchemy import create_engine, select, text
from sqlalchemy.sql.expression import TextAsFrom
from werkzeug.routing import BaseConverter
Expand Down Expand Up @@ -1019,22 +1018,6 @@ def refresh_datasources(self):
session.commit()
return redirect("/druiddatasourcemodelview/list/")

@expose("/autocomplete/<datasource>/<column>/")
def autocomplete(self, datasource, column):
    """Return the sorted values of a column as a JSON array.

    Used for filter autocomplete. Issues a topN query against
    ``datasource`` for the dimension ``column`` (ranked by the ``count``
    metric, at most 1000 entries, over a fixed hard-coded interval) and
    serializes the sorted dimension values from the first result bucket.
    """
    druid = utils.get_pydruid_client()
    topn_kwargs = dict(
        datasource=datasource,
        granularity='all',
        intervals='2013-10-04/2020-10-10',
        aggregations={"count": doublesum("count")},
        dimension=column,
        metric='count',
        threshold=1000,
    )
    response = druid.topn(**topn_kwargs)
    # Only the first bucket is read; with granularity 'all' a single
    # bucket is presumably all that comes back -- TODO confirm.
    rows = response[0]['result']
    values = [row[column] for row in rows]
    values.sort()
    return json.dumps(values)

@app.errorhandler(500)
def show_traceback(self):
if config.get("SHOW_STACKTRACE"):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
'markdown>=2.6.2, <3.0.0',
'pandas==0.18.0',
'parsedatetime==2.0.0',
'pydruid>=0.2.2, <0.3',
'pydruid>=0.2.3, <0.4',
'python-dateutil>=2.4.2, <3.0.0',
'requests>=2.7.0, <3.0.0',
'sqlalchemy>=1.0.12, <2.0.0',
Expand Down
1 change: 1 addition & 0 deletions tests/core_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ def test_client(self, PyDruid):
df = pd.DataFrame(nres)
instance.export_pandas.return_value = df
instance.query_dict = {}
instance.query_builder.last_query.query_dict = {}
resp = self.client.get('/caravel/explore/druid/1/?viz_type=table&granularity=one+day&druid_time_origin=&since=7+days+ago&until=now&row_limit=5000&include_search=false&metrics=count&groupby=name&flt_col_0=dim1&flt_op_0=in&flt_eq_0=&slice_id=&slice_name=&collapsed_fieldsets=&action=&datasource_name=test_datasource&datasource_id=1&datasource_type=druid&previous_viz_type=table&json=true&force=true')
print('-'*300)
print(resp.data.decode('utf-8'))
Expand Down

0 comments on commit 57990bf

Please sign in to comment.