Skip to content

Commit

Permalink
feat: view presto row objects in data grid (#7436)
Browse files Browse the repository at this point in the history
* feat: view presto row objects in data grid

* fix: address feedback

* fix: spacing
  • Loading branch information
khtruong authored and betodealmeida committed May 3, 2019
1 parent 59a81e1 commit 2a003c1
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ export default class FilterableTable extends PureComponent {

headerRenderer({ dataKey, label, sortBy, sortDirection }) {
return (
<div>
<div className="header-style">
{label}
{sortBy === dataKey &&
<SortIndicator sortDirection={sortDirection} />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,8 @@
}
.even-row { background: #f2f2f2; }
.odd-row { background: #ffffff; }
.header-style {
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
94 changes: 89 additions & 5 deletions superset/db_engine_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
from sqlalchemy.engine.result import RowProxy
from sqlalchemy.engine.url import make_url
from sqlalchemy.sql import quoted_name, text
from sqlalchemy.sql.expression import ColumnClause
from sqlalchemy.sql.expression import TextAsFrom
from sqlalchemy.types import String, UnicodeText
import sqlparse
Expand Down Expand Up @@ -980,19 +981,98 @@ def get_columns(
result.append(cls._create_column_info(column, column.Column, column_type))
return result

@classmethod
def _is_column_name_quoted(cls, column_name: str) -> bool:
"""
Check if column name is in quotes
:param column_name: column name
:return: boolean
"""
return column_name.startswith('"') and column_name.endswith('"')

@classmethod
def _get_fields(cls, cols: List[dict]) -> List[ColumnClause]:
"""
Format column clauses where names are in quotes and labels are specified
:param cols: columns
:return: column clauses
"""
column_clauses = []
# Column names are separated by periods. This regex will find periods in a string
# if they are not enclosed in quotes because if a period is enclosed in quotes,
# then that period is part of a column name.
dot_pattern = r"""\. # split on period
(?= # look ahead
(?: # create non-capture group
[^\"]*\"[^\"]*\" # two quotes
)*[^\"]*$) # end regex"""
dot_regex = re.compile(dot_pattern, re.VERBOSE)
for col in cols:
# get individual column names
col_names = re.split(dot_regex, col['name'])
# quote each column name if it is not already quoted
for index, col_name in enumerate(col_names):
if not cls._is_column_name_quoted(col_name):
col_names[index] = '"{}"'.format(col_name)
quoted_col_name = '.'.join(
col_name if cls._is_column_name_quoted(col_name) else f'"{col_name}"'
for col_name in col_names)
# create column clause in the format "name"."name" AS "name.name"
column_clause = sqla.literal_column(quoted_col_name).label(col['name'])
column_clauses.append(column_clause)
return column_clauses

@classmethod
def _filter_presto_cols(cls, cols: List[dict]) -> List[dict]:
"""
We want to filter out columns that correspond to array content because expanding
arrays would require us to use unnest and join. This can lead to a large,
complicated, and slow query.
Example: select array_content
from TABLE
cross join UNNEST(array_column) as t(array_content);
We know which columns to skip because cols is a list provided to us in a specific
order where a structural column is positioned right before its content.
Example: Column Name: ColA, Column Data Type: array(row(nest_obj int))
cols = [ ..., ColA, ColA.nest_obj, ... ]
When we run across an array, check if subsequent column names start with the
array name and skip them.
:param cols: columns
:return: filtered list of columns
"""
filtered_cols = []
curr_array_col_name = ''
for col in cols:
# col corresponds to an array's content and should be skipped
if curr_array_col_name and col['name'].startswith(curr_array_col_name):
continue
# col is an array so we need to check if subsequent
# columns correspond to the array's contents
elif str(col['type']) == 'ARRAY':
curr_array_col_name = col['name']
filtered_cols.append(col)
else:
curr_array_col_name = ''
filtered_cols.append(col)
return filtered_cols

@classmethod
def select_star(cls, my_db, table_name: str, engine: Engine, schema: str = None,
limit: int = 100, show_cols: bool = False, indent: bool = True,
latest_partition: bool = True, cols: List[dict] = []) -> str:
"""
Temporary method until we have a function that can handle row and array columns
Include selecting properties of row objects. We cannot easily break arrays into
rows, so render the whole array in its own row and skip columns that correspond
to an array's contents.
"""
presto_cols = cols
if show_cols:
dot_regex = r'\.(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)'
presto_cols = [
col for col in presto_cols if re.search(dot_regex, col['name']) is None]
return BaseEngineSpec.select_star(
presto_cols = cls._filter_presto_cols(cols)
return super(PrestoEngineSpec, cls).select_star(
my_db, table_name, engine, schema, limit,
show_cols, indent, latest_partition, presto_cols,
)
Expand Down Expand Up @@ -1526,6 +1606,10 @@ def where_latest_partition(
return qry.where(Column(col_name) == value)
return False

@classmethod
def _get_fields(cls, cols: List[dict]) -> List[ColumnClause]:
return BaseEngineSpec._get_fields(cols)

@classmethod
def latest_sub_partition(cls, table_name, schema, database, **kwargs):
# TODO(bogdan): implement`
Expand Down
23 changes: 23 additions & 0 deletions tests/db_engine_specs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,29 @@ def test_presto_get_array_within_row_within_array_column(self):
('column_name.nested_obj', 'FLOAT')]
self.verify_presto_column(presto_column, expected_results)

def test_presto_get_fields(self):
cols = [
{'name': 'column'},
{'name': 'column.nested_obj'},
{'name': 'column."quoted.nested obj"'}]
actual_results = PrestoEngineSpec._get_fields(cols)
expected_results = [
{'name': '"column"', 'label': 'column'},
{'name': '"column"."nested_obj"', 'label': 'column.nested_obj'},
{'name': '"column"."quoted.nested obj"',
'label': 'column."quoted.nested obj"'}]
for actual_result, expected_result in zip(actual_results, expected_results):
self.assertEqual(actual_result.element.name, expected_result['name'])
self.assertEqual(actual_result.name, expected_result['label'])

def test_presto_filter_presto_cols(self):
cols = [
{'name': 'column', 'type': 'ARRAY'},
{'name': 'column.nested_obj', 'type': 'FLOAT'}]
actual_results = PrestoEngineSpec._filter_presto_cols(cols)
expected_results = [cols[0]]
self.assertEqual(actual_results, expected_results)

def test_hive_get_view_names_return_empty_list(self):
self.assertEquals([], HiveEngineSpec.get_view_names(mock.ANY, mock.ANY))

Expand Down

0 comments on commit 2a003c1

Please sign in to comment.