Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Additional bigquery specific rewrites #77

Merged
merged 3 commits into from
Jul 5, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions inst/csv/replacementPatterns.csv
Original file line number Diff line number Diff line change
Expand Up @@ -487,3 +487,4 @@ bigquery,"union select","union distinct select"
bigquery,SELECT DISTINCT @a FROM @b INTERSECT SELECT DISTINCT @a FROM @c;,SELECT t1.@a FROM (SELECT DISTINCT @a FROM @b UNION ALL SELECT DISTINCT @a FROM @c) AS t1 GROUP BY @a HAVING COUNT(*) >= 2;
bigquery,(SELECT DISTINCT @a FROM @b INTERSECT SELECT DISTINCT @a FROM @c),(SELECT t1.@a FROM (SELECT DISTINCT @a FROM @b UNION ALL SELECT DISTINCT @a FROM @c) AS t1 GROUP BY @a HAVING COUNT(*) >= 2)
bigquery,"ISNULL(@a,@b)","IFNULL(@a,@b)"
bigquery,as \" @a \",as @a
Binary file modified inst/java/SqlRender.jar
Binary file not shown.
75 changes: 64 additions & 11 deletions java/org/ohdsi/sql/BigQueryTranslate.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ private static class CommaListIterator {
private String listSuffix;

public enum ListType {
SELECT, GROUP_BY, ORDER_BY, WITH_COLUMNS
SELECT, GROUP_BY, ORDER_BY, WITH_COLUMNS, IN
}

public CommaListIterator(String expression_list, ListType list_type) {
Expand Down Expand Up @@ -108,6 +108,9 @@ private void splitList() {
case WITH_COLUMNS:
// empty
break;
case IN:
// empty
break;
}
}

Expand Down Expand Up @@ -157,6 +160,9 @@ private void splitExpression() {
case WITH_COLUMNS:
// empty
break;
case IN:
// empty
break;
}
}

Expand Down Expand Up @@ -257,11 +263,8 @@ private static String bigQueryAliasCommonTableExpressions(String sql) {
* @return the query with GROUP BY elements replaced
*/
private static String bigQueryConvertSelectListReferences(String sql, String select_pattern, CommaListIterator.ListType list_type) {
boolean added_semicolon = false;
if (select_pattern.substring(select_pattern.length() - 1).equals(";")) {
sql = sql + ";";
added_semicolon = true;
}
// Adds semi-colon in case there isn't one already
sql = sql + ";";

// Iterates SELECT statements
List<Block> select_statement_pattern = SqlTranslate.parseSearchPattern(select_pattern);
Expand Down Expand Up @@ -321,9 +324,9 @@ private static String bigQueryConvertSelectListReferences(String sql, String sel
}
sql += suffix;
}
if (added_semicolon) {
sql = sql.substring(0, sql.length() - 1);
}

// Removes semi-colon added at the beginning
sql = sql.substring(0, sql.length() - 1);
return sql;
}

Expand Down Expand Up @@ -415,8 +418,7 @@ private static String bigQueryReplaceStringConcatsInStatement(String sql) {
/**
* Lower cases everything but string literals
*
* @param sql
* - the query to translate
* @param sql - the query to translate
* @return the query after translation
*/
private static String bigQueryLowerCase(String sql) {
Expand All @@ -429,6 +431,56 @@ private static String bigQueryLowerCase(String sql) {
return sql;
}

/**
 * Removes the quotes from the elements of IN lists whose left-hand side is an identifier
 * ending with "_id" and whose elements are all quoted, non-empty digit strings, e.g.
 * <code>x_id in ('333','22','1')</code> becomes <code>x_id in (333,22,1)</code>.
 *
 * Each IN list is judged on its own: a list that does not qualify is left untouched and the
 * scan continues with the next IN list in the query.
 *
 * @param sql - the query to translate
 * @return the query after translation
 */
private static String bigQueryUnquoteIdInLists(String sql) {
	List<Block> in_list_pattern = SqlTranslate.parseSearchPattern("in (@@i)");
	List<StringUtils.Token> tokens = StringUtils.tokenizeSql(sql);

	// Iterates IN lists
	for (MatchedPattern in_list_match = SqlTranslate.search(sql, in_list_pattern, 0);
			in_list_match.start != -1;
			in_list_match = SqlTranslate.search(sql, in_list_pattern, in_list_match.startToken + 1)) {
		final String in_list = in_list_match.variableToValue.get("@@i");

		// Checks if the lhs is an identifier ending with "_id"
		if (in_list_match.startToken <= 0) {
			continue;
		}
		final StringUtils.Token lhs_token = tokens.get(in_list_match.startToken - 1);
		if (!lhs_token.isIdentifier()) {
			continue;
		}
		// Locale.ROOT keeps the check stable under locales with special casing rules
		// (e.g. Turkish, where "ID" would not lower-case to "id").
		if (!lhs_token.text.toLowerCase(java.util.Locale.ROOT).endsWith("_id")) {
			continue;
		}

		// Skips only this IN list (instead of aborting the whole translation) when any
		// element is not a quoted integer.
		if (!bigQueryAllQuotedIntegers(in_list)) {
			continue;
		}

		final String replacement_in_list = in_list.replace("'", "");
		sql = sql.substring(0, in_list_match.start)
				+ " in (" + replacement_in_list + ")"
				+ sql.substring(in_list_match.end, sql.length());

		// The query text changed, so the token list is rebuilt to stay aligned with it.
		// NOTE(review): assumes SqlTranslate.search reports startToken as an index into
		// StringUtils.tokenizeSql(sql) - confirm.
		tokens = StringUtils.tokenizeSql(sql);
	}

	return sql;
}

/**
 * Checks whether every element of a comma separated IN list is a quoted integer.
 *
 * @param in_list - the text between the parentheses of an IN list
 * @return true if all elements are quoted, non-empty digit strings
 */
private static boolean bigQueryAllQuotedIntegers(String in_list) {
	CommaListIterator in_list_iter = new CommaListIterator(in_list, CommaListIterator.ListType.IN);
	for (; !in_list_iter.IsDone(); in_list_iter.Next()) {
		if (!bigQueryIsQuotedInteger(in_list_iter.GetFullExpression())) {
			return false;
		}
	}
	return true;
}

/**
 * Checks whether an expression is a single-quoted string holding at least one digit and
 * nothing but digits, such as '42'.
 *
 * @param expr - the expression to check
 * @return true if the expression is a quoted, non-empty digit string
 */
private static boolean bigQueryIsQuotedInteger(String expr) {
	final int length = expr.length();
	// Two quotes plus at least one digit; also guards charAt() against empty input and
	// prevents '' from being unquoted into nothing.
	if (length < 3) {
		return false;
	}
	if (expr.charAt(0) != '\'' || expr.charAt(length - 1) != '\'') {
		return false;
	}
	for (int i = 1; i < length - 1; ++i) {
		if (!Character.isDigit(expr.charAt(i))) {
			return false;
		}
	}
	return true;
}

/**
* bigQuery specific translations
*
Expand All @@ -443,6 +495,7 @@ public static String translatebigQuery(String sql) {
sql = bigQueryConvertSelectListReferences(sql, "select @@s from @@b group by @@r)", CommaListIterator.ListType.GROUP_BY);
sql = bigQueryConvertSelectListReferences(sql, "select @@s from @@b group by @@c order by @@r;", CommaListIterator.ListType.ORDER_BY);
sql = bigQueryReplaceStringConcatsInStatement(sql);
sql = bigQueryUnquoteIdInLists(sql);
return sql;
}
}
24 changes: 24 additions & 0 deletions tests/testthat/test-translateSql.R
Original file line number Diff line number Diff line change
Expand Up @@ -942,6 +942,30 @@ test_that("translateSQL sql server -> bigquery isnull", {
expect_equal_ignore_spaces(sql, "select IFNULL(x,y) from t;")
})

test_that("translateSQL sql server -> bigquery unquote aliases", {
  # A double-quoted column alias in the input is expected to come back without quotes.
  translation <- translateSql("SELECT a as \"b\" from t;", targetDialect = "bigquery")
  expect_equal_ignore_spaces(translation$sql, "select a as b from t;")
})

test_that("translateSQL sql server -> bigquery non-id in list", {
  # The left-hand side "x" does not end in "_id", so the quoted elements must be kept.
  translation <- translateSql("select * from t where x in ('333','22','1')", targetDialect = "bigquery")
  expect_equal_ignore_spaces(translation$sql, "select * from t where x in ('333','22','1')")
})

test_that("translateSQL sql server -> bigquery non-integer in lists", {
  # One element ('1a') is not purely digits, so the whole list must stay quoted.
  translation <- translateSql("select * from t where x_id in ('333','22','1a')", targetDialect = "bigquery")
  expect_equal_ignore_spaces(translation$sql, "select * from t where x_id in ('333','22','1a')")
})

test_that("translateSQL sql server -> bigquery unquote id in lists", {
  # An "_id" column compared against all-digit quoted elements is expected to be unquoted.
  translation <- translateSql("select * from t where x_id in ('333','22','1')", targetDialect = "bigquery")
  expect_equal_ignore_spaces(translation$sql, "select * from t where x_id in (333,22,1)")
})

# For debugging: force reload of patterns:
# rJava::J("org.ohdsi.sql.SqlTranslate")$setReplacementPatterns("inst/csv/replacementPatterns.csv")

Expand Down