Skip to content

Commit 1c9086b

Browse files
committed
SQL: allow identifier with placeholder such as '${a_1}'
Close #3169. Some dialects support shell-like variable substitution; HiveQL is one such dialect. https://cwiki.apache.org/confluence/display/Hive/LanguageManual+VariableSubstitution With this change, the SQL parser accepts '${var}' as a part of an identifier. TODO: `var` itself can be extracted as a reference tag. Signed-off-by: Masatake YAMATO <yamato@redhat.com>
1 parent c31d572 commit 1c9086b

File tree

4 files changed

+118
-8
lines changed

4 files changed

+118
-8
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
--sort=no
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
tb_name${dt} input.sql /^create table database.tb_name${dt} as$/;" t
2+
col_a input.sql /^select col_a, col_b from database.tb_name;$/;" E table:tb_name${dt}
3+
col_b input.sql /^select col_a, col_b from database.tb_name;$/;" E table:tb_name${dt}
4+
tb_name${dt}${dt0} input.sql /^create table database.tb_name${dt}${dt0} as$/;" t
5+
col_a input.sql /^select col_a, col_b from database.tb_name;$/;" E table:tb_name${dt}${dt0}
6+
col_b input.sql /^select col_a, col_b from database.tb_name;$/;" E table:tb_name${dt}${dt0}
7+
${dt1}tb_name${dt}${dt0} input.sql /^create table database.${dt1}tb_name${dt}${dt0} as$/;" t
8+
col_a input.sql /^select col_a, col_b from database.tb_name;$/;" E table:${dt1}tb_name${dt}${dt0}
9+
col_b input.sql /^select col_a, col_b from database.tb_name;$/;" E table:${dt1}tb_name${dt}${dt0}
10+
${dt1}tb_name${dt}${dt0}Z input.sql /^create table database.${dt1}tb_name${dt}${dt0}Z as$/;" t
11+
col_a input.sql /^select col_a, col_b from database.tb_name;$/;" E table:${dt1}tb_name${dt}${dt0}Z
12+
col_b input.sql /^select col_a, col_b from database.tb_name;$/;" E table:${dt1}tb_name${dt}${dt0}Z
13+
tb_${dt2}_name input.sql /^create table database.tb_${dt2}_name as$/;" t
14+
col_${key0} input.sql /^select col_${key0}, col_${key1} from database.tb_name;$/;" E table:tb_${dt2}_name
15+
col_${key1} input.sql /^select col_${key0}, col_${key1} from database.tb_name;$/;" E table:tb_${dt2}_name
16+
tb_${${d}${t:h}${i}}_name input.sql /^create table database.tb_${${d}${t:h}${i}}_name as$/;" t
17+
col_${key${n}${m}a} input.sql /^select col_${key${n}${m}a}, col_${key${m}${n}b} from database.tb_name;$/;" E table:tb_${${d}${t:h}${i}}_name
18+
col_${key${m}${n}b} input.sql /^select col_${key${n}${m}a}, col_${key${m}${n}b} from database.tb_name;$/;" E table:tb_${${d}${t:h}${i}}_name
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
-- Based on issue #3169 opened by @Appalled
2+
3+
create table database.tb_name${dt} as
4+
select col_a, col_b from database.tb_name;
5+
6+
create table database.tb_name${dt}${dt0} as
7+
select col_a, col_b from database.tb_name;
8+
9+
create table database.${dt1}tb_name${dt}${dt0} as
10+
select col_a, col_b from database.tb_name;
11+
12+
create table database.${dt1}tb_name${dt}${dt0}Z as
13+
select col_a, col_b from database.tb_name;
14+
15+
create table database.tb_${dt2}_name as
16+
select col_${key0}, col_${key1} from database.tb_name;
17+
18+
create table database.tb_${${d}${t:h}${i}}_name as
19+
select col_${key${n}${m}a}, col_${key${m}${n}b} from database.tb_name;

parsers/sql.c

+80-8
Original file line numberDiff line numberDiff line change
@@ -684,16 +684,68 @@ static void parseString (vString *const string, const int delimiter, int *promis
684684
}
685685
}
686686

687+
/* Parsing ${foo}.
688+
*
689+
* HiveQL is one of the implementations that have the variable substitution feature.
690+
* https://cwiki.apache.org/confluence/display/Hive/LanguageManual+VariableSubstitution
691+
*/
692+
static int parseVarSubstSequence (vString *const string, const int firstChar);
693+
/* Consume a single "${...}" substitution whose leading '$' has already been
 * read and is passed in as firstChar, appending the consumed characters to
 * string.
 *
 * If the character after '$' is not '{', only the '$' is appended and that
 * following character is returned.  Otherwise characters are appended up to
 * and including the matching '}', with any '$' met inside the braces handed
 * to parseVarSubstSequence() so that nested substitutions are absorbed too.
 *
 * Returns the first character read after the substitution, or EOF when the
 * input ends before the closing '}' is seen.
 */
static int parseVarSubst (vString *const string, const int firstChar)
{
	int ch;

	Assert (firstChar == '$');
	vStringPut (string, firstChar);

	ch = getcFromInputFile ();
	if (ch != '{')
		return ch;
	vStringPut (string, ch);

	for (;;)
	{
		ch = getcFromInputFile ();
		if (ch == EOF)
			break;

		if (ch == '$')
		{
			/* Nested substitution(s): the helper returns the character
			 * that follows them; push it back so the next iteration
			 * re-reads it through the normal path. */
			ch = parseVarSubstSequence (string, ch);
			ungetcToInputFile (ch);
			continue;
		}

		/* Both ordinary characters and the closing brace are recorded. */
		vStringPut (string, ch);
		if (ch == '}')
			return getcFromInputFile ();
	}

	return ch;
}
723+
724+
/* Consume a run of adjacent "${...}" substitutions (e.g. "${a}${b}") that
 * starts with firstChar, which must be '$', appending the consumed text to
 * string.
 *
 * parseVarSubst() is called repeatedly for as long as the character it
 * returns is another '$'.  Returns the first character following the run
 * that is not '$' (this may be EOF).
 */
static int parseVarSubstSequence (vString *const string, const int firstChar)
{
	/* Seed the loop with the caller-supplied character.  Leaving this
	 * uninitialized would pass an indeterminate value to parseVarSubst(),
	 * which asserts that it receives '$'. */
	int c = firstChar;

	do
		c = parseVarSubst (string, c);
	while (c == '$');

	return c;
}
734+
687735
/* Read a C identifier beginning with "firstChar" and place it into "string".
688736
*/
689737
static void parseIdentifier (vString *const string, const int firstChar)
690738
{
691739
int c = firstChar;
692-
Assert (isIdentChar1 (c));
740+
Assert (vStringLength (string) > 0 || isIdentChar1 (c));
693741
do
694742
{
695743
vStringPut (string, c);
696744
c = getcFromInputFile ();
745+
746+
/* Handle ${var} in HiveQL. */
747+
if (c == '$')
748+
c = parseVarSubstSequence (string, c);
697749
} while (isIdentChar (c));
698750
if (!isspace (c))
699751
ungetcToInputFile (c); /* unget non-identifier character */
@@ -937,15 +989,23 @@ static void readToken (tokenInfo *const token)
937989
}
938990

939991
case '$':
940-
token->type = parseDollarQuote (token->string, c, &token->promise);
941-
token->lineNumber = getInputLineNumber ();
942-
token->filePosition = getInputFilePosition ();
943-
break;
992+
{
993+
int c0 = getcFromInputFile ();
994+
ungetcToInputFile (c0);
995+
if (c0 != '{')
996+
{
997+
token->type = parseDollarQuote (token->string, c, &token->promise);
998+
token->lineNumber = getInputLineNumber ();
999+
token->filePosition = getInputFilePosition ();
1000+
break;
1001+
}
1002+
c = parseVarSubstSequence (token->string, c);
1003+
/* FALL THROUGH */
1004+
}
9441005

9451006
default:
946-
if (! isIdentChar1 (c))
947-
token->type = TOKEN_UNDEFINED;
948-
else
1007+
if ( isIdentChar1 (c)
1008+
|| (vStringLength (token->string) > 0 && isIdentChar (c)))
9491009
{
9501010
parseIdentifier (token->string, c);
9511011
token->lineNumber = getInputLineNumber ();
@@ -962,6 +1022,18 @@ static void readToken (tokenInfo *const token)
9621022
else
9631023
token->type = TOKEN_KEYWORD;
9641024
}
1025+
else if (vStringLength (token->string) > 0)
1026+
{
1027+
ungetcToInputFile (c);
1028+
1029+
/* token->string may be ${var}.
1030+
* We regard ${var} as an identifier. */
1031+
token->type = TOKEN_IDENTIFIER;
1032+
token->lineNumber = getInputLineNumber ();
1033+
token->filePosition = getInputFilePosition ();
1034+
}
1035+
else
1036+
token->type = TOKEN_UNDEFINED;
9651037
break;
9661038
}
9671039
}

0 commit comments

Comments
 (0)