Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lexer: quote identifiers when generating the SQL digest #1151

Merged
merged 5 commits into from
Feb 24, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion digester.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (
"sync"
"unicode"
"unsafe"

"github.com/pingcap/parser/charset"
)

// DigestHash generates the digest of statements.
Expand Down Expand Up @@ -159,14 +161,36 @@ func (d *sqlDigester) normalize(sql string) {

d.reduceLit(&currTok)

if currTok.tok == identifier {
if strings.HasPrefix(currTok.lit, "_") {
_, _, err := charset.GetCharsetInfo(currTok.lit[1:])
if err == nil {
currTok.tok = underscoreCS
goto APPEND
}
}

if tok1 := d.lexer.isTokenIdentifier(currTok.lit, pos.Offset); tok1 != 0 {
currTok.tok = tok1
}
}
APPEND:
d.tokens = append(d.tokens, currTok)
}
d.lexer.reset("")
for i, token := range d.tokens {
if token.tok == singleAtIdentifier {
d.buffer.WriteString("@")
d.buffer.WriteString(token.lit)
} else if token.tok == underscoreCS {
d.buffer.WriteString("(_charset)")
} else if token.tok == identifier {
d.buffer.WriteByte('`')
d.buffer.WriteString(token.lit)
d.buffer.WriteByte('`')
} else {
d.buffer.WriteString(token.lit)
}
d.buffer.WriteString(token.lit)
if i != len(d.tokens)-1 {
d.buffer.WriteRune(' ')
}
Expand Down
41 changes: 21 additions & 20 deletions digester_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,33 +28,34 @@ func (s *testSQLDigestSuite) TestNormalize(c *C) {
input string
expect string
}{
{"select _utf8mb4'123'", "select (_charset) ?"},
{"SELECT 1", "select ?"},
{"select * from b where id = 1", "select * from b where id = ?"},
{"select 1 from b where id in (1, 3, '3', 1, 2, 3, 4)", "select ? from b where id in ( ... )"},
{"select 1 from b where id in (1, a, 4)", "select ? from b where id in ( ? , a , ? )"},
{"select 1 from b order by 2", "select ? from b order by 2"},
{"select * from b where id = 1", "select * from `b` where `id` = ?"},
{"select 1 from b where id in (1, 3, '3', 1, 2, 3, 4)", "select ? from `b` where `id` in ( ... )"},
{"select 1 from b where id in (1, a, 4)", "select ? from `b` where `id` in ( ? , `a` , ? )"},
{"select 1 from b order by 2", "select ? from `b` order by 2"},
{"select /*+ a hint */ 1", "select ?"},
{"select /* a hint */ 1", "select ?"},
{"select truncate(1, 2)", "select truncate ( ... )"},
{"select -1 + - 2 + b - c + 0.2 + (-2) from c where d in (1, -2, +3)", "select ? + ? + b - c + ? + ( ? ) from c where d in ( ... )"},
{"select * from t where a <= -1 and b < -2 and c = -3 and c > -4 and c >= -5 and e is 1", "select * from t where a <= ? and b < ? and c = ? and c > ? and c >= ? and e is ?"},
{"select count(a), b from t group by 2", "select count ( a ) , b from t group by 2"},
{"select count(a), b, c from t group by 2, 3", "select count ( a ) , b , c from t group by 2 , 3"},
{"select count(a), b, c from t group by (2, 3)", "select count ( a ) , b , c from t group by ( 2 , 3 )"},
{"select a, b from t order by 1, 2", "select a , b from t order by 1 , 2"},
{"select count(*) from t", "select count ( ? ) from t"},
{"select * from t Force Index(kk)", "select * from t"},
{"select * from t USE Index(kk)", "select * from t"},
{"select * from t Ignore Index(kk)", "select * from t"},
{"select * from t1 straight_join t2 on t1.id=t2.id", "select * from t1 join t2 on t1 . id = t2 . id"},
{"select -1 + - 2 + b - c + 0.2 + (-2) from c where d in (1, -2, +3)", "select ? + ? + `b` - `c` + ? + ( ? ) from `c` where `d` in ( ... )"},
{"select * from t where a <= -1 and b < -2 and c = -3 and c > -4 and c >= -5 and e is 1", "select * from `t` where `a` <= ? and `b` < ? and `c` = ? and `c` > ? and `c` >= ? and `e` is ?"},
{"select count(a), b from t group by 2", "select count ( `a` ) , `b` from `t` group by 2"},
{"select count(a), b, c from t group by 2, 3", "select count ( `a` ) , `b` , `c` from `t` group by 2 , 3"},
{"select count(a), b, c from t group by (2, 3)", "select count ( `a` ) , `b` , `c` from `t` group by ( 2 , 3 )"},
{"select a, b from t order by 1, 2", "select `a` , `b` from `t` order by 1 , 2"},
{"select count(*) from t", "select count ( ? ) from `t`"},
{"select * from t Force Index(kk)", "select * from `t`"},
{"select * from t USE Index(kk)", "select * from `t`"},
{"select * from t Ignore Index(kk)", "select * from `t`"},
{"select * from t1 straight_join t2 on t1.id=t2.id", "select * from `t1` join `t2` on `t1` . `id` = `t2` . `id`"},
// Test inputs with syntax errors: the parser will reject them later, but normalize must not fall into an infinite loop on them.
{"select * from t ignore index(", "select * from t ignore index"},
{"select * from t ignore index(", "select * from `t` ignore index"},
{"select /*+ ", "select "},
{"select * from 🥳", "select * from"},
{"select 1 / 2", "select ? / ?"},
{"select * from t where a = 40 limit ?, ?", "select * from t where a = ? limit ..."},
{"select * from t where a > ?", "select * from t where a > ?"},
{"select @a=b from t", "select @a = b from t"},
{"select * from t where a = 40 limit ?, ?", "select * from `t` where `a` = ? limit ..."},
{"select * from t where a > ?", "select * from `t` where `a` > ?"},
{"select @a=b from t", "select @a = `b` from `t`"},
}
for _, test := range tests {
normalized := parser.Normalize(test.input)
Expand All @@ -73,7 +74,7 @@ func (s *testSQLDigestSuite) TestNormalizeDigest(c *C) {
normalized string
digest string
}{
{"select 1 from b where id in (1, 3, '3', 1, 2, 3, 4)", "select ? from b where id in ( ... )", "f36161eef94dbfbd5e2f6b9a2f498a4c7facc6860621fbeb8084f63898275016"},
{"select 1 from b where id in (1, 3, '3', 1, 2, 3, 4)", "select ? from `b` where `id` in ( ... )", "e1c8cc2738f596dc24f15ef8eb55e0d902910d7298983496362a7b46dbc0b310"},
}
for _, test := range tests {
normalized, digest := parser.NormalizeDigest(test.sql)
Expand Down