pingcap · xiongjiwei · Feb 24, 2021 · Jan 12, 2021 · Jan 12, 2021 · Jan 22, 2021
diff --git a/digester.go b/digester.go
@@ -23,6 +23,8 @@ import (
 	"sync"
 	"unicode"
 	"unsafe"
+
+	"github.com/pingcap/parser/charset"
 )
 
 // DigestHash generates the digest of statements.
@@ -159,14 +161,36 @@ func (d *sqlDigester) normalize(sql string) {
 
 		d.reduceLit(&currTok)
 
+		if currTok.tok == identifier {
+			if strings.HasPrefix(currTok.lit, "_") {
+				_, _, err := charset.GetCharsetInfo(currTok.lit[1:])
+				if err == nil {
+					currTok.tok = underscoreCS
+					goto APPEND
+				}
+			}
+
+			if tok1 := d.lexer.isTokenIdentifier(currTok.lit, pos.Offset); tok1 != 0 {
+				currTok.tok = tok1
+			}
+		}
+	APPEND:
 		d.tokens = append(d.tokens, currTok)
 	}
 	d.lexer.reset("")
 	for i, token := range d.tokens {
 		if token.tok == singleAtIdentifier {
 			d.buffer.WriteString("@")
+			d.buffer.WriteString(token.lit)
+		} else if token.tok == underscoreCS {
+			d.buffer.WriteString("(_charset)")
+		} else if token.tok == identifier {
+			d.buffer.WriteByte('`')
+			d.buffer.WriteString(token.lit)
+			d.buffer.WriteByte('`')
+		} else {
+			d.buffer.WriteString(token.lit)
 		}
-		d.buffer.WriteString(token.lit)
 		if i != len(d.tokens)-1 {
 			d.buffer.WriteRune(' ')
 		}

diff --git a/digester_test.go b/digester_test.go
@@ -28,33 +28,34 @@ func (s *testSQLDigestSuite) TestNormalize(c *C) {
 		input  string
 		expect string
 	}{
+		{"select _utf8mb4'123'", "select (_charset) ?"},
 		{"SELECT 1", "select ?"},
-		{"select * from b where id = 1", "select * from b where id = ?"},
-		{"select 1 from b where id in (1, 3, '3', 1, 2, 3, 4)", "select ? from b where id in ( ... )"},
-		{"select 1 from b where id in (1, a, 4)", "select ? from b where id in ( ? , a , ? )"},
-		{"select 1 from b order by 2", "select ? from b order by 2"},
+		{"select * from b where id = 1", "select * from `b` where `id` = ?"},
+		{"select 1 from b where id in (1, 3, '3', 1, 2, 3, 4)", "select ? from `b` where `id` in ( ... )"},
+		{"select 1 from b where id in (1, a, 4)", "select ? from `b` where `id` in ( ? , `a` , ? )"},
+		{"select 1 from b order by 2", "select ? from `b` order by 2"},
 		{"select /*+ a hint */ 1", "select ?"},
 		{"select /* a hint */ 1", "select ?"},
 		{"select truncate(1, 2)", "select truncate ( ... )"},
-		{"select -1 + - 2 + b - c + 0.2 + (-2) from c where d in (1, -2, +3)", "select ? + ? + b - c + ? + ( ? ) from c where d in ( ... )"},
-		{"select * from t where a <= -1 and b < -2 and c = -3 and c > -4 and c >= -5 and e is 1", "select * from t where a <= ? and b < ? and c = ? and c > ? and c >= ? and e is ?"},
-		{"select count(a), b from t group by 2", "select count ( a ) , b from t group by 2"},
-		{"select count(a), b, c from t group by 2, 3", "select count ( a ) , b , c from t group by 2 , 3"},
-		{"select count(a), b, c from t group by (2, 3)", "select count ( a ) , b , c from t group by ( 2 , 3 )"},
-		{"select a, b from t order by 1, 2", "select a , b from t order by 1 , 2"},
-		{"select count(*) from t", "select count ( ? ) from t"},
-		{"select * from t Force Index(kk)", "select * from t"},
-		{"select * from t USE Index(kk)", "select * from t"},
-		{"select * from t Ignore Index(kk)", "select * from t"},
-		{"select * from t1 straight_join t2 on t1.id=t2.id", "select * from t1 join t2 on t1 . id = t2 . id"},
+		{"select -1 + - 2 + b - c + 0.2 + (-2) from c where d in (1, -2, +3)", "select ? + ? + `b` - `c` + ? + ( ? ) from `c` where `d` in ( ... )"},
+		{"select * from t where a <= -1 and b < -2 and c = -3 and c > -4 and c >= -5 and e is 1", "select * from `t` where `a` <= ? and `b` < ? and `c` = ? and `c` > ? and `c` >= ? and `e` is ?"},
+		{"select count(a), b from t group by 2", "select count ( `a` ) , `b` from `t` group by 2"},
+		{"select count(a), b, c from t group by 2, 3", "select count ( `a` ) , `b` , `c` from `t` group by 2 , 3"},
+		{"select count(a), b, c from t group by (2, 3)", "select count ( `a` ) , `b` , `c` from `t` group by ( 2 , 3 )"},
+		{"select a, b from t order by 1, 2", "select `a` , `b` from `t` order by 1 , 2"},
+		{"select count(*) from t", "select count ( ? ) from `t`"},
+		{"select * from t Force Index(kk)", "select * from `t`"},
+		{"select * from t USE Index(kk)", "select * from `t`"},
+		{"select * from t Ignore Index(kk)", "select * from `t`"},
+		{"select * from t1 straight_join t2 on t1.id=t2.id", "select * from `t1` join `t2` on `t1` . `id` = `t2` . `id`"},
 		// test syntax error, it will be checked by parser, but it should not make normalize dead loop.
-		{"select * from t ignore index(", "select * from t ignore index"},
+		{"select * from t ignore index(", "select * from `t` ignore index"},
 		{"select /*+ ", "select "},
 		{"select * from 🥳", "select * from"},
 		{"select 1 / 2", "select ? / ?"},
-		{"select * from t where a = 40 limit ?, ?", "select * from t where a = ? limit ..."},
-		{"select * from t where a > ?", "select * from t where a > ?"},
-		{"select @a=b from t", "select @a = b from t"},
+		{"select * from t where a = 40 limit ?, ?", "select * from `t` where `a` = ? limit ..."},
+		{"select * from t where a > ?", "select * from `t` where `a` > ?"},
+		{"select @a=b from t", "select @a = `b` from `t`"},
 	}
 	for _, test := range tests {
 		normalized := parser.Normalize(test.input)
@@ -73,7 +74,7 @@ func (s *testSQLDigestSuite) TestNormalizeDigest(c *C) {
 		normalized string
 		digest     string
 	}{
-		{"select 1 from b where id in (1, 3, '3', 1, 2, 3, 4)", "select ? from b where id in ( ... )", "f36161eef94dbfbd5e2f6b9a2f498a4c7facc6860621fbeb8084f63898275016"},
+		{"select 1 from b where id in (1, 3, '3', 1, 2, 3, 4)", "select ? from `b` where `id` in ( ... )", "e1c8cc2738f596dc24f15ef8eb55e0d902910d7298983496362a7b46dbc0b310"},
 	}
 	for _, test := range tests {
 		normalized, digest := parser.NormalizeDigest(test.sql)