Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tables: fix prefix index, when the charset is utf8, truncate it from runes #7109

Merged
merged 6 commits into from
Jul 19, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ddl/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ func buildIndexColumns(columns []*model.ColumnInfo, idxColNames []*ast.IndexColN
Name: col.Name,
Offset: col.Offset,
Length: ic.Length,
Tp: &col.FieldType,
})
}

Expand Down
14 changes: 14 additions & 0 deletions expression/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3401,3 +3401,17 @@ func (s *testIntegrationSuite) TestTwoDecimalTruncate(c *C) {
res = tk.MustQuery("select 2.00000000000000000000000000000001 * 1.000000000000000000000000000000000000000000002")
res.Check(testkit.Rows("2.000000000000000000000000000000"))
}

// TestPrefixIndex checks that a row stored in a utf8mb4 table whose column is
// covered by a prefix index (name(12)) can be read back with an equality
// predicate on the full multi-byte value.
func (s *testIntegrationSuite) TestPrefixIndex(c *C) {
	kit := testkit.NewTestKit(c, s.store)
	defer s.cleanEnv(c)

	// Schema setup followed by the single multi-byte row, executed in order.
	setup := []string{
		"use test",
		`CREATE TABLE t1 (
name varchar(12) DEFAULT NULL,
KEY pname (name(12))
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci`,
		"insert into t1 values('借款策略集_网页');",
	}
	for _, stmt := range setup {
		kit.MustExec(stmt)
	}

	// The lookup must return the stored value unchanged.
	kit.MustQuery("select * from t1 where name = '借款策略集_网页';").Check(testkit.Rows("借款策略集_网页"))
}
2 changes: 2 additions & 0 deletions model/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,8 @@ type IndexColumn struct {
// for indexing;
// UnspecifiedLength if not using prefix indexing
Length int `json:"length"`
// Tp is the index column field type.
Tp *types.FieldType
}

// Clone clones IndexColumn.
Expand Down
39 changes: 29 additions & 10 deletions table/tables/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"bytes"
"encoding/binary"
"io"
"unicode/utf8"

"github.com/juju/errors"
"github.com/pingcap/tidb/kv"
Expand All @@ -26,6 +27,7 @@ import (
"github.com/pingcap/tidb/table"
"github.com/pingcap/tidb/tablecodec"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/charset"
"github.com/pingcap/tidb/util/codec"
)

Expand Down Expand Up @@ -124,6 +126,32 @@ func (c *index) getIndexKeyBuf(buf []byte, defaultCap int) []byte {
return make([]byte, 0, defaultCap)
}

// truncateIndexValuesIfNeeded truncates the indexed values for indexes that are created to use only the leading part of column values (prefix indexes).
func (c *index) truncateIndexValuesIfNeeded(indexedValues []types.Datum) []types.Datum {
for i := 0; i < len(indexedValues); i++ {
v := &indexedValues[i]
if v.Kind() == types.KindString || v.Kind() == types.KindBytes {
ic := c.idxInfo.Columns[i]
if ic.Tp.Charset == charset.CharsetUTF8 || ic.Tp.Charset == charset.CharsetUTF8MB4 {
val := v.GetBytes()
if ic.Length != types.UnspecifiedLength && utf8.RuneCount(val) > ic.Length {
Copy link
Contributor

@birdstorm birdstorm Jul 19, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can use utf8.RuneCountInString() instead, and thus eliminate the usage of val.

Copy link
Contributor Author

@winkyao winkyao Jul 19, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But RuneCountInString needs to convert bytes to string first, it's unworthy.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see.

rs := bytes.Runes(val)
truncateStr := string(rs[:ic.Length])
// truncate value and limit its length
v.SetString(truncateStr)
}
} else {
if ic.Length != types.UnspecifiedLength && len(v.GetBytes()) > ic.Length {
// truncate value and limit its length
v.SetBytes(v.GetBytes()[:ic.Length])
}
}
}
}

return indexedValues
}

// GenIndexKey generates storage key for index values. Returned distinct indicates whether the
// indexed values should be distinct in storage (i.e. whether handle is encoded in the key).
func (c *index) GenIndexKey(sc *stmtctx.StatementContext, indexedValues []types.Datum, h int64, buf []byte) (key []byte, distinct bool, err error) {
Expand All @@ -143,16 +171,7 @@ func (c *index) GenIndexKey(sc *stmtctx.StatementContext, indexedValues []types.

// For string columns, indexes can be created that use only the leading part of column values,
// using col_name(length) syntax to specify an index prefix length.
for i := 0; i < len(indexedValues); i++ {
v := &indexedValues[i]
if v.Kind() == types.KindString || v.Kind() == types.KindBytes {
ic := c.idxInfo.Columns[i]
if ic.Length != types.UnspecifiedLength && len(v.GetBytes()) > ic.Length {
// truncate value and limit its length
v.SetBytes(v.GetBytes()[:ic.Length])
}
}
}
indexedValues = c.truncateIndexValuesIfNeeded(indexedValues)
key = c.getIndexKeyBuf(buf, len(c.prefix)+len(indexedValues)*9+9)
key = append(key, []byte(c.prefix)...)
key, err = codec.EncodeKey(sc, key, indexedValues...)
Expand Down
4 changes: 2 additions & 2 deletions table/tables/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,8 @@ func (s *testIndexSuite) TestCombineIndexSeek(c *C) {
ID: 2,
Name: model.NewCIStr("test"),
Columns: []*model.IndexColumn{
{},
{},
{Tp: &types.FieldType{}},
{Tp: &types.FieldType{}},
},
},
},
Expand Down