rowcodec: make rowcodec usable by mocktikv & unistore (#13774)

pingcap · Dec 9, 2019 · b364b89 · b364b89
1 parent d7206fb
commit b364b89
Show file tree

Hide file tree

Showing 7 changed files with 1,543 additions and 497 deletions.
diff --git a/util/rowcodec/bench_test.go b/util/rowcodec/bench_test.go
@@ -0,0 +1,94 @@
+// Copyright 2019 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package rowcodec_test
+
+import (
+	"testing"
+	"time"
+
+	"github.com/pingcap/parser/mysql"
+	"github.com/pingcap/tidb/sessionctx/stmtctx"
+	"github.com/pingcap/tidb/tablecodec"
+	"github.com/pingcap/tidb/types"
+	"github.com/pingcap/tidb/util/chunk"
+	"github.com/pingcap/tidb/util/rowcodec"
+)
+
+// BenchmarkEncode measures rowcodec.Encoder.Encode on a three-datum row
+// (1, "abc", 1.1), feeding the returned buffer back in on every iteration.
+func BenchmarkEncode(b *testing.B) {
+	b.ReportAllocs()
+	oldRow := types.MakeDatums(1, "abc", 1.1)
+	var xb rowcodec.Encoder
+	var buf []byte
+	colIDs := []int64{1, 2, 3}
+	var err error
+	// Keep fixture construction out of the reported time and allocation counts.
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		buf, err = xb.Encode(nil, colIDs, oldRow, buf)
+		if err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
+// BenchmarkEncodeFromOldRow measures rowcodec.EncodeFromOldRow on row data
+// produced once up front by tablecodec.EncodeRow, reusing the output buffer
+// across iterations.
+func BenchmarkEncodeFromOldRow(b *testing.B) {
+	b.ReportAllocs()
+	oldRow := types.MakeDatums(1, "abc", 1.1)
+	oldRowData, err := tablecodec.EncodeRow(new(stmtctx.StatementContext), oldRow, []int64{1, 2, 3}, nil, nil)
+	if err != nil {
+		b.Fatal(err)
+	}
+	var xb rowcodec.Encoder
+	var buf []byte
+	// The old-format fixture above is setup only; exclude its cost from the
+	// reported time and allocation counts.
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		buf, err = rowcodec.EncodeFromOldRow(&xb, nil, oldRowData, buf)
+		if err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
+// BenchmarkDecode measures ChunkDecoder.DecodeToChunk on a single row that is
+// encoded once during setup. The destination chunk is reset before every
+// decode so each iteration starts from an empty chunk.
+func BenchmarkDecode(b *testing.B) {
+	b.ReportAllocs()
+	oldRow := types.MakeDatums(1, "abc", 1.1)
+	colIDs := []int64{-1, 2, 3}
+	tps := []*types.FieldType{
+		types.NewFieldType(mysql.TypeLonglong),
+		types.NewFieldType(mysql.TypeString),
+		types.NewFieldType(mysql.TypeDouble),
+	}
+	var xb rowcodec.Encoder
+	xRowData, err := xb.Encode(nil, colIDs, oldRow, nil)
+	if err != nil {
+		b.Fatal(err)
+	}
+	// Mirror each field type into the ColInfo form the decoder is built from.
+	cols := make([]rowcodec.ColInfo, len(tps))
+	for i, tp := range tps {
+		cols[i] = rowcodec.ColInfo{
+			ID:      colIDs[i],
+			Tp:      int32(tp.Tp),
+			Flag:    int32(tp.Flag),
+			Flen:    tp.Flen,
+			Decimal: tp.Decimal,
+			Elems:   tp.Elems,
+		}
+	}
+	decoder := rowcodec.NewChunkDecoder(cols, -1, nil, time.Local)
+	chk := chunk.NewChunkWithCapacity(tps, 1)
+	// Encoding the row and building the decoder/chunk are setup only; exclude
+	// them from the reported time and allocation counts.
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		chk.Reset()
+		err = decoder.DecodeToChunk(xRowData, 1, chk)
+		if err != nil {
+			b.Fatal(err)
+		}
+	}
+}
diff --git a/util/rowcodec/common.go b/util/rowcodec/common.go
@@ -26,68 +26,19 @@ const CodecVer = 128
 
 var errInvalidCodecVer = errors.New("invalid codec version")
 
-// row is the struct type used to access a row.
-// There are two types of row, small and large.
-// A small row takes one byte colID and two bytes offset, optimized for most cases.
-// If the max colID is larger than 255 or total value size is larger than 65535, the row type would be large.
-// A large row takes four bytes colID and four bytes offset.
-type row struct {
-	isLarge        bool
-	numNotNullCols uint16
-	numNullCols    uint16
-	data           []byte
-
-	// for small rows
-	colIDs  []byte
-	offsets []uint16
-
-	// for large row
-	colIDs32  []uint32
-	offsets32 []uint32
-}
-
-func (r *row) getData(i int) []byte {
-	var start, end uint32
-	if r.isLarge {
-		if i > 0 {
-			start = r.offsets32[i-1]
-		}
-		end = r.offsets32[i]
-	} else {
-		if i > 0 {
-			start = uint32(r.offsets[i-1])
-		}
-		end = uint32(r.offsets[i])
-	}
-	return r.data[start:end]
-}
-
-func (r *row) setRowData(rowData []byte) error {
-	if rowData[0] != CodecVer {
-		return errInvalidCodecVer
-	}
-	r.isLarge = rowData[1]&1 > 0
-	r.numNotNullCols = binary.LittleEndian.Uint16(rowData[2:])
-	r.numNullCols = binary.LittleEndian.Uint16(rowData[4:])
-	cursor := 6
-	if r.isLarge {
-		colIDsLen := int(r.numNotNullCols+r.numNullCols) * 4
-		r.colIDs32 = bytesToU32Slice(rowData[cursor : cursor+colIDsLen])
-		cursor += colIDsLen
-		offsetsLen := int(r.numNotNullCols) * 4
-		r.offsets32 = bytesToU32Slice(rowData[cursor : cursor+offsetsLen])
-		cursor += offsetsLen
-	} else {
-		colIDsLen := int(r.numNotNullCols + r.numNullCols)
-		r.colIDs = rowData[cursor : cursor+colIDsLen]
-		cursor += colIDsLen
-		offsetsLen := int(r.numNotNullCols) * 2
-		r.offsets = bytes2U16Slice(rowData[cursor : cursor+offsetsLen])
-		cursor += offsetsLen
-	}
-	r.data = rowData[cursor:]
-	return nil
-}
+// Flag values for the first byte of an encoded value; that byte specifies the encoding type.
+const (
+	NilFlag          byte = 0
+	BytesFlag        byte = 1
+	CompactBytesFlag byte = 2
+	IntFlag          byte = 3
+	UintFlag         byte = 4
+	FloatFlag        byte = 5
+	DecimalFlag      byte = 6
+	VarintFlag       byte = 8 // NOTE(review): 7 is not assigned in this block — presumably reserved elsewhere; confirm.
+	VaruintFlag      byte = 9
+	JSONFlag         byte = 10
+)
 
 func bytesToU32Slice(b []byte) []uint32 {
 	if len(b) == 0 {
@@ -196,3 +147,84 @@ func decodeUint(val []byte) uint64 {
 		return binary.LittleEndian.Uint64(val)
 	}
 }
+
+// largeNotNullSorter gives an Encoder the Len/Less/Swap method set expected
+// by sort.Sort. It spans the first numNotNullCols entries of colIDs32 and
+// moves the matching values entries along with the column IDs.
+type largeNotNullSorter Encoder
+
+// Len reports how many not-null columns take part in the sort.
+func (s *largeNotNullSorter) Len() int {
+	count := s.numNotNullCols
+	return int(count)
+}
+
+// Less reports whether the colIDs32 entry at i is smaller than the one at j.
+func (s *largeNotNullSorter) Less(i, j int) bool {
+	ids := s.colIDs32
+	left, right := ids[i], ids[j]
+	return left < right
+}
+
+// Swap exchanges positions i and j in both colIDs32 and values.
+func (s *largeNotNullSorter) Swap(i, j int) {
+	ids, vals := s.colIDs32, s.values
+	ids[i], ids[j] = ids[j], ids[i]
+	vals[i], vals[j] = vals[j], vals[i]
+}
+
+// smallNotNullSorter gives an Encoder the Len/Less/Swap method set expected
+// by sort.Sort. It spans the first numNotNullCols entries of colIDs and
+// moves the matching values entries along with the column IDs.
+type smallNotNullSorter Encoder
+
+// Len reports how many not-null columns take part in the sort.
+func (s *smallNotNullSorter) Len() int {
+	count := s.numNotNullCols
+	return int(count)
+}
+
+// Less reports whether the colIDs entry at i is smaller than the one at j.
+func (s *smallNotNullSorter) Less(i, j int) bool {
+	ids := s.colIDs
+	left, right := ids[i], ids[j]
+	return left < right
+}
+
+// Swap exchanges positions i and j in both colIDs and values.
+func (s *smallNotNullSorter) Swap(i, j int) {
+	ids, vals := s.colIDs, s.values
+	ids[i], ids[j] = ids[j], ids[i]
+	vals[i], vals[j] = vals[j], vals[i]
+}
+
+// smallNullSorter gives an Encoder the Len/Less/Swap method set expected by
+// sort.Sort. It operates on the part of colIDs that starts at index
+// numNotNullCols; indices passed to Less and Swap are relative to that tail.
+type smallNullSorter Encoder
+
+// Len reports how many null columns take part in the sort.
+func (s *smallNullSorter) Len() int {
+	count := s.numNullCols
+	return int(count)
+}
+
+// Less reports whether the null-column ID at i is smaller than the one at j.
+func (s *smallNullSorter) Less(i, j int) bool {
+	tail := s.colIDs[s.numNotNullCols:]
+	left, right := tail[i], tail[j]
+	return left < right
+}
+
+// Swap exchanges the null-column IDs at i and j; no values are moved.
+func (s *smallNullSorter) Swap(i, j int) {
+	tail := s.colIDs[s.numNotNullCols:]
+	tail[j], tail[i] = tail[i], tail[j]
+}
+
+// largeNullSorter gives an Encoder the Len/Less/Swap method set expected by
+// sort.Sort. It operates on the part of colIDs32 that starts at index
+// numNotNullCols; indices passed to Less and Swap are relative to that tail.
+type largeNullSorter Encoder
+
+// Len reports how many null columns take part in the sort.
+func (s *largeNullSorter) Len() int {
+	count := s.numNullCols
+	return int(count)
+}
+
+// Less reports whether the null-column ID at i is smaller than the one at j.
+func (s *largeNullSorter) Less(i, j int) bool {
+	tail := s.colIDs32[s.numNotNullCols:]
+	left, right := tail[i], tail[j]
+	return left < right
+}
+
+// Swap exchanges the null-column IDs at i and j; no values are moved.
+func (s *largeNullSorter) Swap(i, j int) {
+	tail := s.colIDs32[s.numNotNullCols:]
+	tail[j], tail[i] = tail[i], tail[j]
+}
+
+const (
+	// rowKeyLen is the length of a row key in bytes: 't' + 8-byte id + "_r" + 8-byte handle = 19.
+	rowKeyLen = 19
+	// recordPrefixIdx is the index of the record flag 'r' in a row key as used by master tidb-server.
+	// The rowkey format is t{8 bytes id}_r{8 bytes handle}
+	recordPrefixIdx = 10
+)
+
+// IsRowKey reports whether key looks like a row key: it must be exactly
+// rowKeyLen bytes long, begin with 't', and hold 'r' at recordPrefixIdx.
+// This function will be used in unistore.
+func IsRowKey(key []byte) bool {
+	// The length check comes first so the fixed-position reads below stay in range.
+	if len(key) != rowKeyLen {
+		return false
+	}
+	if key[0] != 't' {
+		return false
+	}
+	return key[recordPrefixIdx] == 'r'
+}
+
+// IsNewFormat checks whether row data is in new-format, i.e. whether its
+// first byte equals CodecVer. Nil or empty row data is reported as not
+// new-format rather than panicking on the index.
+func IsNewFormat(rowData []byte) bool {
+	return len(rowData) > 0 && rowData[0] == CodecVer
+}