Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics: fix the error that init stats might fail when decoding column bucket (#55685) #56026

Merged
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 34 additions & 6 deletions pkg/statistics/handle/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,12 @@ func (h *Handle) initStatsFMSketch(cache util.StatsCache) error {

func (*Handle) initStatsBuckets4Chunk(cache util.StatsCache, iter *chunk.Iterator4Chunk) {
var table *statistics.Table
unspecifiedLengthTp := types.NewFieldType(mysql.TypeBlob)
var (
hasErr bool
failedTableID int64
failedHistID int64
)
for row := iter.Begin(); row != iter.End(); row = iter.Next() {
tableID, isIndex, histID := row.GetInt64(0), row.GetInt64(1), row.GetInt64(2)
if table == nil || table.PhysicalID != tableID {
Expand Down Expand Up @@ -595,17 +601,36 @@ func (*Handle) initStatsBuckets4Chunk(cache util.StatsCache, iter *chunk.Iterato
sc.AllowInvalidDate = true
sc.IgnoreZeroInDate = true
var err error
lower, err = d.ConvertTo(sc, &column.Info.FieldType)
if column.Info.FieldType.EvalType() == types.ETString && column.Info.FieldType.GetType() != mysql.TypeEnum && column.Info.FieldType.GetType() != mysql.TypeSet {
// For new collation data, when storing the bounds of the histogram, we store the collate key instead of the
// original value.
// But there's additional conversion logic for new collation data, and the collate key might be longer than
// the FieldType.flen.
// If we use the original FieldType here, there might be errors like "Invalid utf8mb4 character string"
// or "Data too long".
// So we change it to TypeBlob to bypass that logic here.
lower, err = d.ConvertTo(sc, unspecifiedLengthTp)
} else {
lower, err = d.ConvertTo(sc, &column.Info.FieldType)
}
if err != nil {
logutil.BgLogger().Debug("decode bucket lower bound failed", zap.Error(err))
delete(table.Columns, histID)
hasErr = true
failedTableID = tableID
failedHistID = histID
table.DelCol(histID)
continue
}
d = types.NewBytesDatum(row.GetBytes(6))
upper, err = d.ConvertTo(sc, &column.Info.FieldType)
if column.Info.FieldType.EvalType() == types.ETString && column.Info.FieldType.GetType() != mysql.TypeEnum && column.Info.FieldType.GetType() != mysql.TypeSet {
upper, err = d.ConvertTo(sc, unspecifiedLengthTp)
} else {
upper, err = d.ConvertTo(sc, &column.Info.FieldType)
}
if err != nil {
logutil.BgLogger().Debug("decode bucket upper bound failed", zap.Error(err))
delete(table.Columns, histID)
hasErr = true
failedTableID = tableID
failedHistID = histID
table.DelCol(histID)
continue
}
}
Expand All @@ -614,6 +639,9 @@ func (*Handle) initStatsBuckets4Chunk(cache util.StatsCache, iter *chunk.Iterato
if table != nil {
cache.Put(table.PhysicalID, table) // put this table in the cache because all statistics of the table have been read.
}
if hasErr {
logutil.BgLogger().Error("failed to convert datum for at least one histogram bucket", zap.Int64("table ID", failedTableID), zap.Int64("column ID", failedHistID))
}
}

func (h *Handle) initStatsBuckets(cache util.StatsCache, totalMemory uint64) error {
Expand Down