From 06aa1925cc2f9462dd550ac841af4ed4336dc2e2 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 11 Sep 2024 23:43:27 +0800 Subject: [PATCH 1/3] This is an automated cherry-pick of #55685 Signed-off-by: ti-chi-bot --- pkg/statistics/handle/bootstrap.go | 42 ++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/pkg/statistics/handle/bootstrap.go b/pkg/statistics/handle/bootstrap.go index 4005c93fd9685..e310274e4196a 100644 --- a/pkg/statistics/handle/bootstrap.go +++ b/pkg/statistics/handle/bootstrap.go @@ -554,6 +554,12 @@ func (h *Handle) initStatsFMSketch(cache util.StatsCache) error { func (*Handle) initStatsBuckets4Chunk(cache util.StatsCache, iter *chunk.Iterator4Chunk) { var table *statistics.Table + unspecifiedLengthTp := types.NewFieldType(mysql.TypeBlob) + var ( + hasErr bool + failedTableID int64 + failedHistID int64 + ) for row := iter.Begin(); row != iter.End(); row = iter.Next() { tableID, isIndex, histID := row.GetInt64(0), row.GetInt64(1), row.GetInt64(2) if table == nil || table.PhysicalID != tableID { @@ -595,6 +601,7 @@ func (*Handle) initStatsBuckets4Chunk(cache util.StatsCache, iter *chunk.Iterato sc.AllowInvalidDate = true sc.IgnoreZeroInDate = true var err error +<<<<<<< HEAD lower, err = d.ConvertTo(sc, &column.Info.FieldType) if err != nil { logutil.BgLogger().Debug("decode bucket lower bound failed", zap.Error(err)) @@ -606,6 +613,38 @@ func (*Handle) initStatsBuckets4Chunk(cache util.StatsCache, iter *chunk.Iterato if err != nil { logutil.BgLogger().Debug("decode bucket upper bound failed", zap.Error(err)) delete(table.Columns, histID) +======= + if column.Info.FieldType.EvalType() == types.ETString && column.Info.FieldType.GetType() != mysql.TypeEnum && column.Info.FieldType.GetType() != mysql.TypeSet { + // For new collation data, when storing the bounds of the histogram, we store the collate key instead of the + // original value. + // But there's additional conversion logic for new collation data, and the collate key might be longer than + // the FieldType.flen. + // If we use the original FieldType here, there might be errors like "Invalid utf8mb4 character string" + // or "Data too long". + // So we change it to TypeBlob to bypass those logics here. + lower, err = d.ConvertTo(statistics.UTCWithAllowInvalidDateCtx, unspecifiedLengthTp) + } else { + lower, err = d.ConvertTo(statistics.UTCWithAllowInvalidDateCtx, &column.Info.FieldType) + } + if err != nil { + hasErr = true + failedTableID = tableID + failedHistID = histID + table.DelCol(histID) + continue + } + d = types.NewBytesDatum(row.GetBytes(6)) + if column.Info.FieldType.EvalType() == types.ETString && column.Info.FieldType.GetType() != mysql.TypeEnum && column.Info.FieldType.GetType() != mysql.TypeSet { + upper, err = d.ConvertTo(statistics.UTCWithAllowInvalidDateCtx, unspecifiedLengthTp) + } else { + upper, err = d.ConvertTo(statistics.UTCWithAllowInvalidDateCtx, &column.Info.FieldType) + } + if err != nil { + hasErr = true + failedTableID = tableID + failedHistID = histID + table.DelCol(histID) +>>>>>>> ebf31468577 (statistics: fix the error that init stats might got failure when decoding column bucket (#55685)) continue } } @@ -614,6 +653,9 @@ func (*Handle) initStatsBuckets4Chunk(cache util.StatsCache, iter *chunk.Iterato if table != nil { cache.Put(table.PhysicalID, table) // put this table in the cache because all statstics of the table have been read. } + if hasErr { + logutil.BgLogger().Error("failed to convert datum for at least one histogram bucket", zap.Int64("table ID", failedTableID), zap.Int64("column ID", failedHistID)) + } } func (h *Handle) initStatsBuckets(cache util.StatsCache, totalMemory uint64) error { From 5c4faf7ce124146380d2f993ae51aebcb06b247f Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 25 Sep 2024 02:57:54 +0800 Subject: [PATCH 2/3] Update bootstrap.go --- pkg/statistics/handle/bootstrap.go | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/pkg/statistics/handle/bootstrap.go b/pkg/statistics/handle/bootstrap.go index e310274e4196a..a3d116b3c9dc3 100644 --- a/pkg/statistics/handle/bootstrap.go +++ b/pkg/statistics/handle/bootstrap.go @@ -601,19 +601,6 @@ func (*Handle) initStatsBuckets4Chunk(cache util.StatsCache, iter *chunk.Iterato sc.AllowInvalidDate = true sc.IgnoreZeroInDate = true var err error -<<<<<<< HEAD - lower, err = d.ConvertTo(sc, &column.Info.FieldType) - if err != nil { - logutil.BgLogger().Debug("decode bucket lower bound failed", zap.Error(err)) - delete(table.Columns, histID) - continue - } - d = types.NewBytesDatum(row.GetBytes(6)) - upper, err = d.ConvertTo(sc, &column.Info.FieldType) - if err != nil { - logutil.BgLogger().Debug("decode bucket upper bound failed", zap.Error(err)) - delete(table.Columns, histID) -======= if column.Info.FieldType.EvalType() == types.ETString && column.Info.FieldType.GetType() != mysql.TypeEnum && column.Info.FieldType.GetType() != mysql.TypeSet { // For new collation data, when storing the bounds of the histogram, we store the collate key instead of the // original value. @@ -622,9 +609,9 @@ func (*Handle) initStatsBuckets4Chunk(cache util.StatsCache, iter *chunk.Iterato // If we use the original FieldType here, there might be errors like "Invalid utf8mb4 character string" // or "Data too long". // So we change it to TypeBlob to bypass those logics here. - lower, err = d.ConvertTo(statistics.UTCWithAllowInvalidDateCtx, unspecifiedLengthTp) + lower, err = d.ConvertTo(sc, unspecifiedLengthTp) } else { - lower, err = d.ConvertTo(statistics.UTCWithAllowInvalidDateCtx, &column.Info.FieldType) + lower, err = d.ConvertTo(sc, &column.Info.FieldType) } if err != nil { hasErr = true @@ -635,16 +622,15 @@ func (*Handle) initStatsBuckets4Chunk(cache util.StatsCache, iter *chunk.Iterato } d = types.NewBytesDatum(row.GetBytes(6)) if column.Info.FieldType.EvalType() == types.ETString && column.Info.FieldType.GetType() != mysql.TypeEnum && column.Info.FieldType.GetType() != mysql.TypeSet { - upper, err = d.ConvertTo(statistics.UTCWithAllowInvalidDateCtx, unspecifiedLengthTp) + upper, err = d.ConvertTo(sc, unspecifiedLengthTp) } else { - upper, err = d.ConvertTo(statistics.UTCWithAllowInvalidDateCtx, &column.Info.FieldType) + upper, err = d.ConvertTo(sc, &column.Info.FieldType) } if err != nil { hasErr = true failedTableID = tableID failedHistID = histID table.DelCol(histID) ->>>>>>> ebf31468577 (statistics: fix the error that init stats might got failure when decoding column bucket (#55685)) continue } } From 2ad24682f8eb21fbc104beae2b031b2a0ec0a156 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 25 Sep 2024 22:36:04 +0800 Subject: [PATCH 3/3] Update bootstrap.go --- pkg/statistics/handle/bootstrap.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/statistics/handle/bootstrap.go b/pkg/statistics/handle/bootstrap.go index a3d116b3c9dc3..8fbcff615f0c1 100644 --- a/pkg/statistics/handle/bootstrap.go +++ b/pkg/statistics/handle/bootstrap.go @@ -617,7 +617,7 @@ func (*Handle) initStatsBuckets4Chunk(cache util.StatsCache, iter *chunk.Iterato hasErr = true failedTableID = tableID failedHistID = histID - table.DelCol(histID) + delete(table.Columns, histID) continue } d = types.NewBytesDatum(row.GetBytes(6)) @@ -630,7 +630,7 @@ func (*Handle) initStatsBuckets4Chunk(cache util.StatsCache, iter *chunk.Iterato hasErr = true failedTableID = tableID failedHistID = histID - table.DelCol(histID) + delete(table.Columns, histID) continue } }