From 6199ae8e79ef903a88852bff75fc02cacbcf1166 Mon Sep 17 00:00:00 2001 From: Rustin Liu Date: Wed, 25 Oct 2023 21:03:34 +0800 Subject: [PATCH] This is an automated cherry-pick of #47928 Signed-off-by: ti-chi-bot --- statistics/builder.go | 47 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/statistics/builder.go b/statistics/builder.go index c9ed700d7cbc0..1d6198e2c761a 100644 --- a/statistics/builder.go +++ b/statistics/builder.go @@ -19,12 +19,23 @@ import ( "math" "github.com/pingcap/errors" +<<<<<<< HEAD:statistics/builder.go "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/sessionctx/stmtctx" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/codec" "github.com/pingcap/tidb/util/collate" "github.com/pingcap/tidb/util/memory" +======= + "github.com/pingcap/tidb/pkg/sessionctx" + "github.com/pingcap/tidb/pkg/sessionctx/stmtctx" + "github.com/pingcap/tidb/pkg/types" + "github.com/pingcap/tidb/pkg/util/codec" + "github.com/pingcap/tidb/pkg/util/collate" + "github.com/pingcap/tidb/pkg/util/logutil" + "github.com/pingcap/tidb/pkg/util/memory" + "go.uber.org/zap" +>>>>>>> d9d5b9c3972 (statistic: fix panic when building topN (#47928)):pkg/statistics/builder.go ) // SortedBuilder is used to build histograms for PK and index. @@ -372,12 +383,46 @@ func BuildHistAndTopN( if err != nil { return nil, nil, errors.Trace(err) } + // For debugging invalid sample data. + var ( + foundTwice bool + firstTimeSample types.Datum + ) for j := 0; j < len(topNList); j++ { if bytes.Equal(sampleBytes, topNList[j].Encoded) { - // find the same value in topn: need to skip over this value in samples + // This should never happen, but we met this panic before, so we add this check here. 
+ // See: https://github.com/pingcap/tidb/issues/35948 + if foundTwice { + datumString, err := firstTimeSample.ToString() + if err != nil { + logutil.BgLogger().With( + zap.String("category", "stats"), + ).Error("try to convert datum to string failed", zap.Error(err)) + } + + logutil.BgLogger().With( + zap.String("category", "stats"), + ).Warn( + "invalid sample data", + zap.Bool("isColumn", isColumn), + zap.Int64("columnID", id), + zap.String("datum", datumString), + zap.Binary("sampleBytes", sampleBytes), + zap.Binary("topNBytes", topNList[j].Encoded), + ) + // NOTE: if we don't break here, we may meet panic in the following code. + // The i may decrease to a negative value. + // We haven't fixed the issue here, because we don't know how to + // remove the invalid sample data from the samples. + break + } + // First time to find the same value in topN: need to record the sample data for debugging. + firstTimeSample = samples[i].Value + // Found the same value in topn: need to skip over this value in samples. copy(samples[i:], samples[uint64(i)+topNList[j].Count:]) samples = samples[:uint64(len(samples))-topNList[j].Count] i-- + foundTwice = true continue } }