diff --git a/pkg/statistics/handle/usage/BUILD.bazel b/pkg/statistics/handle/usage/BUILD.bazel index eda51d50998ad..b918b0bc8385b 100644 --- a/pkg/statistics/handle/usage/BUILD.bazel +++ b/pkg/statistics/handle/usage/BUILD.bazel @@ -35,10 +35,12 @@ go_test( timeout = "short", srcs = [ "index_usage_integration_test.go", + "predicate_column_test.go", "session_stats_collect_test.go", ], embed = [":usage"], flaky = True, + shard_count = 3, deps = [ "//pkg/infoschema", "//pkg/parser/model", diff --git a/pkg/statistics/handle/usage/predicate_column.go b/pkg/statistics/handle/usage/predicate_column.go index 208917a3a6372..1e45151aff01c 100644 --- a/pkg/statistics/handle/usage/predicate_column.go +++ b/pkg/statistics/handle/usage/predicate_column.go @@ -16,9 +16,11 @@ package usage import ( "encoding/json" + "fmt" "time" "github.com/pingcap/errors" + "github.com/pingcap/tidb/pkg/infoschema" "github.com/pingcap/tidb/pkg/parser/model" "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/sessionctx" @@ -65,7 +67,7 @@ func (u *statsUsageImpl) GetPredicateColumns(tableID int64) (columnIDs []int64, err = utilstats.CallWithSCtx(u.statsHandle.SPool(), func(sctx sessionctx.Context) error { columnIDs, err = GetPredicateColumns(sctx, tableID) return err - }) + }, utilstats.FlagWrapTxn) return } @@ -123,6 +125,11 @@ func LoadColumnStatsUsage(sctx sessionctx.Context, loc *time.Location) (map[mode // GetPredicateColumns returns IDs of predicate columns, which are the columns whose stats are used(needed) when generating query plans. func GetPredicateColumns(sctx sessionctx.Context, tableID int64) ([]int64, error) { + // Each time we retrieve the predicate columns, we also attempt to remove any column stats usage information whose column is dropped. + err := cleanupDroppedColumnStatsUsage(sctx, tableID) + if err != nil { + return nil, errors.Trace(err) + } // This time is the time when `set global tidb_enable_column_tracking = 0`. disableTime, err := getDisableColumnTrackingTime(sctx) if err != nil { @@ -159,6 +166,33 @@ func GetPredicateColumns(sctx sessionctx.Context, tableID int64) ([]int64, error return columnIDs, nil } +// cleanupDroppedColumnStatsUsage deletes the column stats usage information whose column is dropped. +func cleanupDroppedColumnStatsUsage(sctx sessionctx.Context, tableID int64) error { + is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema) + table, ok := is.TableByID(tableID) + if !ok { + // Usually, it should not happen. + // But if it happens, we can safely do nothing. + return nil + } + allColumns := table.Meta().Columns + // Due to SQL limitations, column IDs must be converted to strings for proper escaping in the query :( + columnIDs := make([]string, 0, len(allColumns)) + for _, col := range allColumns { + columnIDs = append(columnIDs, fmt.Sprintf("%d", col.ID)) + } + + // Delete the column stats usage information whose column is dropped. + _, _, err := utilstats.ExecRows( + sctx, + "DELETE FROM mysql.column_stats_usage WHERE table_id = %? AND column_id NOT IN (%?)", + tableID, + columnIDs, + ) + + return err +} + // getDisableColumnTrackingTime reads the value of tidb_disable_column_tracking_time from mysql.tidb if it exists. // UTC time format is used to store the time. func getDisableColumnTrackingTime(sctx sessionctx.Context) (*time.Time, error) { diff --git a/pkg/statistics/handle/usage/predicate_column_test.go b/pkg/statistics/handle/usage/predicate_column_test.go new file mode 100644 index 0000000000000..d3e0a40b25d1f --- /dev/null +++ b/pkg/statistics/handle/usage/predicate_column_test.go @@ -0,0 +1,56 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package usage_test + +import ( + "testing" + + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/testkit" + "github.com/stretchr/testify/require" +) + +func TestCleanupPredicateColumns(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + + // Create table and select data with predicate. + tk.MustExec("use test") + tk.MustExec("create table t (a int, b int)") + tk.MustExec("insert into t values (1, 1), (2, 2), (3, 3)") + // Enable column tracking. + tk.MustExec("set global tidb_enable_column_tracking = 1") + tk.MustQuery("select * from t where a > 1").Check(testkit.Rows("2 2", "3 3")) + tk.MustQuery("select * from t where b > 1").Check(testkit.Rows("2 2", "3 3")) + + // Dump the statistics usage. + h := dom.StatsHandle() + err := h.DumpColStatsUsageToKV() + require.NoError(t, err) + + // Check the statistics usage. + rows := tk.MustQuery("select * from mysql.column_stats_usage").Rows() + require.Len(t, rows, 2) + + // Drop column b. + tk.MustExec("alter table t drop column b") + // Get table ID. + is := dom.InfoSchema() + tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + require.NoError(t, err) + columns, err := h.GetPredicateColumns(tbl.Meta().ID) + require.NoError(t, err) + require.Len(t, columns, 1) +}