diff --git a/br/pkg/restore/snap_client/systable_restore.go b/br/pkg/restore/snap_client/systable_restore.go index 6210c2fccab4a..c939640849f37 100644 --- a/br/pkg/restore/snap_client/systable_restore.go +++ b/br/pkg/restore/snap_client/systable_restore.go @@ -39,6 +39,7 @@ var statsTables = map[string]map[string]struct{}{ "stats_meta_history": {}, "stats_table_locked": {}, "stats_top_n": {}, + "column_stats_usage": {}, }, } @@ -57,10 +58,8 @@ var sysPrivilegeTableMap = map[string]string{ var unRecoverableTable = map[string]map[string]struct{}{ "mysql": { // some variables in tidb (e.g. gc_safe_point) cannot be recovered. - "tidb": {}, - "global_variables": {}, - - "column_stats_usage": {}, + "tidb": {}, + "global_variables": {}, "capture_plan_baselines_blacklist": {}, // gc info don't need to recover. "gc_delete_range": {}, diff --git a/pkg/statistics/handle/storage/BUILD.bazel b/pkg/statistics/handle/storage/BUILD.bazel index c477c6347663f..b89e2135e4466 100644 --- a/pkg/statistics/handle/storage/BUILD.bazel +++ b/pkg/statistics/handle/storage/BUILD.bazel @@ -29,6 +29,7 @@ go_library( "//pkg/statistics/handle/logutil", "//pkg/statistics/handle/metrics", "//pkg/statistics/handle/types", + "//pkg/statistics/handle/usage/predicatecolumn", "//pkg/statistics/handle/util", "//pkg/types", "//pkg/util/chunk", @@ -56,7 +57,7 @@ go_test( "stats_read_writer_test.go", ], flaky = True, - shard_count = 22, + shard_count = 23, deps = [ ":storage", "//pkg/domain", diff --git a/pkg/statistics/handle/storage/dump_test.go b/pkg/statistics/handle/storage/dump_test.go index 853987b581d34..82bf11d1b9dac 100644 --- a/pkg/statistics/handle/storage/dump_test.go +++ b/pkg/statistics/handle/storage/dump_test.go @@ -238,6 +238,48 @@ func TestLoadPartitionStats(t *testing.T) { requireTableEqual(t, originGlobalStats, dom.StatsHandle().GetTableStats(tableInfo)) } +func TestLoadPredicateColumns(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, 
store) + tk.MustExec("use test") + + tk.MustExec("drop table if exists t") + tk.MustExec("create table t (a int, b int, c int, primary key(a), index idx(b))") + tk.MustExec("insert into t values (1, 2, 3), (2, 3, 4), (3, 4, 5)") + tk.MustExec("select * from t where b = 1") + is := dom.InfoSchema() + h := dom.StatsHandle() + require.NoError(t, h.Update(context.Background(), is)) + require.NoError(t, h.DumpColStatsUsageToKV()) + tk.MustExec("analyze table t") + + table, err := is.TableByName(context.Background(), model.NewCIStr("test"), model.NewCIStr("t")) + require.NoError(t, err) + tableInfo := table.Meta() + jsonTbl, err := h.DumpStatsToJSON("test", tableInfo, nil, true) + require.NoError(t, err) + + // remove all statistics + tk.MustExec("delete from mysql.stats_meta") + tk.MustExec("delete from mysql.stats_histograms") + tk.MustExec("delete from mysql.stats_buckets") + tk.MustExec("delete from mysql.column_stats_usage") + h.Clear() + + // load stats back + require.NoError(t, h.LoadStatsFromJSON(context.Background(), is, jsonTbl, 0)) + + // check column stats usage (testkit renders a SQL NULL cell as the string "<nil>") + rows := tk.MustQuery("select table_id, column_id, last_used_at, last_analyzed_at from mysql.column_stats_usage order by column_id").Rows() + require.Equal(t, 2, len(rows)) + require.Equal(t, "1", rows[0][1].(string)) + require.Equal(t, "<nil>", rows[0][2], "It hasn't been used since last analyze") + require.NotEqual(t, "<nil>", rows[0][3]) + require.Equal(t, "2", rows[1][1].(string)) + require.NotEqual(t, "<nil>", rows[1][2]) + require.NotEqual(t, "<nil>", rows[1][3]) +} + func TestLoadPartitionStatsErrPanic(t *testing.T) { store, dom := testkit.CreateMockStoreAndDomain(t) tk := testkit.NewTestKit(t, store) @@ -666,6 +708,7 @@ PARTITION BY RANGE ( a ) ( persistStats(ctx, t, dom, "test", "t1", func(ctx context.Context, jsonTable *handleutil.JSONTable, physicalID int64) error { require.True(t, physicalID > 0) require.NotNil(t, jsonTable) + require.NotNil(t, jsonTable.PredicateColumns) statsCnt += 1 return nil }) @@
-674,6 +717,7 @@ PARTITION BY RANGE ( a ) ( persistStats(ctx, t, dom, "test", "t2", func(ctx context.Context, jsonTable *handleutil.JSONTable, physicalID int64) error { require.True(t, physicalID > 0) require.NotNil(t, jsonTable) + require.NotNil(t, jsonTable.PredicateColumns) statsCnt += 1 return nil }) diff --git a/pkg/statistics/handle/storage/json.go b/pkg/statistics/handle/storage/json.go index 910888a96aad2..88529945bde14 100644 --- a/pkg/statistics/handle/storage/json.go +++ b/pkg/statistics/handle/storage/json.go @@ -25,6 +25,7 @@ import ( "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/sessionctx" "github.com/pingcap/tidb/pkg/statistics" + statstypes "github.com/pingcap/tidb/pkg/statistics/handle/types" "github.com/pingcap/tidb/pkg/statistics/handle/util" "github.com/pingcap/tidb/pkg/types" compressutil "github.com/pingcap/tidb/pkg/util/compress" @@ -87,7 +88,13 @@ func dumpJSONCol(hist *statistics.Histogram, cmsketch *statistics.CMSketch, topn } // GenJSONTableFromStats generate jsonTable from tableInfo and stats -func GenJSONTableFromStats(sctx sessionctx.Context, dbName string, tableInfo *model.TableInfo, tbl *statistics.Table) (*util.JSONTable, error) { +func GenJSONTableFromStats( + sctx sessionctx.Context, + dbName string, + tableInfo *model.TableInfo, + tbl *statistics.Table, + colStatsUsage map[model.TableItemID]statstypes.ColStatsTimeInfo, +) (*util.JSONTable, error) { tracker := memory.NewTracker(memory.LabelForAnalyzeMemory, -1) tracker.AttachTo(sctx.GetSessionVars().MemTracker) defer tracker.Detach() @@ -135,6 +142,27 @@ func GenJSONTableFromStats(sctx sessionctx.Context, dbName string, tableInfo *mo return nil, outerErr } jsonTbl.ExtStats = dumpJSONExtendedStats(tbl.ExtendedStats) + if colStatsUsage != nil { + // nilIfNil returns nil if the provided *types.Time is nil; otherwise it returns a pointer to its string representation.
+ nilIfNil := func(t *types.Time) *string { + if t == nil { + return nil + } + s := t.String() + return &s + } + jsonColStatsUsage := make([]*util.JSONPredicateColumn, 0, len(colStatsUsage)) + for id, usage := range colStatsUsage { + jsonCol := &util.JSONPredicateColumn{ + ID: id.ID, + LastUsedAt: nilIfNil(usage.LastUsedAt), + LastAnalyzedAt: nilIfNil(usage.LastAnalyzedAt), + } + jsonColStatsUsage = append(jsonColStatsUsage, jsonCol) + } + jsonTbl.PredicateColumns = jsonColStatsUsage + } + return jsonTbl, nil } diff --git a/pkg/statistics/handle/storage/stats_read_writer.go b/pkg/statistics/handle/storage/stats_read_writer.go index 9c8cc307c8e12..60ed54de04393 100644 --- a/pkg/statistics/handle/storage/stats_read_writer.go +++ b/pkg/statistics/handle/storage/stats_read_writer.go @@ -20,6 +20,7 @@ import ( "runtime" "sync" "sync/atomic" + "time" "github.com/pingcap/errors" "github.com/pingcap/tidb/pkg/config" @@ -32,6 +33,7 @@ import ( "github.com/pingcap/tidb/pkg/statistics" handle_metrics "github.com/pingcap/tidb/pkg/statistics/handle/metrics" statstypes "github.com/pingcap/tidb/pkg/statistics/handle/types" + "github.com/pingcap/tidb/pkg/statistics/handle/usage/predicatecolumn" "github.com/pingcap/tidb/pkg/statistics/handle/util" "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util/intest" @@ -561,7 +563,12 @@ func (s *statsReadWriter) TableStatsToJSON(dbName string, tableInfo *model.Table if err != nil { return err } - jsonTbl, err = GenJSONTableFromStats(sctx, dbName, tableInfo, tbl) + // Note: This information is not shown in the session directly, so we can always use UTC here.
+ colStatsUsage, err := predicatecolumn.LoadColumnStatsUsageForTable(sctx, time.UTC, physicalID) + if err != nil { + return err + } + jsonTbl, err = GenJSONTableFromStats(sctx, dbName, tableInfo, tbl, colStatsUsage) return err }) if err != nil { @@ -717,5 +724,47 @@ func (s *statsReadWriter) loadStatsFromJSON(tableInfo *model.TableInfo, physical if err != nil { return errors.Trace(err) } + err = s.SaveColumnStatsUsageToStorage(tbl.PhysicalID, jsonTbl.PredicateColumns) + if err != nil { + return errors.Trace(err) + } return s.SaveMetaToStorage(tbl.PhysicalID, tbl.RealtimeCount, tbl.ModifyCount, util.StatsMetaHistorySourceLoadStats) } + +// SaveColumnStatsUsageToStorage saves column statistics usage information for a table into mysql.column_stats_usage. +func (s *statsReadWriter) SaveColumnStatsUsageToStorage(physicalID int64, predicateColumns []*util.JSONPredicateColumn) error { + return util.CallWithSCtx(s.statsHandler.SPool(), func(sctx sessionctx.Context) error { + colStatsUsage := make(map[model.TableItemID]statstypes.ColStatsTimeInfo, len(predicateColumns)) + for _, col := range predicateColumns { + if col == nil { + continue + } + itemID := model.TableItemID{TableID: physicalID, ID: col.ID} + lastUsedAt, err := parseTimeOrNil(col.LastUsedAt) + if err != nil { + return err + } + lastAnalyzedAt, err := parseTimeOrNil(col.LastAnalyzedAt) + if err != nil { + return err + } + colStatsUsage[itemID] = statstypes.ColStatsTimeInfo{ + LastUsedAt: lastUsedAt, + LastAnalyzedAt: lastAnalyzedAt, + } + } + return predicatecolumn.SaveColumnStatsUsageForTable(sctx, colStatsUsage) + }, util.FlagWrapTxn) +} + +func parseTimeOrNil(timeStr *string) (*types.Time, error) { + if timeStr == nil { + return nil, nil + } + // DefaultStmtNoWarningContext uses the UTC time zone.
+ parsedTime, err := types.ParseTime(types.DefaultStmtNoWarningContext, *timeStr, mysql.TypeTimestamp, types.MaxFsp) + if err != nil { + return nil, err + } + return &parsedTime, nil +} diff --git a/pkg/statistics/handle/usage/BUILD.bazel b/pkg/statistics/handle/usage/BUILD.bazel index 87819d8d29c21..092b379fe35ee 100644 --- a/pkg/statistics/handle/usage/BUILD.bazel +++ b/pkg/statistics/handle/usage/BUILD.bazel @@ -13,20 +13,17 @@ go_library( "//pkg/infoschema", "//pkg/metrics", "//pkg/parser/model", - "//pkg/parser/mysql", "//pkg/sessionctx", "//pkg/sessionctx/variable", - "//pkg/statistics", "//pkg/statistics/handle/storage", "//pkg/statistics/handle/types", "//pkg/statistics/handle/usage/indexusage", + "//pkg/statistics/handle/usage/predicatecolumn", "//pkg/statistics/handle/util", "//pkg/types", "//pkg/util", - "//pkg/util/logutil", "//pkg/util/sqlescape", "@com_github_pingcap_errors//:errors", - "@org_uber_go_zap//:zap", ], ) diff --git a/pkg/statistics/handle/usage/predicate_column.go b/pkg/statistics/handle/usage/predicate_column.go index dbc5d23f7c84e..2531bc23bae92 100644 --- a/pkg/statistics/handle/usage/predicate_column.go +++ b/pkg/statistics/handle/usage/predicate_column.go @@ -15,22 +15,14 @@ package usage import ( - "encoding/json" - "fmt" "time" - "github.com/pingcap/errors" - "github.com/pingcap/tidb/pkg/infoschema" "github.com/pingcap/tidb/pkg/parser/model" - "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/sessionctx" - "github.com/pingcap/tidb/pkg/statistics" statstypes "github.com/pingcap/tidb/pkg/statistics/handle/types" "github.com/pingcap/tidb/pkg/statistics/handle/usage/indexusage" + "github.com/pingcap/tidb/pkg/statistics/handle/usage/predicatecolumn" utilstats "github.com/pingcap/tidb/pkg/statistics/handle/util" - "github.com/pingcap/tidb/pkg/types" - "github.com/pingcap/tidb/pkg/util/logutil" - "go.uber.org/zap" ) // statsUsageImpl implements statstypes.StatsUsage. 
@@ -55,7 +47,7 @@ func NewStatsUsageImpl(statsHandle statstypes.StatsHandle) statstypes.StatsUsage // LoadColumnStatsUsage returns all columns' usage information. func (u *statsUsageImpl) LoadColumnStatsUsage(loc *time.Location) (colStatsMap map[model.TableItemID]statstypes.ColStatsTimeInfo, err error) { err = utilstats.CallWithSCtx(u.statsHandle.SPool(), func(sctx sessionctx.Context) error { - colStatsMap, err = LoadColumnStatsUsage(sctx, loc) + colStatsMap, err = predicatecolumn.LoadColumnStatsUsage(sctx, loc) return err }) return @@ -64,7 +56,7 @@ func (u *statsUsageImpl) LoadColumnStatsUsage(loc *time.Location) (colStatsMap m // GetPredicateColumns returns IDs of predicate columns, which are the columns whose stats are used(needed) when generating query plans. func (u *statsUsageImpl) GetPredicateColumns(tableID int64) (columnIDs []int64, err error) { err = utilstats.CallWithSCtx(u.statsHandle.SPool(), func(sctx sessionctx.Context) error { - columnIDs, err = GetPredicateColumns(sctx, tableID) + columnIDs, err = predicatecolumn.GetPredicateColumns(sctx, tableID) return err }, utilstats.FlagWrapTxn) return @@ -73,122 +65,8 @@ func (u *statsUsageImpl) GetPredicateColumns(tableID int64) (columnIDs []int64, // CollectColumnsInExtendedStats returns IDs of the columns involved in extended stats. func (u *statsUsageImpl) CollectColumnsInExtendedStats(tableID int64) (columnIDs []int64, err error) { err = utilstats.CallWithSCtx(u.statsHandle.SPool(), func(sctx sessionctx.Context) error { - columnIDs, err = CollectColumnsInExtendedStats(sctx, tableID) + columnIDs, err = predicatecolumn.CollectColumnsInExtendedStats(sctx, tableID) return err }) return } - -// LoadColumnStatsUsage loads column stats usage information from disk. 
-func LoadColumnStatsUsage(sctx sessionctx.Context, loc *time.Location) (map[model.TableItemID]statstypes.ColStatsTimeInfo, error) { - // Since we use another session from session pool to read mysql.column_stats_usage, which may have different @@time_zone, so we do time zone conversion here. - rows, _, err := utilstats.ExecRows(sctx, "SELECT table_id, column_id, CONVERT_TZ(last_used_at, @@TIME_ZONE, '+00:00'), CONVERT_TZ(last_analyzed_at, @@TIME_ZONE, '+00:00') FROM mysql.column_stats_usage") - if err != nil { - return nil, errors.Trace(err) - } - colStatsMap := make(map[model.TableItemID]statstypes.ColStatsTimeInfo, len(rows)) - for _, row := range rows { - if row.IsNull(0) || row.IsNull(1) { - continue - } - tblColID := model.TableItemID{TableID: row.GetInt64(0), ID: row.GetInt64(1), IsIndex: false} - var statsUsage statstypes.ColStatsTimeInfo - if !row.IsNull(2) { - gt, err := row.GetTime(2).GoTime(time.UTC) - if err != nil { - return nil, errors.Trace(err) - } - t := types.NewTime(types.FromGoTime(gt.In(loc)), mysql.TypeTimestamp, types.DefaultFsp) - statsUsage.LastUsedAt = &t - } - if !row.IsNull(3) { - gt, err := row.GetTime(3).GoTime(time.UTC) - if err != nil { - return nil, errors.Trace(err) - } - t := types.NewTime(types.FromGoTime(gt.In(loc)), mysql.TypeTimestamp, types.DefaultFsp) - statsUsage.LastAnalyzedAt = &t - } - colStatsMap[tblColID] = statsUsage - } - return colStatsMap, nil -} - -// GetPredicateColumns returns IDs of predicate columns, which are the columns whose stats are used(needed) when generating query plans. -func GetPredicateColumns(sctx sessionctx.Context, tableID int64) ([]int64, error) { - // Each time we retrieve the predicate columns, we also attempt to remove any column stats usage information whose column is dropped. 
- err := cleanupDroppedColumnStatsUsage(sctx, tableID) - if err != nil { - return nil, errors.Trace(err) - } - rows, _, err := utilstats.ExecRows( - sctx, - "SELECT column_id, CONVERT_TZ(last_used_at, @@TIME_ZONE, '+00:00') FROM mysql.column_stats_usage WHERE table_id = %? AND last_used_at IS NOT NULL", - tableID, - ) - if err != nil { - return nil, errors.Trace(err) - } - columnIDs := make([]int64, 0, len(rows)) - for _, row := range rows { - // Usually, it should not be NULL. - // This only happens when the last_used_at is not a valid time. - if row.IsNull(1) { - continue - } - colID := row.GetInt64(0) - columnIDs = append(columnIDs, colID) - } - return columnIDs, nil -} - -// cleanupDroppedColumnStatsUsage deletes the column stats usage information whose column is dropped. -func cleanupDroppedColumnStatsUsage(sctx sessionctx.Context, tableID int64) error { - is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema) - table, ok := is.TableByID(tableID) - if !ok { - // Usually, it should not happen. - // But if it happens, we can safely do nothing. - return nil - } - allColumns := table.Meta().Columns - // Due to SQL limitations, column IDs must be converted to strings for proper escaping in the query :( - columnIDs := make([]string, 0, len(allColumns)) - for _, col := range allColumns { - columnIDs = append(columnIDs, fmt.Sprintf("%d", col.ID)) - } - - // Delete the column stats usage information whose column is dropped. - _, _, err := utilstats.ExecRows( - sctx, - "DELETE FROM mysql.column_stats_usage WHERE table_id = %? AND column_id NOT IN (%?)", - tableID, - columnIDs, - ) - - return err -} - -// CollectColumnsInExtendedStats returns IDs of the columns involved in extended stats. -func CollectColumnsInExtendedStats(sctx sessionctx.Context, tableID int64) ([]int64, error) { - const sql = "SELECT name, type, column_ids FROM mysql.stats_extended WHERE table_id = %? 
and status in (%?, %?)" - rows, _, err := utilstats.ExecRows(sctx, sql, tableID, statistics.ExtendedStatsAnalyzed, statistics.ExtendedStatsInited) - if err != nil { - return nil, errors.Trace(err) - } - if len(rows) == 0 { - return nil, nil - } - columnIDs := make([]int64, 0, len(rows)*2) - for _, row := range rows { - twoIDs := make([]int64, 0, 2) - data := row.GetString(2) - err := json.Unmarshal([]byte(data), &twoIDs) - if err != nil { - logutil.BgLogger().Error("invalid column_ids in mysql.stats_extended, skip collecting extended stats for this row", zap.String("column_ids", data), zap.Error(err)) - continue - } - columnIDs = append(columnIDs, twoIDs...) - } - return columnIDs, nil -} diff --git a/pkg/statistics/handle/usage/predicatecolumn/BUILD.bazel b/pkg/statistics/handle/usage/predicatecolumn/BUILD.bazel new file mode 100644 index 0000000000000..d93d212de06b1 --- /dev/null +++ b/pkg/statistics/handle/usage/predicatecolumn/BUILD.bazel @@ -0,0 +1,21 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "predicatecolumn", + srcs = ["predicate_column.go"], + importpath = "github.com/pingcap/tidb/pkg/statistics/handle/usage/predicatecolumn", + visibility = ["//visibility:public"], + deps = [ + "//pkg/infoschema", + "//pkg/parser/model", + "//pkg/parser/mysql", + "//pkg/sessionctx", + "//pkg/statistics", + "//pkg/statistics/handle/types", + "//pkg/statistics/handle/util", + "//pkg/types", + "//pkg/util/logutil", + "@com_github_pingcap_errors//:errors", + "@org_uber_go_zap//:zap", + ], +) diff --git a/pkg/statistics/handle/usage/predicatecolumn/predicate_column.go b/pkg/statistics/handle/usage/predicatecolumn/predicate_column.go new file mode 100644 index 0000000000000..89c02c2b73d71 --- /dev/null +++ b/pkg/statistics/handle/usage/predicatecolumn/predicate_column.go @@ -0,0 +1,184 @@ +// Copyright 2024 PingCAP, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package predicatecolumn + +import ( + "encoding/json" + "fmt" + "time" + + "github.com/pingcap/errors" + "github.com/pingcap/tidb/pkg/infoschema" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/parser/mysql" + "github.com/pingcap/tidb/pkg/sessionctx" + "github.com/pingcap/tidb/pkg/statistics" + statstypes "github.com/pingcap/tidb/pkg/statistics/handle/types" + utilstats "github.com/pingcap/tidb/pkg/statistics/handle/util" + "github.com/pingcap/tidb/pkg/types" + "github.com/pingcap/tidb/pkg/util/logutil" + "go.uber.org/zap" +) + +// loadColumnStatsUsage is a helper function to load column stats usage information from disk. +func loadColumnStatsUsage(sctx sessionctx.Context, loc *time.Location, query string, args ...any) (map[model.TableItemID]statstypes.ColStatsTimeInfo, error) { + rows, _, err := utilstats.ExecRows(sctx, query, args...) 
+ if err != nil { + return nil, errors.Trace(err) + } + colStatsMap := make(map[model.TableItemID]statstypes.ColStatsTimeInfo, len(rows)) + for _, row := range rows { + if row.IsNull(0) || row.IsNull(1) { + continue + } + tblColID := model.TableItemID{TableID: row.GetInt64(0), ID: row.GetInt64(1), IsIndex: false} + var statsUsage statstypes.ColStatsTimeInfo + if !row.IsNull(2) { + gt, err := row.GetTime(2).GoTime(time.UTC) + if err != nil { + return nil, errors.Trace(err) + } + t := types.NewTime(types.FromGoTime(gt.In(loc)), mysql.TypeTimestamp, types.DefaultFsp) + statsUsage.LastUsedAt = &t + } + if !row.IsNull(3) { + gt, err := row.GetTime(3).GoTime(time.UTC) + if err != nil { + return nil, errors.Trace(err) + } + t := types.NewTime(types.FromGoTime(gt.In(loc)), mysql.TypeTimestamp, types.DefaultFsp) + statsUsage.LastAnalyzedAt = &t + } + colStatsMap[tblColID] = statsUsage + } + return colStatsMap, nil +} + +// LoadColumnStatsUsage loads column stats usage information from disk. +func LoadColumnStatsUsage(sctx sessionctx.Context, loc *time.Location) (map[model.TableItemID]statstypes.ColStatsTimeInfo, error) { + query := "SELECT table_id, column_id, CONVERT_TZ(last_used_at, @@TIME_ZONE, '+00:00'), CONVERT_TZ(last_analyzed_at, @@TIME_ZONE, '+00:00') FROM mysql.column_stats_usage" + return loadColumnStatsUsage(sctx, loc, query) +} + +// LoadColumnStatsUsageForTable loads column stats usage information for a specific table from disk. +func LoadColumnStatsUsageForTable(sctx sessionctx.Context, loc *time.Location, tableID int64) (map[model.TableItemID]statstypes.ColStatsTimeInfo, error) { + query := "SELECT table_id, column_id, CONVERT_TZ(last_used_at, @@TIME_ZONE, '+00:00'), CONVERT_TZ(last_analyzed_at, @@TIME_ZONE, '+00:00') FROM mysql.column_stats_usage WHERE table_id = %?" 
+ return loadColumnStatsUsage(sctx, loc, query, tableID) +} + +// GetPredicateColumns returns IDs of predicate columns, which are the columns whose stats are used(needed) when generating query plans. +func GetPredicateColumns(sctx sessionctx.Context, tableID int64) ([]int64, error) { + // Each time we retrieve the predicate columns, we also attempt to remove any column stats usage information whose column is dropped. + err := cleanupDroppedColumnStatsUsage(sctx, tableID) + if err != nil { + return nil, errors.Trace(err) + } + rows, _, err := utilstats.ExecRows( + sctx, + "SELECT column_id, CONVERT_TZ(last_used_at, @@TIME_ZONE, '+00:00') FROM mysql.column_stats_usage WHERE table_id = %? AND last_used_at IS NOT NULL", + tableID, + ) + if err != nil { + return nil, errors.Trace(err) + } + columnIDs := make([]int64, 0, len(rows)) + for _, row := range rows { + // Usually, it should not be NULL. + // This only happens when the last_used_at is not a valid time. + if row.IsNull(1) { + continue + } + colID := row.GetInt64(0) + columnIDs = append(columnIDs, colID) + } + return columnIDs, nil +} + +// cleanupDroppedColumnStatsUsage deletes the column stats usage information whose column is dropped. +func cleanupDroppedColumnStatsUsage(sctx sessionctx.Context, tableID int64) error { + is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema) + table, ok := is.TableByID(tableID) + if !ok { + // Usually, it should not happen. + // But if it happens, we can safely do nothing. + return nil + } + allColumns := table.Meta().Columns + // Due to SQL limitations, column IDs must be converted to strings for proper escaping in the query :( + columnIDs := make([]string, 0, len(allColumns)) + for _, col := range allColumns { + columnIDs = append(columnIDs, fmt.Sprintf("%d", col.ID)) + } + + // Delete the column stats usage information whose column is dropped. + _, _, err := utilstats.ExecRows( + sctx, + "DELETE FROM mysql.column_stats_usage WHERE table_id = %? 
+ AND column_id NOT IN (%?)", + tableID, + columnIDs, + ) + + return err +} + +// SaveColumnStatsUsageForTable saves column stats usage information for a specific table to disk. A nil LastUsedAt or LastAnalyzedAt is passed as an untyped nil so that the %? placeholder emits a real SQL NULL instead of the quoted string 'NULL'; non-nil times are passed as strings and converted from '+00:00' to the session time zone by the statement. +func SaveColumnStatsUsageForTable( + sctx sessionctx.Context, + colStatsUsage map[model.TableItemID]statstypes.ColStatsTimeInfo, +) error { + for colID, statsUsage := range colStatsUsage { + var lastUsedAt any + if statsUsage.LastUsedAt != nil { + lastUsedAt = statsUsage.LastUsedAt.String() + } + var lastAnalyzedAt any + if statsUsage.LastAnalyzedAt != nil { + lastAnalyzedAt = statsUsage.LastAnalyzedAt.String() + } + _, _, err := utilstats.ExecRows( + sctx, + "REPLACE INTO mysql.column_stats_usage (table_id, column_id, last_used_at, last_analyzed_at) VALUES (%?, %?, CONVERT_TZ(%?, '+00:00', @@TIME_ZONE), CONVERT_TZ(%?, '+00:00', @@TIME_ZONE))", + colID.TableID, colID.ID, lastUsedAt, lastAnalyzedAt, + ) + if err != nil { + return errors.Trace(err) + } + } + return nil +} + +// CollectColumnsInExtendedStats returns IDs of the columns involved in extended stats. +func CollectColumnsInExtendedStats(sctx sessionctx.Context, tableID int64) ([]int64, error) { + const sql = "SELECT name, type, column_ids FROM mysql.stats_extended WHERE table_id = %? and status in (%?, %?)" + rows, _, err := utilstats.ExecRows(sctx, sql, tableID, statistics.ExtendedStatsAnalyzed, statistics.ExtendedStatsInited) + if err != nil { + return nil, errors.Trace(err) + } + if len(rows) == 0 { + return nil, nil + } + columnIDs := make([]int64, 0, len(rows)*2) + for _, row := range rows { + twoIDs := make([]int64, 0, 2) + data := row.GetString(2) + err := json.Unmarshal([]byte(data), &twoIDs) + if err != nil { + logutil.BgLogger().Error("invalid column_ids in mysql.stats_extended, skip collecting extended stats for this row", zap.String("column_ids", data), zap.Error(err)) + continue + } + columnIDs = append(columnIDs, twoIDs...)
+ } + return columnIDs, nil +} diff --git a/pkg/statistics/handle/util/util.go b/pkg/statistics/handle/util/util.go index 1db8addc5fae3..3573a125f4973 100644 --- a/pkg/statistics/handle/util/util.go +++ b/pkg/statistics/handle/util/util.go @@ -250,6 +250,7 @@ type JSONTable struct { DatabaseName string `json:"database_name"` TableName string `json:"table_name"` ExtStats []*JSONExtendedStats `json:"ext_stats"` + PredicateColumns []*JSONPredicateColumn `json:"predicate_columns"` Count int64 `json:"count"` ModifyCount int64 `json:"modify_count"` Version uint64 `json:"version"` @@ -291,3 +292,10 @@ func (col *JSONColumn) TotalMemoryUsage() (size int64) { } return size } + +// JSONPredicateColumn contains the information of the columns used in the predicate. +type JSONPredicateColumn struct { + LastUsedAt *string `json:"last_used_at"` + LastAnalyzedAt *string `json:"last_analyzed_at"` + ID int64 `json:"id"` +}