Skip to content

Commit

Permalink
br: support backup and restore column_stats_usage (#54634)
Browse files Browse the repository at this point in the history
ref #53567
  • Loading branch information
Rustin170506 authored Jul 19, 2024
1 parent ddcaadb commit cb6f913
Show file tree
Hide file tree
Showing 10 changed files with 346 additions and 137 deletions.
7 changes: 3 additions & 4 deletions br/pkg/restore/snap_client/systable_restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ var statsTables = map[string]map[string]struct{}{
"stats_meta_history": {},
"stats_table_locked": {},
"stats_top_n": {},
"column_stats_usage": {},
},
}

Expand All @@ -57,10 +58,8 @@ var sysPrivilegeTableMap = map[string]string{
var unRecoverableTable = map[string]map[string]struct{}{
"mysql": {
// some variables in tidb (e.g. gc_safe_point) cannot be recovered.
"tidb": {},
"global_variables": {},

"column_stats_usage": {},
"tidb": {},
"global_variables": {},
"capture_plan_baselines_blacklist": {},
// gc info don't need to recover.
"gc_delete_range": {},
Expand Down
3 changes: 2 additions & 1 deletion pkg/statistics/handle/storage/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ go_library(
"//pkg/statistics/handle/logutil",
"//pkg/statistics/handle/metrics",
"//pkg/statistics/handle/types",
"//pkg/statistics/handle/usage/predicatecolumn",
"//pkg/statistics/handle/util",
"//pkg/types",
"//pkg/util/chunk",
Expand Down Expand Up @@ -56,7 +57,7 @@ go_test(
"stats_read_writer_test.go",
],
flaky = True,
shard_count = 22,
shard_count = 23,
deps = [
":storage",
"//pkg/domain",
Expand Down
44 changes: 44 additions & 0 deletions pkg/statistics/handle/storage/dump_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,48 @@ func TestLoadPartitionStats(t *testing.T) {
requireTableEqual(t, originGlobalStats, dom.StatsHandle().GetTableStats(tableInfo))
}

// TestLoadPredicateColumns checks that the column stats usage carried in a
// dumped JSON table is written back to mysql.column_stats_usage when the
// stats are loaded from that JSON again.
func TestLoadPredicateColumns(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)

	// Build the table and run one query that filters on column b.
	for _, stmt := range []string{
		"use test",
		"drop table if exists t",
		"create table t (a int, b int, c int, primary key(a), index idx(b))",
		"insert into t values (1, 2, 3), (2, 3, 4), (3, 4, 5)",
		"select * from t where b = 1",
	} {
		tk.MustExec(stmt)
	}

	ctx := context.Background()
	is := dom.InfoSchema()
	h := dom.StatsHandle()
	require.NoError(t, h.Update(ctx, is))
	// Flush the collected column usage to storage before analyzing.
	require.NoError(t, h.DumpColStatsUsageToKV())
	tk.MustExec("analyze table t")

	// Dump the stats of the table to JSON.
	tbl, err := is.TableByName(ctx, model.NewCIStr("test"), model.NewCIStr("t"))
	require.NoError(t, err)
	tblInfo := tbl.Meta()
	dumped, err := h.DumpStatsToJSON("test", tblInfo, nil, true)
	require.NoError(t, err)

	// Wipe the persisted statistics and the handle's in-memory state.
	for _, stmt := range []string{
		"delete from mysql.stats_meta",
		"delete from mysql.stats_histograms",
		"delete from mysql.stats_buckets",
		"delete from mysql.column_stats_usage",
	} {
		tk.MustExec(stmt)
	}
	h.Clear()

	// Restore everything from the dumped JSON.
	require.NoError(t, h.LoadStatsFromJSON(ctx, is, dumped, 0))

	// Two usage rows are expected, ordered by column_id.
	rows := tk.MustQuery("select table_id, column_id, last_used_at, last_analyzed_at from mysql.column_stats_usage order by column_id").Rows()
	require.Len(t, rows, 2)

	first, second := rows[0], rows[1]
	// Column 1: analyzed, but with no recorded usage time.
	require.Equal(t, "1", first[1].(string))
	require.Equal(t, "<nil>", first[2], "It hasn't been used since last analyze")
	require.NotEqual(t, "<nil>", first[3])
	// Column 2: both the usage time and the analyze time are set.
	require.Equal(t, "2", second[1].(string))
	require.NotEqual(t, "<nil>", second[2])
	require.NotEqual(t, "<nil>", second[3])
}

func TestLoadPartitionStatsErrPanic(t *testing.T) {
store, dom := testkit.CreateMockStoreAndDomain(t)
tk := testkit.NewTestKit(t, store)
Expand Down Expand Up @@ -666,6 +708,7 @@ PARTITION BY RANGE ( a ) (
persistStats(ctx, t, dom, "test", "t1", func(ctx context.Context, jsonTable *handleutil.JSONTable, physicalID int64) error {
require.True(t, physicalID > 0)
require.NotNil(t, jsonTable)
require.NotNil(t, jsonTable.PredicateColumns)
statsCnt += 1
return nil
})
Expand All @@ -674,6 +717,7 @@ PARTITION BY RANGE ( a ) (
persistStats(ctx, t, dom, "test", "t2", func(ctx context.Context, jsonTable *handleutil.JSONTable, physicalID int64) error {
require.True(t, physicalID > 0)
require.NotNil(t, jsonTable)
require.NotNil(t, jsonTable.PredicateColumns)
statsCnt += 1
return nil
})
Expand Down
30 changes: 29 additions & 1 deletion pkg/statistics/handle/storage/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/statistics"
statstypes "github.com/pingcap/tidb/pkg/statistics/handle/types"
"github.com/pingcap/tidb/pkg/statistics/handle/util"
"github.com/pingcap/tidb/pkg/types"
compressutil "github.com/pingcap/tidb/pkg/util/compress"
Expand Down Expand Up @@ -87,7 +88,13 @@ func dumpJSONCol(hist *statistics.Histogram, cmsketch *statistics.CMSketch, topn
}

// GenJSONTableFromStats generate jsonTable from tableInfo and stats
func GenJSONTableFromStats(sctx sessionctx.Context, dbName string, tableInfo *model.TableInfo, tbl *statistics.Table) (*util.JSONTable, error) {
func GenJSONTableFromStats(
sctx sessionctx.Context,
dbName string,
tableInfo *model.TableInfo,
tbl *statistics.Table,
colStatsUsage map[model.TableItemID]statstypes.ColStatsTimeInfo,
) (*util.JSONTable, error) {
tracker := memory.NewTracker(memory.LabelForAnalyzeMemory, -1)
tracker.AttachTo(sctx.GetSessionVars().MemTracker)
defer tracker.Detach()
Expand Down Expand Up @@ -135,6 +142,27 @@ func GenJSONTableFromStats(sctx sessionctx.Context, dbName string, tableInfo *mo
return nil, outerErr
}
jsonTbl.ExtStats = dumpJSONExtendedStats(tbl.ExtendedStats)
if colStatsUsage != nil {
// nilIfNil returns nil when the provided *types.Time is nil, and a pointer to its string representation otherwise.
nilIfNil := func(t *types.Time) *string {
if t == nil {
return nil
}
s := t.String()
return &s
}
jsonColStatsUsage := make([]*util.JSONPredicateColumn, 0, len(colStatsUsage))
for id, usage := range colStatsUsage {
jsonCol := &util.JSONPredicateColumn{
ID: id.ID,
LastUsedAt: nilIfNil(usage.LastUsedAt),
LastAnalyzedAt: nilIfNil(usage.LastAnalyzedAt),
}
jsonColStatsUsage = append(jsonColStatsUsage, jsonCol)
}
jsonTbl.PredicateColumns = jsonColStatsUsage
}

return jsonTbl, nil
}

Expand Down
51 changes: 50 additions & 1 deletion pkg/statistics/handle/storage/stats_read_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"runtime"
"sync"
"sync/atomic"
"time"

"github.com/pingcap/errors"
"github.com/pingcap/tidb/pkg/config"
Expand All @@ -32,6 +33,7 @@ import (
"github.com/pingcap/tidb/pkg/statistics"
handle_metrics "github.com/pingcap/tidb/pkg/statistics/handle/metrics"
statstypes "github.com/pingcap/tidb/pkg/statistics/handle/types"
"github.com/pingcap/tidb/pkg/statistics/handle/usage/predicatecolumn"
"github.com/pingcap/tidb/pkg/statistics/handle/util"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util/intest"
Expand Down Expand Up @@ -561,7 +563,12 @@ func (s *statsReadWriter) TableStatsToJSON(dbName string, tableInfo *model.Table
if err != nil {
return err
}
jsonTbl, err = GenJSONTableFromStats(sctx, dbName, tableInfo, tbl)
// Note: we don't show this information in the session directly, so we can always use UTC here.
colStatsUsage, err := predicatecolumn.LoadColumnStatsUsageForTable(sctx, time.UTC, physicalID)
if err != nil {
return err
}
jsonTbl, err = GenJSONTableFromStats(sctx, dbName, tableInfo, tbl, colStatsUsage)
return err
})
if err != nil {
Expand Down Expand Up @@ -717,5 +724,47 @@ func (s *statsReadWriter) loadStatsFromJSON(tableInfo *model.TableInfo, physical
if err != nil {
return errors.Trace(err)
}
err = s.SaveColumnStatsUsageToStorage(tbl.PhysicalID, jsonTbl.PredicateColumns)
if err != nil {
return errors.Trace(err)
}
return s.SaveMetaToStorage(tbl.PhysicalID, tbl.RealtimeCount, tbl.ModifyCount, util.StatsMetaHistorySourceLoadStats)
}

// SaveColumnStatsUsageToStorage persists the column statistics usage of one table into mysql.column_stats_usage.
// Nil entries in predicateColumns are skipped. If any timestamp string fails to parse,
// the parse error is returned and nothing is handed to the storage layer.
func (s *statsReadWriter) SaveColumnStatsUsageToStorage(physicalID int64, predicateColumns []*util.JSONPredicateColumn) error {
	persist := func(sctx sessionctx.Context) error {
		usages := make(map[model.TableItemID]statstypes.ColStatsTimeInfo, len(predicateColumns))
		for _, pc := range predicateColumns {
			if pc == nil {
				continue
			}
			// Convert the optional timestamp strings back into time values.
			usedAt, err := parseTimeOrNil(pc.LastUsedAt)
			if err != nil {
				return err
			}
			analyzedAt, err := parseTimeOrNil(pc.LastAnalyzedAt)
			if err != nil {
				return err
			}
			// Key each entry by the physical table ID plus the column ID.
			key := model.TableItemID{TableID: physicalID, ID: pc.ID}
			usages[key] = statstypes.ColStatsTimeInfo{
				LastUsedAt:     usedAt,
				LastAnalyzedAt: analyzedAt,
			}
		}
		return predicatecolumn.SaveColumnStatsUsageForTable(sctx, usages)
	}
	return util.CallWithSCtx(s.statsHandler.SPool(), persist, util.FlagWrapTxn)
}

// parseTimeOrNil converts an optional timestamp string into an optional types.Time.
// A nil input yields (nil, nil); a string that cannot be parsed yields (nil, err).
func parseTimeOrNil(timeStr *string) (*types.Time, error) {
	if timeStr == nil {
		return nil, nil
	}
	raw := *timeStr
	// DefaultStmtNoWarningContext use UTC timezone.
	ts, err := types.ParseTime(types.DefaultStmtNoWarningContext, raw, mysql.TypeTimestamp, types.MaxFsp)
	if err != nil {
		return nil, err
	}
	return &ts, nil
}
5 changes: 1 addition & 4 deletions pkg/statistics/handle/usage/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,17 @@ go_library(
"//pkg/infoschema",
"//pkg/metrics",
"//pkg/parser/model",
"//pkg/parser/mysql",
"//pkg/sessionctx",
"//pkg/sessionctx/variable",
"//pkg/statistics",
"//pkg/statistics/handle/storage",
"//pkg/statistics/handle/types",
"//pkg/statistics/handle/usage/indexusage",
"//pkg/statistics/handle/usage/predicatecolumn",
"//pkg/statistics/handle/util",
"//pkg/types",
"//pkg/util",
"//pkg/util/logutil",
"//pkg/util/sqlescape",
"@com_github_pingcap_errors//:errors",
"@org_uber_go_zap//:zap",
],
)

Expand Down
Loading

0 comments on commit cb6f913

Please sign in to comment.