Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: include both indexes and columns in job info #54336

Merged
merged 14 commits into from
Jul 3, 2024
2 changes: 1 addition & 1 deletion pkg/executor/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ func (e *AnalyzeExec) Next(ctx context.Context, _ *chunk.Chunk) error {
return e.handleResultsError(ctx, concurrency, needGlobalStats, globalStatsMap, resultsCh, len(tasks))
})
for _, task := range tasks {
prepareV2AnalyzeJobInfo(task.colExec, false)
prepareV2AnalyzeJobInfo(task.colExec)
AddNewAnalyzeJob(e.Ctx(), task.job)
}
failpoint.Inject("mockKillPendingAnalyzeJob", func() {
Expand Down
82 changes: 64 additions & 18 deletions pkg/executor/analyze_col.go
Original file line number Diff line number Diff line change
Expand Up @@ -384,29 +384,23 @@ func hasPkHist(handleCols plannerutil.HandleCols) bool {
return handleCols != nil && handleCols.IsInt()
}

func prepareV2AnalyzeJobInfo(e *AnalyzeColumnsExec, retry bool) {
if e == nil || e.StatsVersion != statistics.Version2 {
return
}
opts := e.opts
// prepareColumns prepares the columns for the analyze job.
func prepareColumns(e *AnalyzeColumnsExec, b *strings.Builder) {
cols := e.colsInfo
if e.V2Options != nil {
opts = e.V2Options.FilledOpts
}
sampleRate := *e.analyzePB.ColReq.SampleRate
var b strings.Builder
if retry {
b.WriteString("retry ")
}
if e.ctx.GetSessionVars().InRestrictedSQL {
b.WriteString("auto ")
}
b.WriteString("analyze table")
// Ignore the _row_id column.
if len(cols) > 0 && cols[len(cols)-1].ID == model.ExtraHandleID {
cols = cols[:len(cols)-1]
}
// If there are no columns, skip the process.
if len(cols) == 0 {
return
}
if len(cols) < len(e.tableInfo.Columns) {
b.WriteString(" columns ")
if len(cols) > 1 {
b.WriteString(" columns ")
} else {
b.WriteString(" column ")
}
for i, col := range cols {
if i > 0 {
b.WriteString(", ")
Expand All @@ -416,6 +410,58 @@ func prepareV2AnalyzeJobInfo(e *AnalyzeColumnsExec, retry bool) {
} else {
b.WriteString(" all columns")
}
}

// prepareIndexes appends the index part of the analyze job info to b.
//
// Nothing is written when e.indexes is empty. When e.indexes has at least as
// many entries as e.tableInfo.Indices, " all indexes" is written. Otherwise the
// original names (Name.O) of the indexes are listed, comma separated, after
// " index " (exactly one index) or " indexes " (more than one).
func prepareIndexes(e *AnalyzeColumnsExec, b *strings.Builder) {
	idxs := e.indexes
	total := len(idxs)

	switch {
	case total == 0:
		// No indexes in this job, so there is nothing to describe.
		return
	case total >= len(e.tableInfo.Indices):
		b.WriteString(" all indexes")
		return
	}

	// Only a subset of the indexes is analyzed: pick singular or plural wording.
	label := " index "
	if total > 1 {
		label = " indexes "
	}
	b.WriteString(label)

	for pos := range idxs {
		if pos != 0 {
			b.WriteString(", ")
		}
		b.WriteString(idxs[pos].Name.O)
	}
}

// prepareV2AnalyzeJobInfo prepares the job info for the analyze job.
func prepareV2AnalyzeJobInfo(e *AnalyzeColumnsExec) {
// For v1, we analyze all columns in a single job, so we don't need to set the job info.
if e == nil || e.StatsVersion != statistics.Version2 {
return
}

opts := e.opts
if e.V2Options != nil {
opts = e.V2Options.FilledOpts
}
sampleRate := *e.analyzePB.ColReq.SampleRate
var b strings.Builder
// If it is an internal SQL, it means it is triggered by the system itself(auto-analyze).
if e.ctx.GetSessionVars().InRestrictedSQL {
b.WriteString("auto ")
}
b.WriteString("analyze table")

prepareIndexes(e, &b)
if len(e.indexes) > 0 && len(e.colsInfo) > 0 {
b.WriteString(",")
}
prepareColumns(e, &b)

var needComma bool
b.WriteString(" with ")
printOption := func(optType ast.AnalyzeOptionType) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/executor/show_stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ func TestShowAnalyzeStatus(t *testing.T) {
require.Equal(t, "test", rows[0][0])
require.Equal(t, "t", rows[0][1])
require.Equal(t, "", rows[0][2])
require.Equal(t, "analyze table all columns with 256 buckets, 100 topn, 1 samplerate", rows[0][3])
require.Equal(t, "analyze table all indexes, all columns with 256 buckets, 100 topn, 1 samplerate", rows[0][3])
require.Equal(t, "2", rows[0][4])
checkTime := func(val any) {
str, ok := val.(string)
Expand Down
14 changes: 7 additions & 7 deletions pkg/executor/test/analyzetest/analyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2156,19 +2156,19 @@ func TestShowAanalyzeStatusJobInfo(t *testing.T) {
require.Equal(t, expected, rows[0][3])
tk.MustExec("delete from mysql.analyze_jobs")
}
checkJobInfo("analyze table columns b, c, d with 2 buckets, 2 topn, 1 samplerate")
checkJobInfo("analyze table all indexes, columns b, c, d with 2 buckets, 2 topn, 1 samplerate")
tk.MustExec("set global tidb_persist_analyze_options = 1")
tk.MustExec("select * from t where c > 1")
h := dom.StatsHandle()
require.NoError(t, h.DumpColStatsUsageToKV())
tk.MustExec("analyze table t predicate columns with 2 topn, 2 buckets")
checkJobInfo("analyze table columns b, c, d with 2 buckets, 2 topn, 1 samplerate")
checkJobInfo("analyze table all indexes, columns b, c, d with 2 buckets, 2 topn, 1 samplerate")
tk.MustExec("analyze table t")
checkJobInfo("analyze table columns b, c, d with 2 buckets, 2 topn, 1 samplerate")
checkJobInfo("analyze table all indexes, columns b, c, d with 2 buckets, 2 topn, 1 samplerate")
tk.MustExec("analyze table t columns a with 1 topn, 3 buckets")
checkJobInfo("analyze table columns a, b, d with 3 buckets, 1 topn, 1 samplerate")
checkJobInfo("analyze table all indexes, columns a, b, d with 3 buckets, 1 topn, 1 samplerate")
tk.MustExec("analyze table t")
checkJobInfo("analyze table columns a, b, d with 3 buckets, 1 topn, 1 samplerate")
checkJobInfo("analyze table all indexes, columns a, b, d with 3 buckets, 1 topn, 1 samplerate")
}

func TestAnalyzePartitionTableWithDynamicMode(t *testing.T) {
Expand Down Expand Up @@ -2775,7 +2775,7 @@ func TestAnalyzeColumnsSkipMVIndexJsonCol(t *testing.T) {
tk.MustQuery("select job_info from mysql.analyze_jobs where table_schema = 'test' and table_name = 't'").Sort().Check(
testkit.Rows(
"analyze index idx_c",
"analyze table columns a, b with 256 buckets, 100 topn, 1 samplerate",
"analyze table index idx_b, columns a, b with 256 buckets, 100 topn, 1 samplerate",
))

is := dom.InfoSchema()
Expand Down Expand Up @@ -2914,7 +2914,7 @@ func TestAnalyzeMVIndex(t *testing.T) {
// 2. analyze and check analyze jobs
tk.MustExec("analyze table t with 1 samplerate, 3 topn")
tk.MustQuery("select id, table_schema, table_name, partition_name, job_info, processed_rows, state from mysql.analyze_jobs order by id").
Check(testkit.Rows("1 test t analyze table columns a with 256 buckets, 3 topn, 1 samplerate 27 finished",
Check(testkit.Rows("1 test t analyze table index ia, column a with 256 buckets, 3 topn, 1 samplerate 27 finished",
"2 test t analyze index ij_signed 190 finished",
"3 test t analyze index ij_unsigned 135 finished",
"4 test t analyze index ij_double 154 finished",
Expand Down
4 changes: 2 additions & 2 deletions pkg/statistics/handle/autoanalyze/autoanalyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ func TestAutoAnalyzeWithPredicateColumns(t *testing.T) {

// Check analyze jobs.
tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check(
testkit.Rows("t auto analyze table columns a with 256 buckets, 100 topn, 1 samplerate"),
testkit.Rows("t auto analyze table column a with 256 buckets, 100 topn, 1 samplerate"),
)
}

Expand Down Expand Up @@ -342,7 +342,7 @@ func TestAutoAnalyzeSkipColumnTypes(t *testing.T) {
exec.AutoAnalyzeMinCnt = originalVal
}()
require.True(t, h.HandleAutoAnalyze())
tk.MustQuery("select job_info from mysql.analyze_jobs where job_info like '%auto analyze table%'").Check(testkit.Rows("auto analyze table columns a, b, d with 256 buckets, 100 topn, 1 samplerate"))
tk.MustQuery("select job_info from mysql.analyze_jobs where job_info like '%auto analyze table%'").Check(testkit.Rows("auto analyze table all indexes, columns a, b, d with 256 buckets, 100 topn, 1 samplerate"))
}

func TestAutoAnalyzeOnEmptyTable(t *testing.T) {
Expand Down
19 changes: 12 additions & 7 deletions pkg/statistics/handle/usage/predicate_column_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ func TestAnalyzeTableWithPredicateColumns(t *testing.T) {
// Analyze table and check analyze jobs.
tk.MustExec("analyze table t")
tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check(
testkit.Rows("t analyze table columns a with 256 buckets, 100 topn, 1 samplerate"),
testkit.Rows("t analyze table column a with 256 buckets, 100 topn, 1 samplerate"),
)

// More columns.
Expand Down Expand Up @@ -175,7 +175,7 @@ func TestAnalyzeTableWithTiDBPersistAnalyzeOptionsDisabled(t *testing.T) {
// Analyze again, it should use the predicate columns.
tk.MustExec("analyze table t")
tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check(
testkit.Rows("t analyze table columns a with 256 buckets, 100 topn, 1 samplerate"),
testkit.Rows("t analyze table column a with 256 buckets, 100 topn, 1 samplerate"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not "columns" here? It seems that table t has two columns.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because we only analyze predicate columns here. We only used column a above.

)
}

Expand Down Expand Up @@ -242,9 +242,8 @@ func TestAnalyzeNoPredicateColumnsWithIndexes(t *testing.T) {
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
),
)
// TODO: we should also include indexes in the job info.
tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check(
testkit.Rows("t analyze table columns a, b with 256 buckets, 100 topn, 1 samplerate"),
testkit.Rows("t analyze table all indexes, columns a, b with 256 buckets, 100 topn, 1 samplerate"),
)
}

Expand All @@ -263,12 +262,18 @@ func TestAnalyzeWithNoPredicateColumnsAndNoIndexes(t *testing.T) {
err := h.DumpColStatsUsageToKV()
require.NoError(t, err)

// Check stats_meta first.
rows := tk.MustQuery("select * from mysql.stats_meta where version != 0").Rows()
require.Len(t, rows, 0, "haven't been analyzed yet")
// Analyze table.
tk.MustExec("analyze table t")
// FIXME: We should correct the job info or skip this kind of job.
tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check(
testkit.Rows("t analyze table columns with 256 buckets, 100 topn, 1 samplerate"),
testkit.Rows("t analyze table with 256 buckets, 100 topn, 1 samplerate"),
)

// Check stats_meta again.
rows = tk.MustQuery("select * from mysql.stats_meta where version != 0 and modify_count = 0").Rows()
require.Len(t, rows, 1, "modify_count should be flushed")
}

func TestAnalyzeNoPredicateColumnsWithPrimaryKey(t *testing.T) {
Expand All @@ -290,6 +295,6 @@ func TestAnalyzeNoPredicateColumnsWithPrimaryKey(t *testing.T) {
// Analyze table.
tk.MustExec("analyze table t")
tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check(
testkit.Rows("t analyze table columns a, b with 256 buckets, 100 topn, 1 samplerate"),
testkit.Rows("t analyze table all indexes, columns a, b with 256 buckets, 100 topn, 1 samplerate"),
)
}
4 changes: 2 additions & 2 deletions tests/integrationtest/r/executor/analyze.result
Original file line number Diff line number Diff line change
Expand Up @@ -824,12 +824,12 @@ delete from mysql.analyze_jobs;
analyze table t;
select job_info from mysql.analyze_jobs where job_info like '%analyze table%';
job_info
analyze table columns a, b, d with 256 buckets, 100 topn, 1 samplerate
analyze table all indexes, columns a, b, d with 256 buckets, 100 topn, 1 samplerate
delete from mysql.analyze_jobs;
analyze table t columns a, e;
select job_info from mysql.analyze_jobs where job_info like '%analyze table%';
job_info
analyze table columns a, d with 256 buckets, 100 topn, 1 samplerate
analyze table all indexes, columns a, d with 256 buckets, 100 topn, 1 samplerate
set @@session.tidb_analyze_skip_column_types = default;
DROP TABLE IF EXISTS Issue34228;
CREATE TABLE Issue34228 (id bigint NOT NULL, dt datetime NOT NULL) PARTITION BY RANGE COLUMNS(dt) (PARTITION p202201 VALUES LESS THAN ("2022-02-01"), PARTITION p202202 VALUES LESS THAN ("2022-03-01"));
Expand Down