Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sessionctx: add tidb_analyze_column_options global variable #54200

Merged
merged 2 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docs/design/2024-05-23-predicate-columns.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
- [Performance Tests](#performance-tests)
- [Impacts \& Risks](#impacts--risks)
- [If new predicate columns appear, they cannot be analyzed in time](#if-new-predicate-columns-appear-they-cannot-be-analyzed-in-time)
- [Use PREDICATE COLUMNS when your workload's query pattern is relatively stable](#use-predicate-columns-when-your-workloads-query-pattern-is--relatively-stable)
- [Use PREDICATE COLUMNS when your workload's query pattern is relatively stable](#use-predicate-columns-when-your-workloads-query-pattern-is-relatively-stable)
- [Investigation \& Alternatives](#investigation--alternatives)
- [CRDB](#crdb)
- [Summary](#summary)
Expand Down Expand Up @@ -214,14 +214,14 @@ In the experimental implementation, we introduce a new global variable `tidb_ena

However, because we decided to track all columns by default, this variable is no longer necessary. We will mark it as deprecated and remove it in the future.

In this feature, we introduce a new global variable `tidb_analyze_default_column_choice` to control whether to use predicate columns or all columns in the analyze process.
In this feature, we introduce a new global variable `tidb_analyze_column_options` to control whether to use predicate columns or all columns in the analyze process.

Users can set this variable to `ALL` or `PREDICATE` to analyze all columns or only predicate columns. The default value will be `PREDICATE` after this feature is fully implemented.

```sql
SET GLOBAL tidb_analyze_default_column_choice = 'PREDICATE';
SET GLOBAL tidb_analyze_column_options = 'PREDICATE';

SET GLOBAL tidb_analyze_default_column_choice = 'ALL';
SET GLOBAL tidb_analyze_column_options = 'ALL';
```

| Value | Description |
Expand Down
8 changes: 8 additions & 0 deletions pkg/executor/set_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,14 @@ func TestSetVar(t *testing.T) {
require.Error(t, tk.ExecToErr("set tidb_enable_column_tracking = 0"))
require.Error(t, tk.ExecToErr("set global tidb_enable_column_tracking = -1"))

// test for tidb_analyze_column_options
tk.MustQuery("select @@tidb_analyze_column_options").Check(testkit.Rows("ALL"))
tk.MustExec("set global tidb_analyze_column_options = 'PREDICATE'")
tk.MustQuery("select @@tidb_analyze_column_options").Check(testkit.Rows("PREDICATE"))
tk.MustExec("set global tidb_analyze_column_options = 'all'")
tk.MustQuery("select @@tidb_analyze_column_options").Check(testkit.Rows("ALL"))
require.Error(t, tk.ExecToErr("set global tidb_analyze_column_options = 'UNKNOWN'"))

// test for tidb_ignore_prepared_cache_close_stmt
tk.MustQuery("select @@global.tidb_ignore_prepared_cache_close_stmt").Check(testkit.Rows("0")) // default value is 0
tk.MustExec("set global tidb_ignore_prepared_cache_close_stmt=1")
Expand Down
29 changes: 28 additions & 1 deletion pkg/sessionctx/variable/sysvar.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"github.com/pingcap/tidb/pkg/parser"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/charset"
"github.com/pingcap/tidb/pkg/parser/model"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/planner/util/fixcontrol"
"github.com/pingcap/tidb/pkg/privilege/privileges/ldap"
Expand Down Expand Up @@ -1010,7 +1011,33 @@ var defaultSysVars = []*SysVar{
RunAutoAnalyze.Store(TiDBOptOn(val))
return nil
},
}, {
},
{
	// tidb_analyze_column_options is a global-scope string variable. Its live
	// value is kept in the process-wide AnalyzeColumnOptions atomic string and
	// is always stored upper-cased.
	Scope: ScopeGlobal,
	Name:  TiDBAnalyzeColumnOptions,
	Value: DefTiDBAnalyzeColumnOptions,
	Type:  TypeStr,
	// GetGlobal returns whatever is currently held in AnalyzeColumnOptions.
	GetGlobal: func(_ context.Context, _ *SessionVars) (string, error) {
		return AnalyzeColumnOptions.Load(), nil
	},
	// SetGlobal upper-cases the value before storing it. This is kept even
	// though Validation already canonicalises, so that a value reaching this
	// hook by any other path is still stored in canonical form.
	SetGlobal: func(_ context.Context, _ *SessionVars, val string) error {
		AnalyzeColumnOptions.Store(strings.ToUpper(val))
		return nil
	},
	// Validation accepts, case-insensitively, only the string forms of
	// model.AllColumns and model.PredicateColumns, and returns the canonical
	// upper-cased spelling so that every consumer of the validated value sees
	// the same representation regardless of how the user typed it.
	Validation: func(_ *SessionVars, normalizedValue string, _ string, _ ScopeFlag) (string, error) {
		choice := strings.ToUpper(normalizedValue)
		if choice != model.AllColumns.String() && choice != model.PredicateColumns.String() {
			return "", errors.Errorf(
				"invalid value for %s, it should be either '%s' or '%s'",
				TiDBAnalyzeColumnOptions,
				model.AllColumns.String(),
				model.PredicateColumns.String(),
			)
		}
		return choice, nil
	},
},
{
Scope: ScopeGlobal, Name: TiDBEnableAutoAnalyzePriorityQueue, Value: BoolToOnOff(DefTiDBEnableAutoAnalyzePriorityQueue), Type: TypeBool,
GetGlobal: func(_ context.Context, s *SessionVars) (string, error) {
return BoolToOnOff(EnableAutoAnalyzePriorityQueue.Load()), nil
Expand Down
43 changes: 29 additions & 14 deletions pkg/sessionctx/variable/tidb_vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -977,6 +977,11 @@ const (
// TiDBEnableColumnTracking enables collecting predicate columns.
// DEPRECATED: This variable is deprecated, please do not use this variable.
TiDBEnableColumnTracking = "tidb_enable_column_tracking"
// TiDBAnalyzeColumnOptions specifies the default column selection strategy for both manual and automatic analyze operations.
// It accepts two values:
// `PREDICATE`: Analyze only the columns that are used in the predicates of the query.
// `ALL`: Analyze all columns in the table.
TiDBAnalyzeColumnOptions = "tidb_analyze_column_options"
// TiDBDisableColumnTrackingTime records the last time TiDBEnableColumnTracking is set off.
// It is used to invalidate the collected predicate columns after turning off TiDBEnableColumnTracking, which avoids physical deletion.
// It doesn't have cache in memory, and we directly get/set the variable value from/to mysql.tidb.
Expand Down Expand Up @@ -1372,6 +1377,7 @@ const (
DefTiDBMemQuotaAnalyze = -1
DefTiDBEnableAutoAnalyze = true
DefTiDBEnableAutoAnalyzePriorityQueue = true
DefTiDBAnalyzeColumnOptions = "ALL"
DefTiDBMemOOMAction = "CANCEL"
DefTiDBMaxAutoAnalyzeTime = 12 * 60 * 60
DefTiDBEnablePrepPlanCache = true
Expand Down Expand Up @@ -1498,20 +1504,29 @@ const (

// Process global variables.
var (
ProcessGeneralLog = atomic.NewBool(false)
RunAutoAnalyze = atomic.NewBool(DefTiDBEnableAutoAnalyze)
EnableAutoAnalyzePriorityQueue = atomic.NewBool(DefTiDBEnableAutoAnalyzePriorityQueue)
GlobalLogMaxDays = atomic.NewInt32(int32(config.GetGlobalConfig().Log.File.MaxDays))
QueryLogMaxLen = atomic.NewInt32(DefTiDBQueryLogMaxLen)
EnablePProfSQLCPU = atomic.NewBool(false)
EnableBatchDML = atomic.NewBool(false)
EnableTmpStorageOnOOM = atomic.NewBool(DefTiDBEnableTmpStorageOnOOM)
ddlReorgWorkerCounter int32 = DefTiDBDDLReorgWorkerCount
ddlReorgBatchSize int32 = DefTiDBDDLReorgBatchSize
ddlFlashbackConcurrency int32 = DefTiDBDDLFlashbackConcurrency
ddlErrorCountLimit int64 = DefTiDBDDLErrorCountLimit
ddlReorgRowFormat int64 = DefTiDBRowFormatV2
maxDeltaSchemaCount int64 = DefTiDBMaxDeltaSchemaCount
ProcessGeneralLog = atomic.NewBool(false)
RunAutoAnalyze = atomic.NewBool(DefTiDBEnableAutoAnalyze)
EnableAutoAnalyzePriorityQueue = atomic.NewBool(DefTiDBEnableAutoAnalyzePriorityQueue)
// AnalyzeColumnOptions is a global variable that indicates the default column choice for ANALYZE.
// The value of this variable is a string that can be one of the following values:
// "PREDICATE", "ALL".
// The behavior of the analyze operation depends on the value of `tidb_persist_analyze_options`:
// 1. If `tidb_persist_analyze_options` is enabled and the column choice from the analyze options record is set to `default`,
// the value of `tidb_analyze_column_options` determines the behavior of the analyze operation.
// 2. If `tidb_persist_analyze_options` is disabled, `tidb_analyze_column_options` is used directly to decide
// whether to analyze all columns or just the predicate columns.
AnalyzeColumnOptions = atomic.NewString(DefTiDBAnalyzeColumnOptions)
GlobalLogMaxDays = atomic.NewInt32(int32(config.GetGlobalConfig().Log.File.MaxDays))
QueryLogMaxLen = atomic.NewInt32(DefTiDBQueryLogMaxLen)
EnablePProfSQLCPU = atomic.NewBool(false)
EnableBatchDML = atomic.NewBool(false)
EnableTmpStorageOnOOM = atomic.NewBool(DefTiDBEnableTmpStorageOnOOM)
ddlReorgWorkerCounter int32 = DefTiDBDDLReorgWorkerCount
ddlReorgBatchSize int32 = DefTiDBDDLReorgBatchSize
ddlFlashbackConcurrency int32 = DefTiDBDDLFlashbackConcurrency
ddlErrorCountLimit int64 = DefTiDBDDLErrorCountLimit
ddlReorgRowFormat int64 = DefTiDBRowFormatV2
maxDeltaSchemaCount int64 = DefTiDBMaxDeltaSchemaCount
// DDLSlowOprThreshold is the threshold for ddl slow operations, unit is millisecond.
DDLSlowOprThreshold = config.GetGlobalConfig().Instance.DDLSlowOprThreshold
ForcePriority = int32(DefTiDBForcePriority)
Expand Down