diff --git a/ddl/BUILD.bazel b/ddl/BUILD.bazel index c4332db0eeb11..49aaa0c59c8da 100644 --- a/ddl/BUILD.bazel +++ b/ddl/BUILD.bazel @@ -198,6 +198,7 @@ go_test( "placement_policy_test.go", "placement_sql_test.go", "primary_key_handle_test.go", + "reorg_partition_test.go", "repair_table_test.go", "resource_group_test.go", "restart_test.go", @@ -265,6 +266,7 @@ go_test( "//util/domainutil", "//util/gcutil", "//util/logutil", + "//util/mathutil", "//util/mock", "//util/sem", "//util/sqlexec", diff --git a/ddl/backfilling.go b/ddl/backfilling.go index 9f12759d16d51..c6bb38297402d 100644 --- a/ddl/backfilling.go +++ b/ddl/backfilling.go @@ -60,6 +60,7 @@ const ( typeUpdateColumnWorker backfillerType = 1 typeCleanUpIndexWorker backfillerType = 2 typeAddIndexMergeTmpWorker backfillerType = 3 + typeReorgPartitionWorker backfillerType = 4 // InstanceLease is the instance lease. InstanceLease = 1 * time.Minute @@ -85,6 +86,8 @@ func (bT backfillerType) String() string { return "clean up index" case typeAddIndexMergeTmpWorker: return "merge temporary index" + case typeReorgPartitionWorker: + return "reorganize partition" default: return "unknown" } @@ -140,6 +143,7 @@ func GetLeaseGoTime(currTime time.Time, lease time.Duration) types.Time { // 1: add-index // 2: modify-column-type // 3: clean-up global index +// 4: reorganize partition // // They all have a write reorganization state to back fill data into the rows existed. // Backfilling is time consuming, to accelerate this process, TiDB has built some sub @@ -660,7 +664,6 @@ func (dc *ddlCtx) sendTasksAndWait(scheduler *backfillScheduler, totalAddedCount return errors.Trace(err) } - // nextHandle will be updated periodically in runReorgJob, so no need to update it here. dc.getReorgCtx(reorgInfo.Job.ID).setNextKey(nextKey) metrics.BatchAddIdxHistogram.WithLabelValues(metrics.LblOK).Observe(elapsedTime.Seconds()) logutil.BgLogger().Info("[ddl] backfill workers successfully processed batch", @@ -707,7 +710,7 @@ func getBatchTasks(t table.Table, reorgInfo *reorgInfo, kvRanges []kv.KeyRange, task := &reorgBackfillTask{ id: i, - jobID: job.ID, + jobID: reorgInfo.Job.ID, physicalTable: phyTbl, priority: reorgInfo.Priority, startKey: startKey, @@ -724,7 +727,7 @@ func getBatchTasks(t table.Table, reorgInfo *reorgInfo, kvRanges []kv.KeyRange, } // handleRangeTasks sends tasks to workers, and returns remaining kvRanges that is not handled. 
-func (dc *ddlCtx) handleRangeTasks(scheduler *backfillScheduler, t table.Table, +func (dc *ddlCtx) handleRangeTasks(scheduler *backfillScheduler, t table.PhysicalTable, totalAddedCount *int64, kvRanges []kv.KeyRange) ([]kv.KeyRange, error) { batchTasks := getBatchTasks(t, scheduler.reorgInfo, kvRanges, backfillTaskChanSize) if len(batchTasks) == 0 { @@ -934,6 +937,13 @@ func (b *backfillScheduler) adjustWorkerSize() error { idxWorker := newCleanUpIndexWorker(sessCtx, i, b.tbl, b.decodeColMap, reorgInfo, jc) runner = newBackfillWorker(jc.ddlJobCtx, idxWorker) worker = idxWorker + case typeReorgPartitionWorker: + partWorker, err := newReorgPartitionWorker(sessCtx, i, b.tbl, b.decodeColMap, reorgInfo, jc) + if err != nil { + return err + } + runner = newBackfillWorker(jc.ddlJobCtx, partWorker) + worker = partWorker default: return errors.New("unknown backfill type") } diff --git a/ddl/column.go b/ddl/column.go index 2f88cbfbe23fd..946abdf904c65 100644 --- a/ddl/column.go +++ b/ddl/column.go @@ -992,9 +992,37 @@ func BuildElements(changingCol *model.ColumnInfo, changingIdxs []*model.IndexInf return elements } -func (w *worker) updatePhysicalTableRow(t table.PhysicalTable, reorgInfo *reorgInfo) error { +func (w *worker) updatePhysicalTableRow(t table.Table, reorgInfo *reorgInfo) error { logutil.BgLogger().Info("[ddl] start to update table row", zap.String("job", reorgInfo.Job.String()), zap.String("reorgInfo", reorgInfo.String())) - return w.writePhysicalTableRecord(w.sessPool, t, typeUpdateColumnWorker, reorgInfo) + if tbl, ok := t.(table.PartitionedTable); ok { + done := false + for !done { + p := tbl.GetPartition(reorgInfo.PhysicalTableID) + if p == nil { + return dbterror.ErrCancelledDDLJob.GenWithStack("Can not find partition id %d for table %d", reorgInfo.PhysicalTableID, t.Meta().ID) + } + workType := typeReorgPartitionWorker + if reorgInfo.Job.Type != model.ActionReorganizePartition { + // workType = typeUpdateColumnWorker + // TODO: Support Modify Column on partitioned table + // https://github.com/pingcap/tidb/issues/38297 + return dbterror.ErrCancelledDDLJob.GenWithStack("Modify Column on partitioned table / typeUpdateColumnWorker not yet supported.") + } + err := w.writePhysicalTableRecord(w.sessPool, p, workType, reorgInfo) + if err != nil { + return err + } + done, err = updateReorgInfo(w.sessPool, tbl, reorgInfo) + if err != nil { + return errors.Trace(err) + } + } + return nil + } + if tbl, ok := t.(table.PhysicalTable); ok { + return w.writePhysicalTableRecord(w.sessPool, tbl, typeUpdateColumnWorker, reorgInfo) + } + return dbterror.ErrCancelledDDLJob.GenWithStack("internal error for phys tbl id: %d tbl id: %d", reorgInfo.PhysicalTableID, t.Meta().ID) } // TestReorgGoroutineRunning is only used in test to indicate the reorg goroutine has been started. @@ -1025,6 +1053,11 @@ func (w *worker) updateCurrentElement(t table.Table, reorgInfo *reorgInfo) error } } + if _, ok := t.(table.PartitionedTable); ok { + // TODO: remove when modify column of partitioned table is supported + // https://github.com/pingcap/tidb/issues/38297 + return dbterror.ErrCancelledDDLJob.GenWithStack("Modify Column on partitioned table / typeUpdateColumnWorker not yet supported.") + } // Get the original start handle and end handle. currentVer, err := getValidCurrentVersion(reorgInfo.d.store) if err != nil { @@ -1152,8 +1185,8 @@ type rowRecord struct { warning *terror.Error // It's used to record the cast warning of a record. } -// getNextKey gets next handle of entry that we are going to process. 
-func (*updateColumnWorker) getNextKey(taskRange reorgBackfillTask, +// getNextHandleKey gets next handle of entry that we are going to process. +func getNextHandleKey(taskRange reorgBackfillTask, taskDone bool, lastAccessedHandle kv.Key) (nextHandle kv.Key) { if !taskDone { // The task is not done. So we need to pick the last processed entry's handle and add one. @@ -1203,7 +1236,7 @@ func (w *updateColumnWorker) fetchRowColVals(txn kv.Transaction, taskRange reorg } logutil.BgLogger().Debug("[ddl] txn fetches handle info", zap.Uint64("txnStartTS", txn.StartTS()), zap.String("taskRange", taskRange.String()), zap.Duration("takeTime", time.Since(startTime))) - return w.rowRecords, w.getNextKey(taskRange, taskDone, lastAccessedHandle), taskDone, errors.Trace(err) + return w.rowRecords, getNextHandleKey(taskRange, taskDone, lastAccessedHandle), taskDone, errors.Trace(err) } func (w *updateColumnWorker) getRowRecord(handle kv.Handle, recordKey []byte, rawRow []byte) error { diff --git a/ddl/column_modify_test.go b/ddl/column_modify_test.go index 7f3125511c587..574de0ee8f08a 100644 --- a/ddl/column_modify_test.go +++ b/ddl/column_modify_test.go @@ -688,7 +688,7 @@ func TestTransactionWithWriteOnlyColumn(t *testing.T) { dom.DDL().SetHook(hook) done := make(chan error, 1) // test transaction on add column. - go backgroundExec(store, "alter table t1 add column c int not null", done) + go backgroundExec(store, "test", "alter table t1 add column c int not null", done) err := <-done require.NoError(t, err) require.NoError(t, checkErr) @@ -696,7 +696,7 @@ func TestTransactionWithWriteOnlyColumn(t *testing.T) { tk.MustExec("delete from t1") // test transaction on drop column. - go backgroundExec(store, "alter table t1 drop column c", done) + go backgroundExec(store, "test", "alter table t1 drop column c", done) err = <-done require.NoError(t, err) require.NoError(t, checkErr) diff --git a/ddl/db_integration_test.go b/ddl/db_integration_test.go index 9397aae68ebf1..9ac2572692e31 100644 --- a/ddl/db_integration_test.go +++ b/ddl/db_integration_test.go @@ -1215,14 +1215,14 @@ func TestBitDefaultValue(t *testing.T) { );`) } -func backgroundExec(s kv.Storage, sql string, done chan error) { +func backgroundExec(s kv.Storage, schema, sql string, done chan error) { se, err := session.CreateSession4Test(s) if err != nil { done <- errors.Trace(err) return } defer se.Close() - _, err = se.Execute(context.Background(), "use test") + _, err = se.Execute(context.Background(), "use "+schema) if err != nil { done <- errors.Trace(err) return @@ -4292,3 +4292,78 @@ func TestRegexpFunctionsGeneratedColumn(t *testing.T) { tk.MustExec("drop table if exists reg_like") } + +func TestReorgPartitionRangeFailure(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec(`create schema reorgfail`) + tk.MustExec("use reorgfail") + + tk.MustExec("CREATE TABLE t (id int, d varchar(255)) partition by range (id) (partition p0 values less than (1000000), partition p1 values less than (2000000), partition p2 values less than (3000000))") + tk.MustContainErrMsg(`ALTER TABLE t REORGANIZE PARTITION p0,p2 INTO (PARTITION p0 VALUES LESS THAN (1000000))`, "[ddl:8200]Unsupported REORGANIZE PARTITION of RANGE; not adjacent partitions") + tk.MustContainErrMsg(`ALTER TABLE t REORGANIZE PARTITION p0,p2 INTO (PARTITION p0 VALUES LESS THAN (4000000))`, "[ddl:8200]Unsupported REORGANIZE PARTITION of RANGE; not adjacent partitions") +} + +func TestReorgPartitionDocs(t *testing.T) { + // To test what is 
added as partition management in the docs + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec(`create schema reorgdocs`) + tk.MustExec("use reorgdocs") + tk.MustExec(`CREATE TABLE members ( + id int, + fname varchar(255), + lname varchar(255), + dob date, + data json +) +PARTITION BY RANGE (YEAR(dob)) ( + PARTITION pBefore1950 VALUES LESS THAN (1950), + PARTITION p1950 VALUES LESS THAN (1960), + PARTITION p1960 VALUES LESS THAN (1970), + PARTITION p1970 VALUES LESS THAN (1980), + PARTITION p1980 VALUES LESS THAN (1990), + PARTITION p1990 VALUES LESS THAN (2000))`) + tk.MustExec(`CREATE TABLE member_level ( + id int, + level int, + achievements json +) +PARTITION BY LIST (level) ( + PARTITION l1 VALUES IN (1), + PARTITION l2 VALUES IN (2), + PARTITION l3 VALUES IN (3), + PARTITION l4 VALUES IN (4), + PARTITION l5 VALUES IN (5));`) + tk.MustExec(`ALTER TABLE members DROP PARTITION p1990`) + tk.MustExec(`ALTER TABLE member_level DROP PARTITION l5`) + tk.MustExec(`ALTER TABLE members TRUNCATE PARTITION p1980`) + tk.MustExec(`ALTER TABLE member_level TRUNCATE PARTITION l4`) + tk.MustExec("ALTER TABLE members ADD PARTITION (PARTITION `p1990to2010` VALUES LESS THAN (2010))") + tk.MustExec(`ALTER TABLE member_level ADD PARTITION (PARTITION l5_6 VALUES IN (5,6))`) + tk.MustContainErrMsg(`ALTER TABLE members ADD PARTITION (PARTITION p1990 VALUES LESS THAN (2000))`, "[ddl:1493]VALUES LESS THAN value must be strictly increasing for each partition") + tk.MustExec(`ALTER TABLE members REORGANIZE PARTITION p1990to2010 INTO +(PARTITION p1990 VALUES LESS THAN (2000), + PARTITION p2000 VALUES LESS THAN (2010), + PARTITION p2010 VALUES LESS THAN (2020), + PARTITION p2020 VALUES LESS THAN (2030), + PARTITION pMax VALUES LESS THAN (MAXVALUE))`) + tk.MustExec(`ALTER TABLE member_level REORGANIZE PARTITION l5_6 INTO +(PARTITION l5 VALUES IN (5), + PARTITION l6 VALUES IN (6))`) + tk.MustExec(`ALTER TABLE members REORGANIZE PARTITION pBefore1950,p1950 INTO (PARTITION pBefore1960 VALUES LESS THAN (1960))`) + tk.MustExec(`ALTER TABLE member_level REORGANIZE PARTITION l1,l2 INTO (PARTITION l1_2 VALUES IN (1,2))`) + tk.MustExec(`ALTER TABLE members REORGANIZE PARTITION pBefore1960,p1960,p1970,p1980,p1990,p2000,p2010,p2020,pMax INTO +(PARTITION p1800 VALUES LESS THAN (1900), + PARTITION p1900 VALUES LESS THAN (2000), + PARTITION p2000 VALUES LESS THAN (2100))`) + tk.MustExec(`ALTER TABLE member_level REORGANIZE PARTITION l1_2,l3,l4,l5,l6 INTO +(PARTITION lOdd VALUES IN (1,3,5), + PARTITION lEven VALUES IN (2,4,6))`) + tk.MustContainErrMsg(`ALTER TABLE members REORGANIZE PARTITION p1800,p2000 INTO (PARTITION p2000 VALUES LESS THAN (2100))`, "[ddl:8200]Unsupported REORGANIZE PARTITION of RANGE; not adjacent partitions") + tk.MustExec(`INSERT INTO members VALUES (313, "John", "Doe", "2022-11-22", NULL)`) + tk.MustExec(`ALTER TABLE members REORGANIZE PARTITION p2000 INTO (PARTITION p2000 VALUES LESS THAN (2050))`) + tk.MustContainErrMsg(`ALTER TABLE members REORGANIZE PARTITION p2000 INTO (PARTITION p2000 VALUES LESS THAN (2020))`, "[table:1526]Table has no partition for value 2022") + tk.MustExec(`INSERT INTO member_level (id, level) values (313, 6)`) + tk.MustContainErrMsg(`ALTER TABLE member_level REORGANIZE PARTITION lEven INTO (PARTITION lEven VALUES IN (2,4))`, "[table:1526]Table has no partition for value 6") +} diff --git a/ddl/db_partition_test.go b/ddl/db_partition_test.go index 97cf16ed694bc..d9d609858bb06 100644 --- a/ddl/db_partition_test.go +++ b/ddl/db_partition_test.go 
@@ -3337,9 +3337,6 @@ func TestPartitionErrorCode(t *testing.T) { );`) tk.MustGetDBError("alter table t_part coalesce partition 4;", dbterror.ErrCoalesceOnlyOnHashPartition) - tk.MustGetErrCode(`alter table t_part reorganize partition p0, p1 into ( - partition p0 values less than (1980));`, errno.ErrUnsupportedDDLOperation) - tk.MustGetErrCode("alter table t_part check partition p0, p1;", errno.ErrUnsupportedDDLOperation) tk.MustGetErrCode("alter table t_part optimize partition p0,p1;", errno.ErrUnsupportedDDLOperation) tk.MustGetErrCode("alter table t_part rebuild partition p0,p1;", errno.ErrUnsupportedDDLOperation) @@ -3751,9 +3748,9 @@ func TestTruncatePartitionMultipleTimes(t *testing.T) { } hook.OnJobUpdatedExported.Store(&onJobUpdatedExportedFunc) done1 := make(chan error, 1) - go backgroundExec(store, "alter table test.t truncate partition p0;", done1) + go backgroundExec(store, "test", "alter table test.t truncate partition p0;", done1) done2 := make(chan error, 1) - go backgroundExec(store, "alter table test.t truncate partition p0;", done2) + go backgroundExec(store, "test", "alter table test.t truncate partition p0;", done2) <-done1 <-done2 require.LessOrEqual(t, errCount, int32(1)) @@ -4584,6 +4581,7 @@ func TestAlterModifyPartitionColTruncateWarning(t *testing.T) { tk.MustQuery(`show warnings`).Check(testkit.Rows(""+ "Warning 1265 Data truncated for column 'a', value is ' 654321'", "Warning 1265 Data truncated for column 'a', value is ' 654321'")) + tk.MustExec(`admin check table t`) } func TestAlterModifyColumnOnPartitionedTableRename(t *testing.T) { diff --git a/ddl/db_table_test.go b/ddl/db_table_test.go index f4952e2e1d483..96a0e3524ae3c 100644 --- a/ddl/db_table_test.go +++ b/ddl/db_table_test.go @@ -210,7 +210,7 @@ func TestTransactionOnAddDropColumn(t *testing.T) { dom.DDL().SetHook(hook) done := make(chan error, 1) // test transaction on add column. - go backgroundExec(store, "alter table t1 add column c int not null after a", done) + go backgroundExec(store, "test", "alter table t1 add column c int not null after a", done) err := <-done require.NoError(t, err) require.Nil(t, checkErr) @@ -218,7 +218,7 @@ func TestTransactionOnAddDropColumn(t *testing.T) { tk.MustExec("delete from t1") // test transaction on drop column. - go backgroundExec(store, "alter table t1 drop column c", done) + go backgroundExec(store, "test", "alter table t1 drop column c", done) err = <-done require.NoError(t, err) require.Nil(t, checkErr) @@ -899,7 +899,7 @@ func TestAddColumn2(t *testing.T) { dom.DDL().SetHook(hook) done := make(chan error, 1) // test transaction on add column. - go backgroundExec(store, "alter table t1 add column c int not null", done) + go backgroundExec(store, "test", "alter table t1 add column c int not null", done) err := <-done require.NoError(t, err) @@ -940,7 +940,7 @@ func TestAddColumn2(t *testing.T) { } dom.DDL().SetHook(hook) - go backgroundExec(store, "alter table t2 add column b int not null default 3", done) + go backgroundExec(store, "test", "alter table t2 add column b int not null default 3", done) err = <-done require.NoError(t, err) re.Check(testkit.Rows("1 2")) diff --git a/ddl/db_test.go b/ddl/db_test.go index 916cdc85ef367..ca442f15c9ba5 100644 --- a/ddl/db_test.go +++ b/ddl/db_test.go @@ -282,7 +282,7 @@ func TestIssue22307(t *testing.T) { dom.DDL().SetHook(hook) done := make(chan error, 1) // test transaction on add column. 
- go backgroundExec(store, "alter table t drop column b;", done) + go backgroundExec(store, "test", "alter table t drop column b;", done) err := <-done require.NoError(t, err) require.EqualError(t, checkErr1, "[planner:1054]Unknown column 'b' in 'where clause'") diff --git a/ddl/ddl.go b/ddl/ddl.go index 9f101d1562d57..e658547b471d3 100644 --- a/ddl/ddl.go +++ b/ddl/ddl.go @@ -981,7 +981,8 @@ func getIntervalFromPolicy(policy []time.Duration, i int) (time.Duration, bool) func getJobCheckInterval(job *model.Job, i int) (time.Duration, bool) { switch job.Type { - case model.ActionAddIndex, model.ActionAddPrimaryKey, model.ActionModifyColumn: + case model.ActionAddIndex, model.ActionAddPrimaryKey, model.ActionModifyColumn, + model.ActionReorganizePartition: return getIntervalFromPolicy(slowDDLIntervalPolicy, i) case model.ActionCreateTable, model.ActionCreateSchema: return getIntervalFromPolicy(fastDDLIntervalPolicy, i) diff --git a/ddl/ddl_api.go b/ddl/ddl_api.go index a3959282f0aee..4b9bd707f9284 100644 --- a/ddl/ddl_api.go +++ b/ddl/ddl_api.go @@ -3331,7 +3331,7 @@ func (d *ddl) AlterTable(ctx context.Context, sctx sessionctx.Context, stmt *ast case ast.AlterTableCoalescePartitions: err = d.CoalescePartitions(sctx, ident, spec) case ast.AlterTableReorganizePartition: - err = errors.Trace(dbterror.ErrUnsupportedReorganizePartition) + err = d.ReorganizePartitions(sctx, ident, spec) case ast.AlterTableReorganizeFirstPartition: err = dbterror.ErrGeneralUnsupportedDDL.GenWithStackByArgs("MERGE FIRST PARTITION") case ast.AlterTableReorganizeLastPartition: @@ -3907,6 +3907,181 @@ func (d *ddl) AddTablePartitions(ctx sessionctx.Context, ident ast.Ident, spec * return errors.Trace(err) } +// getReorganizedDefinitions return the definitions as they would look like after the REORGANIZE PARTITION is done. +func getReorganizedDefinitions(pi *model.PartitionInfo, firstPartIdx, lastPartIdx int, idMap map[int]struct{}) []model.PartitionDefinition { + tmpDefs := make([]model.PartitionDefinition, 0, len(pi.Definitions)+len(pi.AddingDefinitions)-len(idMap)) + if pi.Type == model.PartitionTypeList { + replaced := false + for i := range pi.Definitions { + if _, ok := idMap[i]; ok { + if !replaced { + tmpDefs = append(tmpDefs, pi.AddingDefinitions...) + replaced = true + } + continue + } + tmpDefs = append(tmpDefs, pi.Definitions[i]) + } + if !replaced { + // For safety, for future non-partitioned table -> partitioned + tmpDefs = append(tmpDefs, pi.AddingDefinitions...) + } + return tmpDefs + } + // Range + tmpDefs = append(tmpDefs, pi.Definitions[:firstPartIdx]...) + tmpDefs = append(tmpDefs, pi.AddingDefinitions...) + if len(pi.Definitions) > (lastPartIdx + 1) { + tmpDefs = append(tmpDefs, pi.Definitions[lastPartIdx+1:]...) 
+ } + return tmpDefs +} + +func getReplacedPartitionIDs(names []model.CIStr, pi *model.PartitionInfo) (int, int, map[int]struct{}, error) { + idMap := make(map[int]struct{}) + var firstPartIdx, lastPartIdx = -1, -1 + for _, name := range names { + partIdx := pi.FindPartitionDefinitionByName(name.L) + if partIdx == -1 { + return 0, 0, nil, errors.Trace(dbterror.ErrWrongPartitionName) + } + if _, ok := idMap[partIdx]; ok { + return 0, 0, nil, errors.Trace(dbterror.ErrSameNamePartition) + } + idMap[partIdx] = struct{}{} + if firstPartIdx == -1 { + firstPartIdx = partIdx + } else { + firstPartIdx = mathutil.Min[int](firstPartIdx, partIdx) + } + if lastPartIdx == -1 { + lastPartIdx = partIdx + } else { + lastPartIdx = mathutil.Max[int](lastPartIdx, partIdx) + } + } + if pi.Type == model.PartitionTypeRange { + if len(idMap) != (lastPartIdx - firstPartIdx + 1) { + return 0, 0, nil, errors.Trace(dbterror.ErrGeneralUnsupportedDDL.GenWithStackByArgs( + "REORGANIZE PARTITION of RANGE; not adjacent partitions")) + } + } + + return firstPartIdx, lastPartIdx, idMap, nil +} + +// ReorganizePartitions reorganize one set of partitions to a new set of partitions. +func (d *ddl) ReorganizePartitions(ctx sessionctx.Context, ident ast.Ident, spec *ast.AlterTableSpec) error { + schema, t, err := d.getSchemaAndTableByIdent(ctx, ident) + if err != nil { + return errors.Trace(infoschema.ErrTableNotExists.FastGenByArgs(ident.Schema, ident.Name)) + } + + meta := t.Meta() + pi := meta.GetPartitionInfo() + if pi == nil { + return dbterror.ErrPartitionMgmtOnNonpartitioned + } + switch pi.Type { + case model.PartitionTypeRange, model.PartitionTypeList: + default: + return errors.Trace(dbterror.ErrUnsupportedReorganizePartition) + } + firstPartIdx, lastPartIdx, idMap, err := getReplacedPartitionIDs(spec.PartitionNames, pi) + if err != nil { + return errors.Trace(err) + } + partInfo, err := BuildAddedPartitionInfo(ctx, meta, spec) + if err != nil { + return errors.Trace(err) + } + if err = d.assignPartitionIDs(partInfo.Definitions); err != nil { + return errors.Trace(err) + } + if err = checkReorgPartitionDefs(ctx, meta, partInfo, firstPartIdx, lastPartIdx, idMap); err != nil { + return errors.Trace(err) + } + if err = handlePartitionPlacement(ctx, partInfo); err != nil { + return errors.Trace(err) + } + + tzName, tzOffset := ddlutil.GetTimeZone(ctx) + job := &model.Job{ + SchemaID: schema.ID, + TableID: meta.ID, + SchemaName: schema.Name.L, + TableName: t.Meta().Name.L, + Type: model.ActionReorganizePartition, + BinlogInfo: &model.HistoryInfo{}, + Args: []interface{}{spec.PartitionNames, partInfo}, + ReorgMeta: &model.DDLReorgMeta{ + SQLMode: ctx.GetSessionVars().SQLMode, + Warnings: make(map[errors.ErrorID]*terror.Error), + WarningsCount: make(map[errors.ErrorID]int64), + Location: &model.TimeZoneLocation{Name: tzName, Offset: tzOffset}, + }, + } + + // No preSplitAndScatter here, it will be done by the worker in onReorganizePartition instead. + err = d.DoDDLJob(ctx, job) + err = d.callHookOnChanged(job, err) + return errors.Trace(err) +} + +func checkReorgPartitionDefs(ctx sessionctx.Context, tblInfo *model.TableInfo, partInfo *model.PartitionInfo, firstPartIdx, lastPartIdx int, idMap map[int]struct{}) error { + // partInfo contains only the new added partition, we have to combine it with the + // old partitions to check all partitions is strictly increasing. 
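+	// The approach: clone the table meta, splice the new definitions into the
+	// place of the replaced ones (getReorganizedDefinitions) and run the normal
+	// partition definition checks on the clone. For RANGE, additionally verify
+	// that the new last partition keeps the same upper bound as the replaced
+	// one, unless the table's last partition (including MAXVALUE) is part of
+	// the reorganization.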
+ pi := tblInfo.Partition + clonedMeta := tblInfo.Clone() + clonedMeta.Partition.AddingDefinitions = partInfo.Definitions + clonedMeta.Partition.Definitions = getReorganizedDefinitions(clonedMeta.Partition, firstPartIdx, lastPartIdx, idMap) + if err := checkPartitionDefinitionConstraints(ctx, clonedMeta); err != nil { + return errors.Trace(err) + } + if pi.Type == model.PartitionTypeRange { + if lastPartIdx == len(pi.Definitions)-1 { + // Last partition dropped, OK to change the end range + // Also includes MAXVALUE + return nil + } + // Check if the replaced end range is the same as before + lastAddingPartition := partInfo.Definitions[len(partInfo.Definitions)-1] + lastOldPartition := pi.Definitions[lastPartIdx] + if len(pi.Columns) > 0 { + newGtOld, err := checkTwoRangeColumns(ctx, &lastAddingPartition, &lastOldPartition, pi, tblInfo) + if err != nil { + return errors.Trace(err) + } + if newGtOld { + return errors.Trace(dbterror.ErrRangeNotIncreasing) + } + oldGtNew, err := checkTwoRangeColumns(ctx, &lastOldPartition, &lastAddingPartition, pi, tblInfo) + if err != nil { + return errors.Trace(err) + } + if oldGtNew { + return errors.Trace(dbterror.ErrRangeNotIncreasing) + } + return nil + } + + isUnsigned := isPartExprUnsigned(tblInfo) + currentRangeValue, _, err := getRangeValue(ctx, pi.Definitions[lastPartIdx].LessThan[0], isUnsigned) + if err != nil { + return errors.Trace(err) + } + newRangeValue, _, err := getRangeValue(ctx, partInfo.Definitions[len(partInfo.Definitions)-1].LessThan[0], isUnsigned) + if err != nil { + return errors.Trace(err) + } + + if currentRangeValue != newRangeValue { + return errors.Trace(dbterror.ErrRangeNotIncreasing) + } + } + return nil +} + // CoalescePartitions coalesce partitions can be used with a table that is partitioned by hash or key to reduce the number of partitions by number. func (d *ddl) CoalescePartitions(ctx sessionctx.Context, ident ast.Ident, spec *ast.AlterTableSpec) error { is := d.infoCache.GetLatest() diff --git a/ddl/ddl_worker.go b/ddl/ddl_worker.go index 14c5ca54a89c9..f797145882c1c 100644 --- a/ddl/ddl_worker.go +++ b/ddl/ddl_worker.go @@ -40,6 +40,7 @@ import ( tidbutil "github.com/pingcap/tidb/util" "github.com/pingcap/tidb/util/dbterror" "github.com/pingcap/tidb/util/logutil" + "github.com/pingcap/tidb/util/mathutil" "github.com/pingcap/tidb/util/resourcegrouptag" "github.com/pingcap/tidb/util/topsql" topsqlstate "github.com/pingcap/tidb/util/topsql/state" @@ -519,7 +520,8 @@ func jobNeedGC(job *model.Job) bool { switch job.Type { case model.ActionDropSchema, model.ActionDropTable, model.ActionTruncateTable, model.ActionDropIndex, model.ActionDropPrimaryKey, model.ActionDropTablePartition, model.ActionTruncateTablePartition, model.ActionDropColumn, model.ActionModifyColumn, - model.ActionAddIndex, model.ActionAddPrimaryKey: + model.ActionAddIndex, model.ActionAddPrimaryKey, + model.ActionReorganizePartition: return true case model.ActionMultiSchemaChange: for _, sub := range job.MultiSchemaInfo.SubJobs { @@ -673,9 +675,10 @@ func (w *worker) unlockSeqNum(err error) { // DDLBackfillers contains the DDL need backfill step. 
var DDLBackfillers = map[model.ActionType]string{ - model.ActionAddIndex: "add_index", - model.ActionModifyColumn: "modify_column", - model.ActionDropIndex: "drop_index", + model.ActionAddIndex: "add_index", + model.ActionModifyColumn: "modify_column", + model.ActionDropIndex: "drop_index", + model.ActionReorganizePartition: "reorganize_partition", } func getDDLRequestSource(jobType model.ActionType) string { @@ -1088,6 +1091,8 @@ func (w *worker) runDDLJob(d *ddlCtx, t *meta.Meta, job *model.Job) (ver int64, ver, err = w.onFlashbackCluster(d, t, job) case model.ActionMultiSchemaChange: ver, err = onMultiSchemaChange(w, d, t, job) + case model.ActionReorganizePartition: + ver, err = w.onReorganizePartition(d, t, job) case model.ActionAlterTTLInfo: ver, err = onTTLInfoChange(d, t, job) case model.ActionAlterTTLRemove: @@ -1361,6 +1366,22 @@ func updateSchemaVersion(d *ddlCtx, t *meta.Meta, job *model.Job, multiInfos ... diff.AffectedOpts = buildPlacementAffects(oldIDs, oldIDs) } } + case model.ActionReorganizePartition: + diff.TableID = job.TableID + if len(job.CtxVars) > 0 { + if droppedIDs, ok := job.CtxVars[0].([]int64); ok { + if addedIDs, ok := job.CtxVars[1].([]int64); ok { + // to use AffectedOpts we need both new and old to have the same length + maxParts := mathutil.Max[int](len(droppedIDs), len(addedIDs)) + // Also initialize them to 0! + oldIDs := make([]int64, maxParts) + copy(oldIDs, droppedIDs) + newIDs := make([]int64, maxParts) + copy(newIDs, addedIDs) + diff.AffectedOpts = buildPlacementAffects(oldIDs, newIDs) + } + } + } case model.ActionCreateTable: diff.TableID = job.TableID if len(job.Args) > 0 { diff --git a/ddl/delete_range.go b/ddl/delete_range.go index 899bb1bb7143e..8734d2c482968 100644 --- a/ddl/delete_range.go +++ b/ddl/delete_range.go @@ -307,9 +307,13 @@ func insertJobIntoDeleteRangeTable(ctx context.Context, sctx sessionctx.Context, endKey := tablecodec.EncodeTablePrefix(tableID + 1) elemID := ea.allocForPhysicalID(tableID) return doInsert(ctx, s, job.ID, elemID, startKey, endKey, now, fmt.Sprintf("table ID is %d", tableID)) - case model.ActionDropTablePartition, model.ActionTruncateTablePartition: + case model.ActionDropTablePartition, model.ActionTruncateTablePartition, model.ActionReorganizePartition: var physicalTableIDs []int64 - if err := job.DecodeArgs(&physicalTableIDs); err != nil { + // partInfo is not used, but is set in ReorgPartition. + // Better to have an additional argument in job.DecodeArgs since it is ignored, + // instead of having one to few, which will remove the data from the job arguments... 
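+		// For ActionReorganizePartition the decoded IDs are the replaced (dropping)
+		// partitions, registered in job.Args by onReorganizePartition when the job
+		// finishes in StateDeleteReorganization.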
+ var partInfo model.PartitionInfo + if err := job.DecodeArgs(&physicalTableIDs, &partInfo); err != nil { return errors.Trace(err) } for _, physicalTableID := range physicalTableIDs { diff --git a/ddl/index.go b/ddl/index.go index d64316b2d3fa3..f7430cc864cdf 100644 --- a/ddl/index.go +++ b/ddl/index.go @@ -1851,7 +1851,16 @@ func getNextPartitionInfo(reorg *reorgInfo, t table.PartitionedTable, currPhysic return 0, nil, nil, nil } - pid, err := findNextPartitionID(currPhysicalTableID, pi.Definitions) + // During data copying, copy data from partitions to be dropped + nextPartitionDefs := pi.DroppingDefinitions + if bytes.Equal(reorg.currElement.TypeKey, meta.IndexElementKey) { + // During index re-creation, process data from partitions to be added + nextPartitionDefs = pi.AddingDefinitions + } + if nextPartitionDefs == nil { + nextPartitionDefs = pi.Definitions + } + pid, err := findNextPartitionID(currPhysicalTableID, nextPartitionDefs) if err != nil { // Fatal error, should not run here. logutil.BgLogger().Error("[ddl] find next partition ID failed", zap.Reflect("table", t), zap.Error(err)) diff --git a/ddl/index_modify_test.go b/ddl/index_modify_test.go index c24f2dfb1b783..2caf54c31c157 100644 --- a/ddl/index_modify_test.go +++ b/ddl/index_modify_test.go @@ -416,7 +416,7 @@ func testAddIndexRollback(t *testing.T, idxName, addIdxSQL, errMsg string, hasNu } done := make(chan error, 1) - go backgroundExec(store, addIdxSQL, done) + go backgroundExec(store, "test", addIdxSQL, done) times := 0 ticker := time.NewTicker(indexModifyLease / 2) diff --git a/ddl/partition.go b/ddl/partition.go index 1a3cab2e3eb01..00a5582be9a54 100644 --- a/ddl/partition.go +++ b/ddl/partition.go @@ -34,6 +34,7 @@ import ( "github.com/pingcap/tidb/domain/infosync" "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/infoschema" + "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/meta" "github.com/pingcap/tidb/metrics" "github.com/pingcap/tidb/parser" @@ -42,8 +43,10 @@ import ( "github.com/pingcap/tidb/parser/model" "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tidb/parser/opcode" + "github.com/pingcap/tidb/parser/terror" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/table" + "github.com/pingcap/tidb/table/tables" "github.com/pingcap/tidb/tablecodec" "github.com/pingcap/tidb/types" driver "github.com/pingcap/tidb/types/parser_driver" @@ -55,9 +58,11 @@ import ( "github.com/pingcap/tidb/util/logutil" "github.com/pingcap/tidb/util/mathutil" "github.com/pingcap/tidb/util/mock" + decoder "github.com/pingcap/tidb/util/rowDecoder" "github.com/pingcap/tidb/util/slice" "github.com/pingcap/tidb/util/sqlexec" "github.com/pingcap/tidb/util/stringutil" + "github.com/prometheus/client_golang/prometheus" "github.com/tikv/client-go/v2/tikv" "go.uber.org/zap" ) @@ -161,7 +166,7 @@ func (w *worker) onAddTablePartition(d *ddlCtx, t *meta.Meta, job *model.Job) (v for _, p := range tblInfo.Partition.AddingDefinitions { ids = append(ids, p.ID) } - if err := alterTableLabelRule(job.SchemaName, tblInfo, ids); err != nil { + if _, err := alterTableLabelRule(job.SchemaName, tblInfo, ids); err != nil { job.State = model.JobStateCancelled return ver, err } @@ -181,8 +186,7 @@ func (w *worker) onAddTablePartition(d *ddlCtx, t *meta.Meta, job *model.Job) (v // be finished. Otherwise the query to this partition will be blocked. 
needRetry, err := checkPartitionReplica(tblInfo.TiFlashReplica.Count, addingDefinitions, d) if err != nil { - ver, err = convertAddTablePartitionJob2RollbackJob(d, t, job, err, tblInfo) - return ver, err + return convertAddTablePartitionJob2RollbackJob(d, t, job, err, tblInfo) } if needRetry { // The new added partition hasn't been replicated. @@ -226,14 +230,16 @@ func (w *worker) onAddTablePartition(d *ddlCtx, t *meta.Meta, job *model.Job) (v return ver, errors.Trace(err) } -func alterTableLabelRule(schemaName string, meta *model.TableInfo, ids []int64) error { +// alterTableLabelRule updates Label Rules if they exists +// returns true if changed. +func alterTableLabelRule(schemaName string, meta *model.TableInfo, ids []int64) (bool, error) { tableRuleID := fmt.Sprintf(label.TableIDFormat, label.IDPrefix, schemaName, meta.Name.L) oldRule, err := infosync.GetLabelRules(context.TODO(), []string{tableRuleID}) if err != nil { - return errors.Trace(err) + return false, errors.Trace(err) } if len(oldRule) == 0 { - return nil + return false, nil } r, ok := oldRule[tableRuleID] @@ -241,10 +247,11 @@ func alterTableLabelRule(schemaName string, meta *model.TableInfo, ids []int64) rule := r.Reset(schemaName, meta.Name.L, "", ids...) err = infosync.PutLabelRule(context.TODO(), rule) if err != nil { - return errors.Wrapf(err, "failed to notify PD label rule") + return false, errors.Wrapf(err, "failed to notify PD label rule") } + return true, nil } - return nil + return false, nil } func alterTablePartitionBundles(t *meta.Meta, tblInfo *model.TableInfo, addingDefinitions []model.PartitionDefinition) ([]*placement.Bundle, error) { @@ -1307,6 +1314,28 @@ func checkAddPartitionNameUnique(tbInfo *model.TableInfo, pi *model.PartitionInf return nil } +func checkReorgPartitionNames(p *model.PartitionInfo, droppedNames []model.CIStr, pi *model.PartitionInfo) error { + partNames := make(map[string]struct{}) + oldDefs := p.Definitions + for _, oldDef := range oldDefs { + partNames[oldDef.Name.L] = struct{}{} + } + for _, delName := range droppedNames { + if _, ok := partNames[delName.L]; !ok { + return dbterror.ErrSameNamePartition.GenWithStackByArgs(delName) + } + delete(partNames, delName.L) + } + newDefs := pi.Definitions + for _, newDef := range newDefs { + if _, ok := partNames[newDef.Name.L]; ok { + return dbterror.ErrSameNamePartition.GenWithStackByArgs(newDef.Name) + } + partNames[newDef.Name.L] = struct{}{} + } + return nil +} + func checkAndOverridePartitionID(newTableInfo, oldTableInfo *model.TableInfo) error { // If any old partitionInfo has lost, that means the partition ID lost too, so did the data, repair failed. if newTableInfo.Partition == nil { @@ -1676,7 +1705,7 @@ func (w *worker) onDropTablePartition(d *ddlCtx, t *meta.Meta, job *model.Job) ( if err != nil { return ver, errors.Trace(err) } - if job.Type == model.ActionAddTablePartition { + if job.Type == model.ActionAddTablePartition || job.Type == model.ActionReorganizePartition { // It is rollbacked from adding table partition, just remove addingDefinitions from tableInfo. 
physicalTableIDs, pNames, rollbackBundles := rollbackAddingPartitionInfo(tblInfo) err = infosync.PutRuleBundlesWithDefaultRetry(context.TODO(), rollbackBundles) @@ -1690,7 +1719,7 @@ func (w *worker) onDropTablePartition(d *ddlCtx, t *meta.Meta, job *model.Job) ( return ver, errors.Wrapf(err, "failed to notify PD the label rules") } - if err := alterTableLabelRule(job.SchemaName, tblInfo, getIDs([]*model.TableInfo{tblInfo})); err != nil { + if _, err := alterTableLabelRule(job.SchemaName, tblInfo, getIDs([]*model.TableInfo{tblInfo})); err != nil { job.State = model.JobStateCancelled return ver, err } @@ -1723,7 +1752,7 @@ func (w *worker) onDropTablePartition(d *ddlCtx, t *meta.Meta, job *model.Job) ( return ver, errors.Wrapf(err, "failed to notify PD the label rules") } - if err := alterTableLabelRule(job.SchemaName, tblInfo, getIDs([]*model.TableInfo{tblInfo})); err != nil { + if _, err := alterTableLabelRule(job.SchemaName, tblInfo, getIDs([]*model.TableInfo{tblInfo})); err != nil { job.State = model.JobStateCancelled return ver, err } @@ -1762,7 +1791,7 @@ func (w *worker) onDropTablePartition(d *ddlCtx, t *meta.Meta, job *model.Job) ( } defer w.sessPool.put(sctx) rh := newReorgHandler(newSession(sctx)) - reorgInfo, err := getReorgInfoFromPartitions(d.jobContext(job.ID), d, rh, job, dbInfo, tbl, physicalTableIDs, elements) + reorgInfo, err := getReorgInfoFromPartitions(d.jobContext(job.ID), d, rh, job, dbInfo, pt, physicalTableIDs, elements) if err != nil || reorgInfo.first { // If we run reorg firstly, we should update the job snapshot version @@ -2159,6 +2188,665 @@ func (w *worker) onExchangeTablePartition(d *ddlCtx, t *meta.Meta, job *model.Jo return ver, nil } +func checkReorgPartition(t *meta.Meta, job *model.Job) (*model.TableInfo, []model.CIStr, *model.PartitionInfo, []model.PartitionDefinition, []model.PartitionDefinition, error) { + schemaID := job.SchemaID + tblInfo, err := GetTableInfoAndCancelFaultJob(t, job, schemaID) + if err != nil { + return nil, nil, nil, nil, nil, errors.Trace(err) + } + partInfo := &model.PartitionInfo{} + var partNames []model.CIStr + err = job.DecodeArgs(&partNames, &partInfo) + if err != nil { + job.State = model.JobStateCancelled + return nil, nil, nil, nil, nil, errors.Trace(err) + } + addingDefs := tblInfo.Partition.AddingDefinitions + droppingDefs := tblInfo.Partition.DroppingDefinitions + if len(addingDefs) == 0 { + addingDefs = []model.PartitionDefinition{} + } + if len(droppingDefs) == 0 { + droppingDefs = []model.PartitionDefinition{} + } + return tblInfo, partNames, partInfo, droppingDefs, addingDefs, nil +} + +func (w *worker) onReorganizePartition(d *ddlCtx, t *meta.Meta, job *model.Job) (ver int64, _ error) { + // Handle the rolling back job + if job.IsRollingback() { + ver, err := w.onDropTablePartition(d, t, job) + if err != nil { + return ver, errors.Trace(err) + } + return ver, nil + } + + tblInfo, partNamesCIStr, partInfo, _, addingDefinitions, err := checkReorgPartition(t, job) + if err != nil { + return ver, err + } + partNames := make([]string, len(partNamesCIStr)) + for i := range partNamesCIStr { + partNames[i] = partNamesCIStr[i].L + } + + // In order to skip maintaining the state check in partitionDefinition, TiDB use dropping/addingDefinition instead of state field. + // So here using `job.SchemaState` to judge what the stage of this job is. + originalState := job.SchemaState + switch job.SchemaState { + case model.StateNone: + // job.SchemaState == model.StateNone means the job is in the initial state of reorg partition. 
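+		// The job then progresses through
+		// StateNone -> StateDeleteOnly -> StateWriteOnly -> StateWriteReorganization
+		// -> StateDeleteReorganization -> done, handled by the cases below.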
+ // Here should use partInfo from job directly and do some check action. + // In case there was a race for queueing different schema changes on the same + // table and the checks was not done on the current schema version. + // The partInfo may have been checked against an older schema version for example. + // If the check is done here, it does not need to be repeated, since no other + // DDL on the same table can be run concurrently. + err = checkAddPartitionTooManyPartitions(uint64(len(tblInfo.Partition.Definitions) + + len(partInfo.Definitions) - + len(partNames))) + if err != nil { + job.State = model.JobStateCancelled + return ver, errors.Trace(err) + } + + err = checkReorgPartitionNames(tblInfo.Partition, partNamesCIStr, partInfo) + if err != nil { + job.State = model.JobStateCancelled + return ver, errors.Trace(err) + } + + // Re-check that the dropped/added partitions are compatible with current definition + firstPartIdx, lastPartIdx, idMap, err := getReplacedPartitionIDs(partNamesCIStr, tblInfo.Partition) + if err != nil { + job.State = model.JobStateCancelled + return ver, err + } + sctx := w.sess.Context + if err = checkReorgPartitionDefs(sctx, tblInfo, partInfo, firstPartIdx, lastPartIdx, idMap); err != nil { + job.State = model.JobStateCancelled + return ver, err + } + + // move the adding definition into tableInfo. + updateAddingPartitionInfo(partInfo, tblInfo) + orgDefs := tblInfo.Partition.Definitions + _ = updateDroppingPartitionInfo(tblInfo, partNames) + // Reset original partitions, and keep DroppedDefinitions + tblInfo.Partition.Definitions = orgDefs + + // modify placement settings + for _, def := range tblInfo.Partition.AddingDefinitions { + if _, err = checkPlacementPolicyRefValidAndCanNonValidJob(t, job, def.PlacementPolicyRef); err != nil { + // job.State = model.JobStateCancelled may be set depending on error in function above. + return ver, errors.Trace(err) + } + } + + // From now on we cannot just cancel the DDL, we must roll back if changesMade! + changesMade := false + if tblInfo.TiFlashReplica != nil { + // Must set placement rule, and make sure it succeeds. 
+ if err := infosync.ConfigureTiFlashPDForPartitions(true, &tblInfo.Partition.AddingDefinitions, tblInfo.TiFlashReplica.Count, &tblInfo.TiFlashReplica.LocationLabels, tblInfo.ID); err != nil { + logutil.BgLogger().Error("ConfigureTiFlashPDForPartitions fails", zap.Error(err)) + job.State = model.JobStateCancelled + return ver, errors.Trace(err) + } + changesMade = true + // In the next step, StateDeleteOnly, wait to verify the TiFlash replicas are OK + } + + bundles, err := alterTablePartitionBundles(t, tblInfo, tblInfo.Partition.AddingDefinitions) + if err != nil { + if !changesMade { + job.State = model.JobStateCancelled + return ver, errors.Trace(err) + } + return convertAddTablePartitionJob2RollbackJob(d, t, job, err, tblInfo) + } + + if len(bundles) > 0 { + if err = infosync.PutRuleBundlesWithDefaultRetry(context.TODO(), bundles); err != nil { + if !changesMade { + job.State = model.JobStateCancelled + return ver, errors.Wrapf(err, "failed to notify PD the placement rules") + } + return convertAddTablePartitionJob2RollbackJob(d, t, job, err, tblInfo) + } + changesMade = true + } + + ids := getIDs([]*model.TableInfo{tblInfo}) + for _, p := range tblInfo.Partition.AddingDefinitions { + ids = append(ids, p.ID) + } + changed, err := alterTableLabelRule(job.SchemaName, tblInfo, ids) + changesMade = changesMade || changed + if err != nil { + if !changesMade { + job.State = model.JobStateCancelled + return ver, err + } + return convertAddTablePartitionJob2RollbackJob(d, t, job, err, tblInfo) + } + + // Doing the preSplitAndScatter here, since all checks are completed, + // and we will soon start writing to the new partitions. + if s, ok := d.store.(kv.SplittableStore); ok && s != nil { + // partInfo only contains the AddingPartitions + splitPartitionTableRegion(w.sess.Context, s, tblInfo, partInfo, true) + } + + // Assume we cannot have more than MaxUint64 rows, set the progress to 1/10 of that. + metrics.GetBackfillProgressByLabel(metrics.LblReorgPartition, job.SchemaName, tblInfo.Name.String()).Set(0.1 / float64(math.MaxUint64)) + job.SchemaState = model.StateDeleteOnly + tblInfo.Partition.DDLState = model.StateDeleteOnly + ver, err = updateVersionAndTableInfoWithCheck(d, t, job, tblInfo, true) + if err != nil { + return ver, errors.Trace(err) + } + + // Is really both StateDeleteOnly AND StateWriteOnly needed? + // If transaction A in WriteOnly inserts row 1 (into both new and old partition set) + // and then transaction B in DeleteOnly deletes that row (in both new and old) + // does really transaction B need to do the delete in the new partition? + // Yes, otherwise it would still be there when the WriteReorg happens, + // and WriteReorg would only copy existing rows to the new table, so unless it is + // deleted it would result in a ghost row! + // What about update then? + // Updates also need to be handled for new partitions in DeleteOnly, + // since it would not be overwritten during Reorganize phase. + // BUT if the update results in adding in one partition and deleting in another, + // THEN only the delete must happen in the new partition set, not the insert! + case model.StateDeleteOnly: + // This state is to confirm all servers can not see the new partitions when reorg is running, + // so that all deletes will be done in both old and new partitions when in either DeleteOnly + // or WriteOnly state. + // Also using the state for checking that the optional TiFlash replica is available, making it + // in a state without (much) data and easy to retry without side effects. 
+ + // Reason for having it here, is to make it easy for retry, and better to make sure it is in-sync + // as early as possible, to avoid a long wait after the data copying. + if tblInfo.TiFlashReplica != nil && tblInfo.TiFlashReplica.Available { + // For available state, the new added partition should wait its replica to + // be finished, otherwise the query to this partition will be blocked. + count := tblInfo.TiFlashReplica.Count + needRetry, err := checkPartitionReplica(count, addingDefinitions, d) + if err != nil { + // need to rollback, since we tried to register the new + // partitions before! + return convertAddTablePartitionJob2RollbackJob(d, t, job, err, tblInfo) + } + if needRetry { + // The new added partition hasn't been replicated. + // Do nothing to the job this time, wait next worker round. + time.Sleep(tiflashCheckTiDBHTTPAPIHalfInterval) + // Set the error here which will lead this job exit when it's retry times beyond the limitation. + return ver, errors.Errorf("[ddl] add partition wait for tiflash replica to complete") + } + + // When TiFlash Replica is ready, we must move them into `AvailablePartitionIDs`. + // Since onUpdateFlashReplicaStatus cannot see the partitions yet (not public) + for _, d := range addingDefinitions { + tblInfo.TiFlashReplica.AvailablePartitionIDs = append(tblInfo.TiFlashReplica.AvailablePartitionIDs, d.ID) + } + } + + job.SchemaState = model.StateWriteOnly + tblInfo.Partition.DDLState = model.StateWriteOnly + metrics.GetBackfillProgressByLabel(metrics.LblReorgPartition, job.SchemaName, tblInfo.Name.String()).Set(0.2 / float64(math.MaxUint64)) + ver, err = updateVersionAndTableInfo(d, t, job, tblInfo, originalState != job.SchemaState) + case model.StateWriteOnly: + // Insert this state to confirm all servers can see the new partitions when reorg is running, + // so that new data will be updated in both old and new partitions when reorganizing. + job.SnapshotVer = 0 + job.SchemaState = model.StateWriteReorganization + tblInfo.Partition.DDLState = model.StateWriteReorganization + metrics.GetBackfillProgressByLabel(metrics.LblReorgPartition, job.SchemaName, tblInfo.Name.String()).Set(0.3 / float64(math.MaxUint64)) + ver, err = updateVersionAndTableInfo(d, t, job, tblInfo, originalState != job.SchemaState) + case model.StateWriteReorganization: + physicalTableIDs := getPartitionIDsFromDefinitions(tblInfo.Partition.DroppingDefinitions) + tbl, err2 := getTable(d.store, job.SchemaID, tblInfo) + if err2 != nil { + return ver, errors.Trace(err2) + } + // TODO: If table has global indexes, we need reorg to clean up them. + // and then add the new partition ids back... 
+ if _, ok := tbl.(table.PartitionedTable); ok && hasGlobalIndex(tblInfo) { + err = errors.Trace(dbterror.ErrCancelledDDLJob.GenWithStack("global indexes is not supported yet for reorganize partition")) + return convertAddTablePartitionJob2RollbackJob(d, t, job, err, tblInfo) + } + var done bool + done, ver, err = doPartitionReorgWork(w, d, t, job, tbl, physicalTableIDs) + + if !done { + return ver, err + } + + firstPartIdx, lastPartIdx, idMap, err2 := getReplacedPartitionIDs(partNamesCIStr, tblInfo.Partition) + failpoint.Inject("reorgPartWriteReorgReplacedPartIDsFail", func(val failpoint.Value) { + if val.(bool) { + err2 = errors.New("Injected error by reorgPartWriteReorgReplacedPartIDsFail") + } + }) + if err2 != nil { + return ver, err2 + } + newDefs := getReorganizedDefinitions(tblInfo.Partition, firstPartIdx, lastPartIdx, idMap) + + // From now on, use the new definitions, but keep the Adding and Dropping for double write + tblInfo.Partition.Definitions = newDefs + tblInfo.Partition.Num = uint64(len(newDefs)) + + // Now all the data copying is done, but we cannot simply remove the droppingDefinitions + // since they are a part of the normal Definitions that other nodes with + // the current schema version. So we need to double write for one more schema version + job.SchemaState = model.StateDeleteReorganization + tblInfo.Partition.DDLState = model.StateDeleteReorganization + ver, err = updateVersionAndTableInfo(d, t, job, tblInfo, originalState != job.SchemaState) + + case model.StateDeleteReorganization: + // Drop the droppingDefinitions and finish the DDL + // This state is needed for the case where client A sees the schema + // with version of StateWriteReorg and would not see updates of + // client B that writes to the new partitions, previously + // addingDefinitions, since it would not double write to + // the droppingDefinitions during this time + // By adding StateDeleteReorg state, client B will write to both + // the new (previously addingDefinitions) AND droppingDefinitions + + // Register the droppingDefinitions ids for rangeDelete + // and the addingDefinitions for handling in the updateSchemaVersion + physicalTableIDs := getPartitionIDsFromDefinitions(tblInfo.Partition.DroppingDefinitions) + newIDs := getPartitionIDsFromDefinitions(partInfo.Definitions) + job.CtxVars = []interface{}{physicalTableIDs, newIDs} + definitionsToDrop := tblInfo.Partition.DroppingDefinitions + tblInfo.Partition.DroppingDefinitions = nil + tblInfo.Partition.AddingDefinitions = nil + ver, err = updateVersionAndTableInfo(d, t, job, tblInfo, true) + failpoint.Inject("reorgPartWriteReorgSchemaVersionUpdateFail", func(val failpoint.Value) { + if val.(bool) { + err = errors.New("Injected error by reorgPartWriteReorgSchemaVersionUpdateFail") + } + }) + if err != nil { + return ver, errors.Trace(err) + } + job.SchemaState = model.StateNone + tblInfo.Partition.DDLState = model.StateNone + job.FinishTableJob(model.JobStateDone, model.StateNone, ver, tblInfo) + // How to handle this? + // Seems to only trigger asynchronous update of statistics. + // Should it actually be synchronous? + asyncNotifyEvent(d, &util.Event{Tp: model.ActionReorganizePartition, TableInfo: tblInfo, PartInfo: &model.PartitionInfo{Definitions: definitionsToDrop}}) + // A background job will be created to delete old partition data. 
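+		// These IDs are later decoded by insertJobIntoDeleteRangeTable (delete_range.go),
+		// so the data of the replaced partitions is removed through the delete-range GC
+		// mechanism (jobNeedGC now returns true for ActionReorganizePartition).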
+ job.Args = []interface{}{physicalTableIDs} + + default: + err = dbterror.ErrInvalidDDLState.GenWithStackByArgs("partition", job.SchemaState) + } + + return ver, errors.Trace(err) +} + +func doPartitionReorgWork(w *worker, d *ddlCtx, t *meta.Meta, job *model.Job, tbl table.Table, physTblIDs []int64) (done bool, ver int64, err error) { + job.ReorgMeta.ReorgTp = model.ReorgTypeTxn + sctx, err1 := w.sessPool.get() + if err1 != nil { + return done, ver, err1 + } + defer w.sessPool.put(sctx) + rh := newReorgHandler(newSession(sctx)) + elements := BuildElements(tbl.Meta().Columns[0], tbl.Meta().Indices) + partTbl, ok := tbl.(table.PartitionedTable) + if !ok { + return false, ver, dbterror.ErrUnsupportedReorganizePartition.GenWithStackByArgs() + } + dbInfo, err := t.GetDatabase(job.SchemaID) + if err != nil { + return false, ver, errors.Trace(err) + } + reorgInfo, err := getReorgInfoFromPartitions(d.jobContext(job.ID), d, rh, job, dbInfo, partTbl, physTblIDs, elements) + err = w.runReorgJob(rh, reorgInfo, tbl.Meta(), d.lease, func() (reorgErr error) { + defer tidbutil.Recover(metrics.LabelDDL, "doPartitionReorgWork", + func() { + reorgErr = dbterror.ErrCancelledDDLJob.GenWithStack("reorganize partition for table `%v` panic", tbl.Meta().Name) + }, false) + return w.reorgPartitionDataAndIndex(tbl, reorgInfo) + }) + if err != nil { + if dbterror.ErrWaitReorgTimeout.Equal(err) { + // If timeout, we should return, check for the owner and re-wait job done. + return false, ver, nil + } + if kv.IsTxnRetryableError(err) { + return false, ver, errors.Trace(err) + } + if err1 := rh.RemoveDDLReorgHandle(job, reorgInfo.elements); err1 != nil { + logutil.BgLogger().Warn("[ddl] reorg partition job failed, RemoveDDLReorgHandle failed, can't convert job to rollback", + zap.String("job", job.String()), zap.Error(err1)) + } + logutil.BgLogger().Warn("[ddl] reorg partition job failed, convert job to rollback", zap.String("job", job.String()), zap.Error(err)) + ver, err = convertAddTablePartitionJob2RollbackJob(d, t, job, err, tbl.Meta()) + return false, ver, errors.Trace(err) + } + return true, ver, err +} + +type reorgPartitionWorker struct { + *backfillCtx + metricCounter prometheus.Counter + + // Static allocated to limit memory allocations + rowRecords []*rowRecord + rowDecoder *decoder.RowDecoder + rowMap map[int64]types.Datum + writeColOffsetMap map[int64]int + maxOffset int + reorgedTbl table.PartitionedTable + + jobContext *JobContext +} + +func newReorgPartitionWorker(sessCtx sessionctx.Context, i int, t table.PhysicalTable, decodeColMap map[int64]decoder.Column, reorgInfo *reorgInfo, jc *JobContext) (*reorgPartitionWorker, error) { + reorgedTbl, err := tables.GetReorganizedPartitionedTable(t) + if err != nil { + return nil, errors.Trace(err) + } + pt := t.GetPartitionedTable() + if pt == nil { + return nil, dbterror.ErrUnsupportedReorganizePartition.GenWithStackByArgs() + } + partColIDs := pt.GetPartitionColumnIDs() + writeColOffsetMap := make(map[int64]int, len(partColIDs)) + maxOffset := 0 + for _, col := range pt.Cols() { + found := false + for _, id := range partColIDs { + if col.ID == id { + found = true + break + } + } + if !found { + continue + } + writeColOffsetMap[col.ID] = col.Offset + maxOffset = mathutil.Max[int](maxOffset, col.Offset) + } + return &reorgPartitionWorker{ + backfillCtx: newBackfillCtx(reorgInfo.d, i, sessCtx, reorgInfo.ReorgMeta.ReorgTp, reorgInfo.SchemaName, t), + metricCounter: metrics.BackfillTotalCounter.WithLabelValues(metrics.GenerateReorgLabel("reorg_partition_rate", 
reorgInfo.SchemaName, t.Meta().Name.String())), + rowDecoder: decoder.NewRowDecoder(t, t.WritableCols(), decodeColMap), + rowMap: make(map[int64]types.Datum, len(decodeColMap)), + jobContext: jc, + writeColOffsetMap: writeColOffsetMap, + maxOffset: maxOffset, + reorgedTbl: reorgedTbl, + }, nil +} + +func (w *reorgPartitionWorker) GetTasks() ([]*BackfillJob, error) { + panic("[ddl] reorg partition worker GetTask function doesn't implement") +} + +func (w *reorgPartitionWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) { + oprStartTime := time.Now() + ctx := kv.WithInternalSourceType(context.Background(), w.jobContext.ddlJobSourceType()) + errInTxn = kv.RunInNewTxn(ctx, w.sessCtx.GetStore(), true, func(ctx context.Context, txn kv.Transaction) error { + taskCtx.addedCount = 0 + taskCtx.scanCount = 0 + txn.SetOption(kv.Priority, handleRange.priority) + if tagger := w.GetCtx().getResourceGroupTaggerForTopSQL(handleRange.getJobID()); tagger != nil { + txn.SetOption(kv.ResourceGroupTagger, tagger) + } + + rowRecords, nextKey, taskDone, err := w.fetchRowColVals(txn, handleRange) + if err != nil { + return errors.Trace(err) + } + taskCtx.nextKey = nextKey + taskCtx.done = taskDone + + warningsMap := make(map[errors.ErrorID]*terror.Error) + warningsCountMap := make(map[errors.ErrorID]int64) + for _, prr := range rowRecords { + taskCtx.scanCount++ + + err = txn.Set(prr.key, prr.vals) + if err != nil { + return errors.Trace(err) + } + taskCtx.addedCount++ + if prr.warning != nil { + if _, ok := warningsCountMap[prr.warning.ID()]; ok { + warningsCountMap[prr.warning.ID()]++ + } else { + warningsCountMap[prr.warning.ID()] = 1 + warningsMap[prr.warning.ID()] = prr.warning + } + } + // TODO: Future optimization: also write the indexes here? + // What if the transaction limit is just enough for a single row, without index? + // Hmm, how could that be in the first place? + // For now, implement the batch-txn w.addTableIndex, + // since it already exists and is in use + } + + // Collect the warnings. + taskCtx.warnings, taskCtx.warningsCount = warningsMap, warningsCountMap + + // also add the index entries here? And make sure they are not added somewhere else + + return nil + }) + logSlowOperations(time.Since(oprStartTime), "BackfillDataInTxn", 3000) + + return +} + +func (w *reorgPartitionWorker) fetchRowColVals(txn kv.Transaction, taskRange reorgBackfillTask) ([]*rowRecord, kv.Key, bool, error) { + w.rowRecords = w.rowRecords[:0] + startTime := time.Now() + + // taskDone means that the added handle is out of taskRange.endHandle. 
+ taskDone := false + sysTZ := w.sessCtx.GetSessionVars().StmtCtx.TimeZone + + tmpRow := make([]types.Datum, w.maxOffset+1) + var lastAccessedHandle kv.Key + oprStartTime := startTime + err := iterateSnapshotKeys(w.GetCtx().jobContext(taskRange.getJobID()), w.sessCtx.GetStore(), taskRange.priority, w.table.RecordPrefix(), txn.StartTS(), taskRange.startKey, taskRange.endKey, + func(handle kv.Handle, recordKey kv.Key, rawRow []byte) (bool, error) { + oprEndTime := time.Now() + logSlowOperations(oprEndTime.Sub(oprStartTime), "iterateSnapshotKeys in reorgPartitionWorker fetchRowColVals", 0) + oprStartTime = oprEndTime + + if taskRange.endInclude { + taskDone = recordKey.Cmp(taskRange.endKey) > 0 + } else { + taskDone = recordKey.Cmp(taskRange.endKey) >= 0 + } + + if taskDone || len(w.rowRecords) >= w.batchCnt { + return false, nil + } + + _, err := w.rowDecoder.DecodeTheExistedColumnMap(w.sessCtx, handle, rawRow, sysTZ, w.rowMap) + if err != nil { + return false, errors.Trace(err) + } + + // Set the partitioning columns and calculate which partition to write to + for colID, offset := range w.writeColOffsetMap { + if d, ok := w.rowMap[colID]; ok { + tmpRow[offset] = d + } else { + return false, dbterror.ErrUnsupportedReorganizePartition.GenWithStackByArgs() + } + } + p, err := w.reorgedTbl.GetPartitionByRow(w.sessCtx, tmpRow) + if err != nil { + return false, errors.Trace(err) + } + pid := p.GetPhysicalID() + newKey := tablecodec.EncodeTablePrefix(pid) + newKey = append(newKey, recordKey[len(newKey):]...) + w.rowRecords = append(w.rowRecords, &rowRecord{ + key: newKey, vals: rawRow, + }) + + w.cleanRowMap() + lastAccessedHandle = recordKey + if recordKey.Cmp(taskRange.endKey) == 0 { + taskDone = true + return false, nil + } + return true, nil + }) + + if len(w.rowRecords) == 0 { + taskDone = true + } + + logutil.BgLogger().Debug("[ddl] txn fetches handle info", zap.Uint64("txnStartTS", txn.StartTS()), zap.String("taskRange", taskRange.String()), zap.Duration("takeTime", time.Since(startTime))) + return w.rowRecords, getNextHandleKey(taskRange, taskDone, lastAccessedHandle), taskDone, errors.Trace(err) +} + +func (w *reorgPartitionWorker) cleanRowMap() { + for id := range w.rowMap { + delete(w.rowMap, id) + } +} + +func (w *reorgPartitionWorker) AddMetricInfo(cnt float64) { + w.metricCounter.Add(cnt) +} + +func (w *reorgPartitionWorker) String() string { + return typeReorgPartitionWorker.String() +} + +func (w *reorgPartitionWorker) GetTask() (*BackfillJob, error) { + panic("[ddl] partition reorg worker does not implement GetTask function") +} + +func (w *reorgPartitionWorker) UpdateTask(*BackfillJob) error { + panic("[ddl] partition reorg worker does not implement UpdateTask function") +} + +func (w *reorgPartitionWorker) FinishTask(*BackfillJob) error { + panic("[ddl] partition reorg worker does not implement FinishTask function") +} + +func (w *reorgPartitionWorker) GetCtx() *backfillCtx { + return w.backfillCtx +} + +func (w *worker) reorgPartitionDataAndIndex(t table.Table, reorgInfo *reorgInfo) error { + // First copy all table data to the new partitions + // from each of the DroppingDefinitions partitions. + // Then create all indexes on the AddingDefinitions partitions + // for each new index, one partition at a time. 
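+	// The elements built by BuildElements drive this: the first (column) element
+	// selects the row copy phase (updatePhysicalTableRow), the remaining (index)
+	// elements are rebuilt one by one via addTableIndex. Progress is persisted
+	// (see UpdateReorgMeta below) so an interrupted job can resume where it stopped.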
+
+	// Copy the data from the DroppingDefinitions to the AddingDefinitions
+	if bytes.Equal(reorgInfo.currElement.TypeKey, meta.ColumnElementKey) {
+		err := w.updatePhysicalTableRow(t, reorgInfo)
+		if err != nil {
+			return errors.Trace(err)
+		}
+	}
+
+	failpoint.Inject("reorgPartitionAfterDataCopy", func(val failpoint.Value) {
+		//nolint:forcetypeassert
+		if val.(bool) {
+			panic("panic test in reorgPartitionAfterDataCopy")
+		}
+	})
+
+	// TODO: Rewrite this to build all indexes in a single pass of addTableIndex,
+	// instead of calling it once per index (which reads the table multiple times).
+	// For now, keep one call per index to keep the flow easy to follow.
+	firstNewPartitionID := t.Meta().Partition.AddingDefinitions[0].ID
+	startElementOffset := 0
+	//startElementOffsetToResetHandle := -1
+	// This backfill job starts with backfilling index data, whose index ID is currElement.ID.
+	if !bytes.Equal(reorgInfo.currElement.TypeKey, meta.IndexElementKey) {
+		// First run; index data backfilling has not started yet.
+		// Start with the first new partition.
+		// TODO: handle remove partitioning
+		reorgInfo.PhysicalTableID = firstNewPartitionID
+	} else {
+		// The job was interrupted and has been restarted;
+		// resume from the element where it left off.
+		for i, element := range reorgInfo.elements[1:] {
+			if reorgInfo.currElement.ID == element.ID {
+				startElementOffset = i
+				//startElementOffsetToResetHandle = i
+				break
+			}
+		}
+	}
+
+	for i := startElementOffset; i < len(reorgInfo.elements[1:]); i++ {
+		// Now build the indexes in the new partitions
+		var physTbl table.PhysicalTable
+		if tbl, ok := t.(table.PartitionedTable); ok {
+			physTbl = tbl.GetPartition(reorgInfo.PhysicalTableID)
+		} else if tbl, ok := t.(table.PhysicalTable); ok {
+			// This may be used when partitioning a non-partitioned table
+			physTbl = tbl
+		}
+		// Get the original start handle and end handle.
+		currentVer, err := getValidCurrentVersion(reorgInfo.d.store)
+		if err != nil {
+			return errors.Trace(err)
+		}
+		// TODO: Can we improve this in case of a crash?
+		// E.g. if reorgInfo.PhysicalTableID and the element are unchanged, and the table ID
+		// in the key prefix of reorgInfo.StartKey and reorgInfo.EndKey matches PhysicalTableID,
+		// then do not reset the reorgInfo start/end key.
+		startHandle, endHandle, err := getTableRange(reorgInfo.d.jobContext(reorgInfo.Job.ID), reorgInfo.d, physTbl, currentVer.Ver, reorgInfo.Job.Priority)
+		if err != nil {
+			return errors.Trace(err)
+		}
+
+		// Always (re)start with the full PhysicalTable range
+		reorgInfo.StartKey, reorgInfo.EndKey = startHandle, endHandle
+
+		// Update the element in the reorgCtx to keep the atomic access for daemon-worker.
+		w.getReorgCtx(reorgInfo.Job.ID).setCurrentElement(reorgInfo.elements[i+1])
+
+		// Update the element in the reorgInfo for updating the reorg meta below.
+		reorgInfo.currElement = reorgInfo.elements[i+1]
+		// Write the reorg info to store so the whole reorganize process can recover from panic.
+ err = reorgInfo.UpdateReorgMeta(reorgInfo.StartKey, w.sessPool) + logutil.BgLogger().Info("[ddl] update column and indexes", + zap.Int64("jobID", reorgInfo.Job.ID), + zap.ByteString("elementType", reorgInfo.currElement.TypeKey), + zap.Int64("elementID", reorgInfo.currElement.ID), + zap.Int64("partitionTableId", physTbl.GetPhysicalID()), + zap.String("startHandle", hex.EncodeToString(reorgInfo.StartKey)), + zap.String("endHandle", hex.EncodeToString(reorgInfo.EndKey))) + if err != nil { + return errors.Trace(err) + } + err = w.addTableIndex(t, reorgInfo) + if err != nil { + return errors.Trace(err) + } + reorgInfo.PhysicalTableID = firstNewPartitionID + } + failpoint.Inject("reorgPartitionAfterIndex", func(val failpoint.Value) { + //nolint:forcetypeassert + if val.(bool) { + panic("panic test in reorgPartitionAfterIndex") + } + }) + return nil +} + func bundlesForExchangeTablePartition(t *meta.Meta, job *model.Job, pt *model.TableInfo, newPar *model.PartitionDefinition, nt *model.TableInfo) ([]*placement.Bundle, error) { bundles := make([]*placement.Bundle, 0, 3) diff --git a/ddl/reorg.go b/ddl/reorg.go index f520f6852fa72..3bde3fdaa844b 100644 --- a/ddl/reorg.go +++ b/ddl/reorg.go @@ -18,6 +18,7 @@ import ( "encoding/hex" "fmt" "strconv" + "strings" "sync" "sync/atomic" "time" @@ -154,6 +155,7 @@ func (rc *reorgCtx) getRowCount() int64 { // 1: add index // 2: alter column type // 3: clean global index +// 4: reorganize partitions /* ddl goroutine >---------+ ^ | @@ -304,6 +306,10 @@ func updateBackfillProgress(w *worker, reorgInfo *reorgInfo, tblInfo *model.Tabl if progress > 1 { progress = 1 } + logutil.BgLogger().Debug("[ddl] update progress", + zap.Float64("progress", progress), + zap.Int64("addedRowCount", addedRowCount), + zap.Int64("totalCount", totalCount)) } switch reorgInfo.Type { case model.ActionAddIndex, model.ActionAddPrimaryKey: @@ -316,6 +322,8 @@ func updateBackfillProgress(w *worker, reorgInfo *reorgInfo, tblInfo *model.Tabl metrics.GetBackfillProgressByLabel(label, reorgInfo.SchemaName, tblInfo.Name.String()).Set(progress * 100) case model.ActionModifyColumn: metrics.GetBackfillProgressByLabel(metrics.LblModifyColumn, reorgInfo.SchemaName, tblInfo.Name.String()).Set(progress * 100) + case model.ActionReorganizePartition: + metrics.GetBackfillProgressByLabel(metrics.LblReorgPartition, reorgInfo.SchemaName, tblInfo.Name.String()).Set(progress * 100) } } @@ -332,8 +340,20 @@ func getTableTotalCount(w *worker, tblInfo *model.TableInfo) int64 { if !ok { return statistics.PseudoRowCount } - sql := "select table_rows from information_schema.tables where tidb_table_id=%?;" - rows, _, err := executor.ExecRestrictedSQL(w.ctx, nil, sql, tblInfo.ID) + var rows []chunk.Row + if tblInfo.Partition != nil && len(tblInfo.Partition.DroppingDefinitions) > 0 { + // if Reorganize Partition, only select number of rows from the selected partitions! 
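+		// The DroppingDefinitions are the partitions being replaced by the
+		// reorganization, i.e. the source of the rows that are copied.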
+		defs := tblInfo.Partition.DroppingDefinitions
+		partIDs := make([]string, 0, len(defs))
+		for _, def := range defs {
+			partIDs = append(partIDs, strconv.FormatInt(def.ID, 10))
+		}
+		sql := "select sum(table_rows) from information_schema.partitions where tidb_partition_id in (%?);"
+		rows, _, err = executor.ExecRestrictedSQL(w.ctx, nil, sql, strings.Join(partIDs, ","))
+	} else {
+		sql := "select table_rows from information_schema.tables where tidb_table_id=%?;"
+		rows, _, err = executor.ExecRestrictedSQL(w.ctx, nil, sql, tblInfo.ID)
+	}
 	if err != nil {
 		return statistics.PseudoRowCount
 	}
@@ -680,7 +700,7 @@ func getReorgInfo(ctx *JobContext, d *ddlCtx, rh *reorgHandler, job *model.Job,
 	return &info, nil
 }
 
-func getReorgInfoFromPartitions(ctx *JobContext, d *ddlCtx, rh *reorgHandler, job *model.Job, dbInfo *model.DBInfo, tbl table.Table, partitionIDs []int64, elements []*meta.Element) (*reorgInfo, error) {
+func getReorgInfoFromPartitions(ctx *JobContext, d *ddlCtx, rh *reorgHandler, job *model.Job, dbInfo *model.DBInfo, tbl table.PartitionedTable, partitionIDs []int64, elements []*meta.Element) (*reorgInfo, error) {
 	var (
 		element *meta.Element
 		start   kv.Key
@@ -698,8 +718,9 @@ func getReorgInfoFromPartitions(ctx *JobContext, d *ddlCtx, rh *reorgHandler, jo
 		return nil, errors.Trace(err)
 	}
 	pid = partitionIDs[0]
-	tb := tbl.(table.PartitionedTable).GetPartition(pid)
-	start, end, err = getTableRange(ctx, d, tb, ver.Ver, job.Priority)
+	physTbl := tbl.GetPartition(pid)
+
+	start, end, err = getTableRange(ctx, d, physTbl, ver.Ver, job.Priority)
 	if err != nil {
 		return nil, errors.Trace(err)
 	}
diff --git a/ddl/reorg_partition_test.go b/ddl/reorg_partition_test.go
new file mode 100644
index 0000000000000..fb0ae51f29b25
--- /dev/null
+++ b/ddl/reorg_partition_test.go
@@ -0,0 +1,822 @@
+// Copyright 2023 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ddl_test
+
+import (
+	"bytes"
+	"context"
+	"encoding/hex"
+	"fmt"
+	"testing"
+
+	"github.com/pingcap/failpoint"
+	"github.com/pingcap/tidb/ddl/internal/callback"
+	"github.com/pingcap/tidb/domain"
+	"github.com/pingcap/tidb/errno"
+	"github.com/pingcap/tidb/parser/model"
+	"github.com/pingcap/tidb/sessionctx"
+	"github.com/pingcap/tidb/sessiontxn"
+	"github.com/pingcap/tidb/table"
+	"github.com/pingcap/tidb/tablecodec"
+	"github.com/pingcap/tidb/testkit"
+	"github.com/pingcap/tidb/util/logutil"
+	"github.com/pingcap/tidb/util/mathutil"
+	"github.com/stretchr/testify/require"
+	"go.uber.org/zap"
+)
+
+type allTableData struct {
+	keys [][]byte
+	vals [][]byte
+	tp   []string
+}
+
+// TODO: Create a more generic function that gets all accessible table IDs
+// from all schemas, and checks the full key space so that there are no
+// keys for non-existing table IDs. Also figure out how to wait for deleteRange.
+// noNewTablesAfter checks that there is no accessible data after the given table,
+// assuming that table IDs are only increasing.
+// To be used during failure testing of ALTER, to make sure cleanup is done.
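+// The check starts scanning just above the table's highest physical table ID (the
+// max of the table ID and its partition IDs) and fails if it finds a table key that
+// is neither covered by a pending range in mysql.gc_delete_range nor owned by an
+// internal system table.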
+func noNewTablesAfter(t *testing.T, tk *testkit.TestKit, ctx sessionctx.Context, tbl table.Table) { + waitForGC := tk.MustQuery(`select start_key, end_key from mysql.gc_delete_range`).Rows() + require.NoError(t, sessiontxn.NewTxn(context.Background(), ctx)) + txn, err := ctx.Txn(true) + require.NoError(t, err) + defer func() { + err := txn.Rollback() + require.NoError(t, err) + }() + // Get max tableID (if partitioned) + tblID := tbl.Meta().ID + if pt := tbl.GetPartitionedTable(); pt != nil { + defs := pt.Meta().Partition.Definitions + { + for i := range defs { + tblID = mathutil.Max[int64](tblID, defs[i].ID) + } + } + } + prefix := tablecodec.EncodeTablePrefix(tblID + 1) + it, err := txn.Iter(prefix, nil) + require.NoError(t, err) +ROW: + for it.Valid() { + for _, rowGC := range waitForGC { + // OK if queued for range delete / GC + hexString := fmt.Sprintf("%v", rowGC[0]) + start, err := hex.DecodeString(hexString) + require.NoError(t, err) + hexString = fmt.Sprintf("%v", rowGC[1]) + end, err := hex.DecodeString(hexString) + require.NoError(t, err) + if bytes.Compare(start, it.Key()) >= 0 && bytes.Compare(it.Key(), end) < 0 { + it.Close() + it, err = txn.Iter(end, nil) + require.NoError(t, err) + continue ROW + } + } + foundTblID := tablecodec.DecodeTableID(it.Key()) + // There are internal table ids starting from MaxInt48 -1 and allocating decreasing ids + // Allow 0xFF of them, See JobTableID, ReorgTableID, HistoryTableID, MDLTableID + require.False(t, it.Key()[0] == 't' && foundTblID < 0xFFFFFFFFFF00, "Found table data after highest physical Table ID %d < %d", tblID, foundTblID) + break + } +} + +func getAllDataForPhysicalTable(t *testing.T, ctx sessionctx.Context, physTable table.PhysicalTable) allTableData { + require.NoError(t, sessiontxn.NewTxn(context.Background(), ctx)) + txn, err := ctx.Txn(true) + require.NoError(t, err) + defer func() { + err := txn.Rollback() + require.NoError(t, err) + }() + + all := allTableData{ + keys: make([][]byte, 0), + vals: make([][]byte, 0), + tp: make([]string, 0), + } + pid := physTable.GetPhysicalID() + prefix := tablecodec.EncodeTablePrefix(pid) + it, err := txn.Iter(prefix, nil) + require.NoError(t, err) + for it.Valid() { + if !it.Key().HasPrefix(prefix) { + break + } + all.keys = append(all.keys, it.Key()) + all.vals = append(all.vals, it.Value()) + if tablecodec.IsRecordKey(it.Key()) { + all.tp = append(all.tp, "Record") + tblID, kv, _ := tablecodec.DecodeRecordKey(it.Key()) + require.Equal(t, pid, tblID) + vals, _ := tablecodec.DecodeValuesBytesToStrings(it.Value()) + logutil.BgLogger().Info("Record", + zap.Int64("pid", tblID), + zap.Stringer("key", kv), + zap.Strings("values", vals)) + } else if tablecodec.IsIndexKey(it.Key()) { + all.tp = append(all.tp, "Index") + } else { + all.tp = append(all.tp, "Other") + } + err = it.Next() + require.NoError(t, err) + } + return all +} + +func TestReorganizeRangePartition(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("create database ReorgPartition") + tk.MustExec("use ReorgPartition") + tk.MustExec(`create table t (a int unsigned PRIMARY KEY, b varchar(255), c int, key (b), key (c,b)) partition by range (a) ` + + `(partition p0 values less than (10),` + + ` partition p1 values less than (20),` + + ` partition pMax values less than (MAXVALUE))`) + tk.MustExec(`insert into t values (1,"1",1), (12,"12",21),(23,"23",32),(34,"34",43),(45,"45",54),(56,"56",65)`) + tk.MustQuery(`select * from t where c < 40`).Sort().Check(testkit.Rows(""+ + "1 1 
1", + "12 12 21", + "23 23 32")) + tk.MustExec(`alter table t reorganize partition pMax into (partition p2 values less than (30), partition pMax values less than (MAXVALUE))`) + tk.MustExec(`admin check table t`) + tk.MustQuery(`show create table t`).Check(testkit.Rows("" + + "t CREATE TABLE `t` (\n" + + " `a` int(10) unsigned NOT NULL,\n" + + " `b` varchar(255) DEFAULT NULL,\n" + + " `c` int(11) DEFAULT NULL,\n" + + " PRIMARY KEY (`a`) /*T![clustered_index] CLUSTERED */,\n" + + " KEY `b` (`b`),\n" + + " KEY `c` (`c`,`b`)\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin\n" + + "PARTITION BY RANGE (`a`)\n" + + "(PARTITION `p0` VALUES LESS THAN (10),\n" + + " PARTITION `p1` VALUES LESS THAN (20),\n" + + " PARTITION `p2` VALUES LESS THAN (30),\n" + + " PARTITION `pMax` VALUES LESS THAN (MAXVALUE))")) + tk.MustQuery(`select * from t`).Sort().Check(testkit.Rows(""+ + "1 1 1", + "12 12 21", + "23 23 32", + "34 34 43", + "45 45 54", + "56 56 65")) + tk.MustQuery(`select * from t partition (p0)`).Sort().Check(testkit.Rows("" + + "1 1 1")) + tk.MustQuery(`select * from t partition (p1)`).Sort().Check(testkit.Rows("" + + "12 12 21")) + tk.MustQuery(`select * from t partition (p2)`).Sort().Check(testkit.Rows("" + + "23 23 32")) + tk.MustQuery(`select * from t partition (pMax)`).Sort().Check(testkit.Rows(""+ + "34 34 43", + "45 45 54", + "56 56 65")) + tk.MustQuery(`select * from t where b > "1"`).Sort().Check(testkit.Rows(""+ + "12 12 21", + "23 23 32", + "34 34 43", + "45 45 54", + "56 56 65")) + tk.MustQuery(`select * from t where c < 40`).Sort().Check(testkit.Rows(""+ + "1 1 1", + "12 12 21", + "23 23 32")) + tk.MustExec(`alter table t reorganize partition p2,pMax into (partition p2 values less than (35),partition p3 values less than (47), partition pMax values less than (MAXVALUE))`) + tk.MustExec(`admin check table t`) + tk.MustQuery(`select * from t`).Sort().Check(testkit.Rows(""+ + "1 1 1", + "12 12 21", + "23 23 32", + "34 34 43", + "45 45 54", + "56 56 65")) + tk.MustQuery(`show create table t`).Check(testkit.Rows("" + + "t CREATE TABLE `t` (\n" + + " `a` int(10) unsigned NOT NULL,\n" + + " `b` varchar(255) DEFAULT NULL,\n" + + " `c` int(11) DEFAULT NULL,\n" + + " PRIMARY KEY (`a`) /*T![clustered_index] CLUSTERED */,\n" + + " KEY `b` (`b`),\n" + + " KEY `c` (`c`,`b`)\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin\n" + + "PARTITION BY RANGE (`a`)\n" + + "(PARTITION `p0` VALUES LESS THAN (10),\n" + + " PARTITION `p1` VALUES LESS THAN (20),\n" + + " PARTITION `p2` VALUES LESS THAN (35),\n" + + " PARTITION `p3` VALUES LESS THAN (47),\n" + + " PARTITION `pMax` VALUES LESS THAN (MAXVALUE))")) + tk.MustQuery(`select * from t partition (p0)`).Sort().Check(testkit.Rows("" + + "1 1 1")) + tk.MustQuery(`select * from t partition (p1)`).Sort().Check(testkit.Rows("" + + "12 12 21")) + tk.MustQuery(`select * from t partition (p2)`).Sort().Check(testkit.Rows(""+ + "23 23 32", + "34 34 43")) + tk.MustQuery(`select * from t partition (p3)`).Sort().Check(testkit.Rows("" + + "45 45 54")) + tk.MustQuery(`select * from t partition (pMax)`).Sort().Check(testkit.Rows("" + + "56 56 65")) + tk.MustExec(`alter table t reorganize partition p0,p1 into (partition p1 values less than (20))`) + tk.MustExec(`admin check table t`) + tk.MustQuery(`show create table t`).Check(testkit.Rows("" + + "t CREATE TABLE `t` (\n" + + " `a` int(10) unsigned NOT NULL,\n" + + " `b` varchar(255) DEFAULT NULL,\n" + + " `c` int(11) DEFAULT NULL,\n" + + " PRIMARY KEY (`a`) /*T![clustered_index] 
CLUSTERED */,\n" + + " KEY `b` (`b`),\n" + + " KEY `c` (`c`,`b`)\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin\n" + + "PARTITION BY RANGE (`a`)\n" + + "(PARTITION `p1` VALUES LESS THAN (20),\n" + + " PARTITION `p2` VALUES LESS THAN (35),\n" + + " PARTITION `p3` VALUES LESS THAN (47),\n" + + " PARTITION `pMax` VALUES LESS THAN (MAXVALUE))")) + tk.MustQuery(`select * from t`).Sort().Check(testkit.Rows(""+ + "1 1 1", + "12 12 21", + "23 23 32", + "34 34 43", + "45 45 54", + "56 56 65")) + tk.MustExec(`alter table t drop index b`) + tk.MustExec(`alter table t drop index c`) + tk.MustExec(`admin check table t`) + tk.MustQuery(`show create table t`).Check(testkit.Rows("" + + "t CREATE TABLE `t` (\n" + + " `a` int(10) unsigned NOT NULL,\n" + + " `b` varchar(255) DEFAULT NULL,\n" + + " `c` int(11) DEFAULT NULL,\n" + + " PRIMARY KEY (`a`) /*T![clustered_index] CLUSTERED */\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin\n" + + "PARTITION BY RANGE (`a`)\n" + + "(PARTITION `p1` VALUES LESS THAN (20),\n" + + " PARTITION `p2` VALUES LESS THAN (35),\n" + + " PARTITION `p3` VALUES LESS THAN (47),\n" + + " PARTITION `pMax` VALUES LESS THAN (MAXVALUE))")) + tk.MustExec(`create table t2 (a int unsigned not null, b varchar(255), c int, key (b), key (c,b)) partition by range (a) ` + + "(PARTITION `p1` VALUES LESS THAN (20),\n" + + " PARTITION `p2` VALUES LESS THAN (35),\n" + + " PARTITION `p3` VALUES LESS THAN (47),\n" + + " PARTITION `pMax` VALUES LESS THAN (MAXVALUE))") + tk.MustExec(`insert into t2 select * from t`) + // Not allowed to change the start range! + tk.MustGetErrCode(`alter table t2 reorganize partition p2 into (partition p2a values less than (20), partition p2b values less than (36))`, + errno.ErrRangeNotIncreasing) + // Not allowed to change the end range! + tk.MustGetErrCode(`alter table t2 reorganize partition p2 into (partition p2a values less than (30), partition p2b values less than (36))`, errno.ErrRangeNotIncreasing) + tk.MustGetErrCode(`alter table t2 reorganize partition p2 into (partition p2a values less than (30), partition p2b values less than (34))`, errno.ErrRangeNotIncreasing) + // Also not allowed to change from MAXVALUE to something else IF there are values in the removed range! + tk.MustContainErrMsg(`alter table t2 reorganize partition pMax into (partition p2b values less than (50))`, "[table:1526]Table has no partition for value 56") + tk.MustQuery(`show create table t2`).Check(testkit.Rows("" + + "t2 CREATE TABLE `t2` (\n" + + " `a` int(10) unsigned NOT NULL,\n" + + " `b` varchar(255) DEFAULT NULL,\n" + + " `c` int(11) DEFAULT NULL,\n" + + " KEY `b` (`b`),\n" + + " KEY `c` (`c`,`b`)\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin\n" + + "PARTITION BY RANGE (`a`)\n" + + "(PARTITION `p1` VALUES LESS THAN (20),\n" + + " PARTITION `p2` VALUES LESS THAN (35),\n" + + " PARTITION `p3` VALUES LESS THAN (47),\n" + + " PARTITION `pMax` VALUES LESS THAN (MAXVALUE))")) + // But allowed to change from MAXVALUE if no existing values is outside the new range! 
+ tk.MustExec(`alter table t2 reorganize partition pMax into (partition p4 values less than (90))`) + tk.MustExec(`admin check table t2`) + tk.MustQuery(`show create table t2`).Check(testkit.Rows("" + + "t2 CREATE TABLE `t2` (\n" + + " `a` int(10) unsigned NOT NULL,\n" + + " `b` varchar(255) DEFAULT NULL,\n" + + " `c` int(11) DEFAULT NULL,\n" + + " KEY `b` (`b`),\n" + + " KEY `c` (`c`,`b`)\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin\n" + + "PARTITION BY RANGE (`a`)\n" + + "(PARTITION `p1` VALUES LESS THAN (20),\n" + + " PARTITION `p2` VALUES LESS THAN (35),\n" + + " PARTITION `p3` VALUES LESS THAN (47),\n" + + " PARTITION `p4` VALUES LESS THAN (90))")) +} + +func TestReorganizeListPartition(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("create database ReorgListPartition") + tk.MustExec("use ReorgListPartition") + tk.MustExec(`create table t (a int, b varchar(55), c int) partition by list (a)` + + ` (partition p1 values in (12,23,51,14), partition p2 values in (24,63), partition p3 values in (45))`) + tk.MustExec(`insert into t values (12,"12",21), (24,"24",42),(51,"51",15),(23,"23",32),(63,"63",36),(45,"45",54)`) + tk.MustExec(`alter table t reorganize partition p1 into (partition p0 values in (12,51,13), partition p1 values in (23))`) + tk.MustExec(`admin check table t`) + tk.MustQuery(`show create table t`).Check(testkit.Rows("" + + "t CREATE TABLE `t` (\n" + + " `a` int(11) DEFAULT NULL,\n" + + " `b` varchar(55) DEFAULT NULL,\n" + + " `c` int(11) DEFAULT NULL\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin\n" + + "PARTITION BY LIST (`a`)\n" + + "(PARTITION `p0` VALUES IN (12,51,13),\n" + + " PARTITION `p1` VALUES IN (23),\n" + + " PARTITION `p2` VALUES IN (24,63),\n" + + " PARTITION `p3` VALUES IN (45))")) + tk.MustExec(`alter table t add primary key (a), add key (b), add key (c,b)`) + + // Note: MySQL cannot reorganize two non-consecutive list partitions :) + // ERROR 1519 (HY000): When reorganizing a set of partitions they must be in consecutive order + // https://bugs.mysql.com/bug.php?id=106011 + // https://bugs.mysql.com/bug.php?id=109939 + tk.MustExec(`alter table t reorganize partition p1, p3 into (partition pa values in (45,23,15))`) + tk.MustExec(`admin check table t`) + tk.MustQuery(`show create table t`).Check(testkit.Rows("" + + "t CREATE TABLE `t` (\n" + + " `a` int(11) NOT NULL,\n" + + " `b` varchar(55) DEFAULT NULL,\n" + + " `c` int(11) DEFAULT NULL,\n" + + " PRIMARY KEY (`a`) /*T![clustered_index] NONCLUSTERED */,\n" + + " KEY `b` (`b`),\n" + + " KEY `c` (`c`,`b`)\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin\n" + + "PARTITION BY LIST (`a`)\n" + + "(PARTITION `p0` VALUES IN (12,51,13),\n" + + " PARTITION `pa` VALUES IN (45,23,15),\n" + + " PARTITION `p2` VALUES IN (24,63))")) + tk.MustGetErrCode(`alter table t modify a varchar(20)`, errno.ErrUnsupportedDDLOperation) +} + +type TestReorgDDLCallback struct { + *callback.TestDDLCallback + syncChan chan bool +} + +func (tc *TestReorgDDLCallback) OnChanged(err error) error { + err = tc.TestDDLCallback.OnChanged(err) + <-tc.syncChan + // We want to wait here + <-tc.syncChan + return err +} + +func TestReorgPartitionConcurrent(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + schemaName := "ReorgPartConcurrent" + tk.MustExec("create database " + schemaName) + tk.MustExec("use " + schemaName) + tk.MustExec(`create table t (a int unsigned PRIMARY KEY, b 
varchar(255), c int, key (b), key (c,b))` + + ` partition by range (a) ` + + `(partition p0 values less than (10),` + + ` partition p1 values less than (20),` + + ` partition pMax values less than (MAXVALUE))`) + tk.MustExec(`insert into t values (1,"1",1), (10,"10",10),(23,"23",32),(34,"34",43),(45,"45",54),(56,"56",65)`) + dom := domain.GetDomain(tk.Session()) + originHook := dom.DDL().GetHook() + defer dom.DDL().SetHook(originHook) + syncOnChanged := make(chan bool) + defer close(syncOnChanged) + hook := &TestReorgDDLCallback{TestDDLCallback: &callback.TestDDLCallback{Do: dom}, syncChan: syncOnChanged} + dom.DDL().SetHook(hook) + + wait := make(chan bool) + defer close(wait) + + currState := model.StateNone + hook.OnJobRunBeforeExported = func(job *model.Job) { + if job.Type == model.ActionReorganizePartition && + (job.SchemaState == model.StateDeleteOnly || + job.SchemaState == model.StateWriteOnly || + job.SchemaState == model.StateWriteReorganization || + job.SchemaState == model.StateDeleteReorganization) && + currState != job.SchemaState { + currState = job.SchemaState + <-wait + <-wait + } + } + alterErr := make(chan error, 1) + go backgroundExec(store, schemaName, "alter table t reorganize partition p1 into (partition p1a values less than (15), partition p1b values less than (20))", alterErr) + + wait <- true + // StateDeleteOnly + deleteOnlyInfoSchema := sessiontxn.GetTxnManager(tk.Session()).GetTxnInfoSchema() + wait <- true + + // StateWriteOnly + wait <- true + tk.MustExec(`insert into t values (11, "11", 11),(12,"12",21)`) + tk.MustExec(`admin check table t`) + writeOnlyInfoSchema := sessiontxn.GetTxnManager(tk.Session()).GetTxnInfoSchema() + require.Equal(t, int64(1), writeOnlyInfoSchema.SchemaMetaVersion()-deleteOnlyInfoSchema.SchemaMetaVersion()) + deleteOnlyTbl, err := deleteOnlyInfoSchema.TableByName(model.NewCIStr(schemaName), model.NewCIStr("t")) + require.NoError(t, err) + writeOnlyTbl, err := writeOnlyInfoSchema.TableByName(model.NewCIStr(schemaName), model.NewCIStr("t")) + require.NoError(t, err) + writeOnlyParts := writeOnlyTbl.Meta().Partition + writeOnlyTbl.Meta().Partition = deleteOnlyTbl.Meta().Partition + // If not DeleteOnly is working, then this would show up when reorg is done + tk.MustExec(`delete from t where a = 11`) + tk.MustExec(`update t set b = "12b", c = 12 where a = 12`) + tk.MustExec(`admin check table t`) + writeOnlyTbl.Meta().Partition = writeOnlyParts + tk.MustExec(`admin check table t`) + wait <- true + + // StateWriteReorganization + wait <- true + tk.MustExec(`insert into t values (14, "14", 14),(15, "15",15)`) + writeReorgInfoSchema := sessiontxn.GetTxnManager(tk.Session()).GetTxnInfoSchema() + tk.MustQuery(`show create table t`).Check(testkit.Rows("" + + "t CREATE TABLE `t` (\n" + + " `a` int(10) unsigned NOT NULL,\n" + + " `b` varchar(255) DEFAULT NULL,\n" + + " `c` int(11) DEFAULT NULL,\n" + + " PRIMARY KEY (`a`) /*T![clustered_index] CLUSTERED */,\n" + + " KEY `b` (`b`),\n" + + " KEY `c` (`c`,`b`)\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin\n" + + "PARTITION BY RANGE (`a`)\n" + + "(PARTITION `p0` VALUES LESS THAN (10),\n" + + " PARTITION `p1` VALUES LESS THAN (20),\n" + + " PARTITION `pMax` VALUES LESS THAN (MAXVALUE))")) + wait <- true + + // StateDeleteReorganization + wait <- true + tk.MustQuery(`select * from t where c between 10 and 22`).Sort().Check(testkit.Rows(""+ + "10 10 10", + "12 12b 12", + "14 14 14", + "15 15 15")) + deleteReorgInfoSchema := sessiontxn.GetTxnManager(tk.Session()).GetTxnInfoSchema() 
+ require.Equal(t, int64(1), deleteReorgInfoSchema.SchemaMetaVersion()-writeReorgInfoSchema.SchemaMetaVersion()) + tk.MustExec(`insert into t values (16, "16", 16)`) + oldTbl, err := writeReorgInfoSchema.TableByName(model.NewCIStr(schemaName), model.NewCIStr("t")) + require.NoError(t, err) + partDef := oldTbl.Meta().Partition.Definitions[1] + require.Equal(t, "p1", partDef.Name.O) + rows := getNumRowsFromPartitionDefs(t, tk, oldTbl, oldTbl.Meta().Partition.Definitions[1:2]) + require.Equal(t, 5, rows) + currTbl, err := deleteReorgInfoSchema.TableByName(model.NewCIStr(schemaName), model.NewCIStr("t")) + require.NoError(t, err) + currPart := currTbl.Meta().Partition + currTbl.Meta().Partition = oldTbl.Meta().Partition + tk.MustQuery(`select * from t where b = "16"`).Sort().Check(testkit.Rows("16 16 16")) + tk.MustExec(`admin check table t`) + tk.MustQuery(`show create table t`).Check(testkit.Rows("" + + "t CREATE TABLE `t` (\n" + + " `a` int(10) unsigned NOT NULL,\n" + + " `b` varchar(255) DEFAULT NULL,\n" + + " `c` int(11) DEFAULT NULL,\n" + + " PRIMARY KEY (`a`) /*T![clustered_index] CLUSTERED */,\n" + + " KEY `b` (`b`),\n" + + " KEY `c` (`c`,`b`)\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin\n" + + "PARTITION BY RANGE (`a`)\n" + + "(PARTITION `p0` VALUES LESS THAN (10),\n" + + " PARTITION `p1` VALUES LESS THAN (20),\n" + + " PARTITION `pMax` VALUES LESS THAN (MAXVALUE))")) + tk.MustQuery(`select * from t partition (p1)`).Sort().Check(testkit.Rows(""+ + "10 10 10", + "12 12b 12", + "14 14 14", + "15 15 15", + "16 16 16")) + currTbl.Meta().Partition = currPart + wait <- true + syncOnChanged <- true + // This reads the new schema (Schema update completed) + tk.MustQuery(`select * from t where c between 10 and 22`).Sort().Check(testkit.Rows(""+ + "10 10 10", + "12 12b 12", + "14 14 14", + "15 15 15", + "16 16 16")) + tk.MustExec(`admin check table t`) + newInfoSchema := sessiontxn.GetTxnManager(tk.Session()).GetTxnInfoSchema() + require.Equal(t, int64(1), newInfoSchema.SchemaMetaVersion()-deleteReorgInfoSchema.SchemaMetaVersion()) + oldTbl, err = deleteReorgInfoSchema.TableByName(model.NewCIStr(schemaName), model.NewCIStr("t")) + require.NoError(t, err) + partDef = oldTbl.Meta().Partition.Definitions[1] + require.Equal(t, "p1a", partDef.Name.O) + tk.MustQuery(`show create table t`).Check(testkit.Rows("" + + "t CREATE TABLE `t` (\n" + + " `a` int(10) unsigned NOT NULL,\n" + + " `b` varchar(255) DEFAULT NULL,\n" + + " `c` int(11) DEFAULT NULL,\n" + + " PRIMARY KEY (`a`) /*T![clustered_index] CLUSTERED */,\n" + + " KEY `b` (`b`),\n" + + " KEY `c` (`c`,`b`)\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin\n" + + "PARTITION BY RANGE (`a`)\n" + + "(PARTITION `p0` VALUES LESS THAN (10),\n" + + " PARTITION `p1a` VALUES LESS THAN (15),\n" + + " PARTITION `p1b` VALUES LESS THAN (20),\n" + + " PARTITION `pMax` VALUES LESS THAN (MAXVALUE))")) + newTbl, err := deleteReorgInfoSchema.TableByName(model.NewCIStr(schemaName), model.NewCIStr("t")) + require.NoError(t, err) + newPart := newTbl.Meta().Partition + newTbl.Meta().Partition = oldTbl.Meta().Partition + tk.MustQuery(`show create table t`).Check(testkit.Rows("" + + "t CREATE TABLE `t` (\n" + + " `a` int(10) unsigned NOT NULL,\n" + + " `b` varchar(255) DEFAULT NULL,\n" + + " `c` int(11) DEFAULT NULL,\n" + + " PRIMARY KEY (`a`) /*T![clustered_index] CLUSTERED */,\n" + + " KEY `b` (`b`),\n" + + " KEY `c` (`c`,`b`)\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin\n" + + "PARTITION BY RANGE (`a`)\n" 
+ + "(PARTITION `p0` VALUES LESS THAN (10),\n" + + " PARTITION `p1a` VALUES LESS THAN (15),\n" + + " PARTITION `p1b` VALUES LESS THAN (20),\n" + + " PARTITION `pMax` VALUES LESS THAN (MAXVALUE))")) + tk.MustExec(`admin check table t`) + newTbl.Meta().Partition = newPart + syncOnChanged <- true + require.NoError(t, <-alterErr) +} + +func TestReorgPartitionFailConcurrent(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + schemaName := "ReorgPartFailConcurrent" + tk.MustExec("create database " + schemaName) + tk.MustExec("use " + schemaName) + tk.MustExec(`create table t (a int unsigned PRIMARY KEY, b varchar(255), c int, key (b), key (c,b))` + + ` partition by range (a) ` + + `(partition p0 values less than (10),` + + ` partition p1 values less than (20),` + + ` partition pMax values less than (MAXVALUE))`) + tk.MustExec(`insert into t values (1,"1",1), (12,"12",21),(23,"23",32),(34,"34",43),(45,"45",54),(56,"56",65)`) + dom := domain.GetDomain(tk.Session()) + originHook := dom.DDL().GetHook() + defer dom.DDL().SetHook(originHook) + hook := &callback.TestDDLCallback{Do: dom} + dom.DDL().SetHook(hook) + + wait := make(chan bool) + defer close(wait) + + // Test insert of duplicate key during copy phase + injected := false + hook.OnJobRunBeforeExported = func(job *model.Job) { + if job.Type == model.ActionReorganizePartition && job.SchemaState == model.StateWriteReorganization && !injected { + injected = true + <-wait + <-wait + } + } + alterErr := make(chan error, 1) + go backgroundExec(store, schemaName, "alter table t reorganize partition p1 into (partition p1a values less than (15), partition p1b values less than (20))", alterErr) + wait <- true + tk.MustExec(`insert into t values (14, "14", 14),(15, "15",15)`) + tk.MustGetErrCode(`insert into t values (11, "11", 11),(12,"duplicate PK 💥", 13)`, errno.ErrDupEntry) + tk.MustExec(`admin check table t`) + wait <- true + require.NoError(t, <-alterErr) + tk.MustQuery(`select * from t where c between 10 and 22`).Sort().Check(testkit.Rows(""+ + "12 12 21", + "14 14 14", + "15 15 15")) + tk.MustExec(`admin check table t`) + tk.MustQuery(`show create table t`).Check(testkit.Rows("" + + "t CREATE TABLE `t` (\n" + + " `a` int(10) unsigned NOT NULL,\n" + + " `b` varchar(255) DEFAULT NULL,\n" + + " `c` int(11) DEFAULT NULL,\n" + + " PRIMARY KEY (`a`) /*T![clustered_index] CLUSTERED */,\n" + + " KEY `b` (`b`),\n" + + " KEY `c` (`c`,`b`)\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin\n" + + "PARTITION BY RANGE (`a`)\n" + + "(PARTITION `p0` VALUES LESS THAN (10),\n" + + " PARTITION `p1a` VALUES LESS THAN (15),\n" + + " PARTITION `p1b` VALUES LESS THAN (20),\n" + + " PARTITION `pMax` VALUES LESS THAN (MAXVALUE))")) + + // Test reorg of duplicate key + prevState := model.StateNone + hook.OnJobRunBeforeExported = func(job *model.Job) { + if job.Type == model.ActionReorganizePartition && + job.SchemaState == model.StateWriteReorganization && + job.SnapshotVer == 0 && + prevState != job.SchemaState { + prevState = job.SchemaState + <-wait + <-wait + } + if job.Type == model.ActionReorganizePartition && + job.SchemaState == model.StateDeleteReorganization && + prevState != job.SchemaState { + prevState = job.SchemaState + <-wait + <-wait + } + } + go backgroundExec(store, schemaName, "alter table t reorganize partition p1a,p1b into (partition p1a values less than (14), partition p1b values less than (17), partition p1c values less than (20))", alterErr) + wait <- true + infoSchema := 
sessiontxn.GetTxnManager(tk.Session()).GetTxnInfoSchema() + tbl, err := infoSchema.TableByName(model.NewCIStr(schemaName), model.NewCIStr("t")) + require.NoError(t, err) + require.Equal(t, 0, getNumRowsFromPartitionDefs(t, tk, tbl, tbl.Meta().Partition.AddingDefinitions)) + tk.MustExec(`delete from t where a = 14`) + tk.MustExec(`insert into t values (13, "13", 31),(14,"14b",14),(16, "16",16)`) + tk.MustExec(`admin check table t`) + wait <- true + wait <- true + tbl, err = infoSchema.TableByName(model.NewCIStr(schemaName), model.NewCIStr("t")) + require.NoError(t, err) + require.Equal(t, 5, getNumRowsFromPartitionDefs(t, tk, tbl, tbl.Meta().Partition.AddingDefinitions)) + tk.MustExec(`delete from t where a = 15`) + tk.MustExec(`insert into t values (11, "11", 11),(15,"15b",15),(17, "17",17)`) + tk.MustExec(`admin check table t`) + wait <- true + require.NoError(t, <-alterErr) + + tk.MustExec(`admin check table t`) + tk.MustQuery(`select * from t where a between 10 and 22`).Sort().Check(testkit.Rows(""+ + "11 11 11", + "12 12 21", + "13 13 31", + "14 14b 14", + "15 15b 15", + "16 16 16", + "17 17 17")) + tk.MustQuery(`select * from t where c between 10 and 22`).Sort().Check(testkit.Rows(""+ + "11 11 11", + "12 12 21", + "14 14b 14", + "15 15b 15", + "16 16 16", + "17 17 17")) + tk.MustQuery(`select * from t where b between "10" and "22"`).Sort().Check(testkit.Rows(""+ + "11 11 11", + "12 12 21", + "13 13 31", + "14 14b 14", + "15 15b 15", + "16 16 16", + "17 17 17")) +} + +func getNumRowsFromPartitionDefs(t *testing.T, tk *testkit.TestKit, tbl table.Table, defs []model.PartitionDefinition) int { + ctx := tk.Session() + pt := tbl.GetPartitionedTable() + require.NotNil(t, pt) + cnt := 0 + for _, def := range defs { + data := getAllDataForPhysicalTable(t, ctx, pt.GetPartition(def.ID)) + require.True(t, len(data.keys) == len(data.vals)) + require.True(t, len(data.keys) == len(data.tp)) + for _, s := range data.tp { + if s == "Record" { + cnt++ + } + } + } + return cnt +} + +func TestReorgPartitionFailInject(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + schemaName := "ReorgPartFailInjectConcurrent" + tk.MustExec("create database " + schemaName) + tk.MustExec("use " + schemaName) + tk.MustExec(`create table t (a int unsigned PRIMARY KEY, b varchar(255), c int, key (b), key (c,b))` + + ` partition by range (a) ` + + `(partition p0 values less than (10),` + + ` partition p1 values less than (20),` + + ` partition pMax values less than (MAXVALUE))`) + tk.MustExec(`insert into t values (1,"1",1), (12,"12",21),(23,"23",32),(34,"34",43),(45,"45",54),(56,"56",65)`) + + dom := domain.GetDomain(tk.Session()) + originHook := dom.DDL().GetHook() + defer dom.DDL().SetHook(originHook) + hook := &callback.TestDDLCallback{Do: dom} + dom.DDL().SetHook(hook) + + wait := make(chan bool) + defer close(wait) + + injected := false + hook.OnJobRunBeforeExported = func(job *model.Job) { + if job.Type == model.ActionReorganizePartition && job.SchemaState == model.StateWriteReorganization && !injected { + injected = true + <-wait + <-wait + } + } + alterErr := make(chan error, 1) + go backgroundExec(store, schemaName, "alter table t reorganize partition p1 into (partition p1a values less than (15), partition p1b values less than (20))", alterErr) + wait <- true + tk.MustExec(`insert into t values (14, "14", 14),(15, "15",15)`) + tk.MustGetErrCode(`insert into t values (11, "11", 11),(12,"duplicate PK 💥", 13)`, errno.ErrDupEntry) + tk.MustExec(`admin check table t`) + wait <- 
true + require.NoError(t, <-alterErr) + tk.MustExec(`admin check table t`) + tk.MustQuery(`select * from t where c between 10 and 22`).Sort().Check(testkit.Rows(""+ + "12 12 21", + "14 14 14", + "15 15 15")) + tk.MustQuery(`show create table t`).Check(testkit.Rows("" + + "t CREATE TABLE `t` (\n" + + " `a` int(10) unsigned NOT NULL,\n" + + " `b` varchar(255) DEFAULT NULL,\n" + + " `c` int(11) DEFAULT NULL,\n" + + " PRIMARY KEY (`a`) /*T![clustered_index] CLUSTERED */,\n" + + " KEY `b` (`b`),\n" + + " KEY `c` (`c`,`b`)\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin\n" + + "PARTITION BY RANGE (`a`)\n" + + "(PARTITION `p0` VALUES LESS THAN (10),\n" + + " PARTITION `p1a` VALUES LESS THAN (15),\n" + + " PARTITION `p1b` VALUES LESS THAN (20),\n" + + " PARTITION `pMax` VALUES LESS THAN (MAXVALUE))")) +} + +func TestReorgPartitionRollback(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + schemaName := "ReorgPartRollback" + tk.MustExec("create database " + schemaName) + tk.MustExec("use " + schemaName) + tk.MustExec(`create table t (a int unsigned PRIMARY KEY, b varchar(255), c int, key (b), key (c,b))` + + ` partition by range (a) ` + + `(partition p0 values less than (10),` + + ` partition p1 values less than (20),` + + ` partition pMax values less than (MAXVALUE))`) + tk.MustExec(`insert into t values (1,"1",1), (12,"12",21),(23,"23",32),(34,"34",43),(45,"45",54),(56,"56",65)`) + // TODO: Check that there are no additional placement rules, + // bundles, or ranges with non-completed tableIDs + // (partitions used during reorg, but was dropped) + require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/ddl/mockUpdateVersionAndTableInfoErr", `return(true)`)) + tk.MustExecToErr("alter table t reorganize partition p1 into (partition p1a values less than (15), partition p1b values less than (20))") + tk.MustExec(`admin check table t`) + require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/ddl/mockUpdateVersionAndTableInfoErr")) + ctx := tk.Session() + is := domain.GetDomain(ctx).InfoSchema() + tbl, err := is.TableByName(model.NewCIStr(schemaName), model.NewCIStr("t")) + require.NoError(t, err) + noNewTablesAfter(t, tk, ctx, tbl) + require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/ddl/reorgPartitionAfterDataCopy", `return(true)`)) + defer func() { + err := failpoint.Disable("github.com/pingcap/tidb/ddl/reorgPartitionAfterDataCopy") + require.NoError(t, err) + }() + tk.MustExecToErr("alter table t reorganize partition p1 into (partition p1a values less than (15), partition p1b values less than (20))") + tk.MustExec(`admin check table t`) + tk.MustQuery(`show create table t`).Check(testkit.Rows("" + + "t CREATE TABLE `t` (\n" + + " `a` int(10) unsigned NOT NULL,\n" + + " `b` varchar(255) DEFAULT NULL,\n" + + " `c` int(11) DEFAULT NULL,\n" + + " PRIMARY KEY (`a`) /*T![clustered_index] CLUSTERED */,\n" + + " KEY `b` (`b`),\n" + + " KEY `c` (`c`,`b`)\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin\n" + + "PARTITION BY RANGE (`a`)\n" + + "(PARTITION `p0` VALUES LESS THAN (10),\n" + + " PARTITION `p1` VALUES LESS THAN (20),\n" + + " PARTITION `pMax` VALUES LESS THAN (MAXVALUE))")) + + tbl, err = is.TableByName(model.NewCIStr(schemaName), model.NewCIStr("t")) + require.NoError(t, err) + noNewTablesAfter(t, tk, ctx, tbl) +} + +func TestReorgPartitionData(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + schemaName := "ReorgPartData" + tk.MustExec("create database 
" + schemaName) + tk.MustExec("use " + schemaName) + tk.MustExec(`SET @@session.sql_mode = default`) + tk.MustExec(`create table t (a int PRIMARY KEY AUTO_INCREMENT, b varchar(255), c int, d datetime, key (b), key (c,b)) partition by range (a) (partition p1 values less than (0), partition p1M values less than (1000000))`) + tk.MustContainErrMsg(`insert into t values (0, "Zero value!", 0, '2022-02-30')`, "[table:1292]Incorrect datetime value: '2022-02-30' for column 'd' at row 1") + tk.MustExec(`SET @@session.sql_mode = 'ALLOW_INVALID_DATES,NO_AUTO_VALUE_ON_ZERO'`) + tk.MustExec(`insert into t values (0, "Zero value!", 0, '2022-02-30')`) + tk.MustQuery(`show warnings`).Check(testkit.Rows()) + tk.MustQuery(`select * from t`).Sort().Check(testkit.Rows("0 Zero value! 0 2022-02-30 00:00:00")) + tk.MustExec(`SET @@session.sql_mode = default`) + tk.MustExec(`alter table t reorganize partition p1M into (partition p0 values less than (1), partition p2M values less than (2000000))`) + tk.MustQuery(`select * from t`).Sort().Check(testkit.Rows("0 Zero value! 0 2022-02-30 00:00:00")) + tk.MustExec(`admin check table t`) +} diff --git a/ddl/sanity_check.go b/ddl/sanity_check.go index 2dcc17a8131f4..63eb94d4592a4 100644 --- a/ddl/sanity_check.go +++ b/ddl/sanity_check.go @@ -98,7 +98,8 @@ func expectedDeleteRangeCnt(ctx delRangeCntCtx, job *model.Job) (int, error) { return 0, errors.Trace(err) } return mathutil.Max(len(physicalTableIDs), 1), nil - case model.ActionDropTablePartition, model.ActionTruncateTablePartition: + case model.ActionDropTablePartition, model.ActionTruncateTablePartition, + model.ActionReorganizePartition: var physicalTableIDs []int64 if err := job.DecodeArgs(&physicalTableIDs); err != nil { return 0, errors.Trace(err) diff --git a/ddl/split_region.go b/ddl/split_region.go index b201bf65538f3..ffbcb7439292d 100644 --- a/ddl/split_region.go +++ b/ddl/split_region.go @@ -31,7 +31,7 @@ import ( ) func splitPartitionTableRegion(ctx sessionctx.Context, store kv.SplittableStore, tbInfo *model.TableInfo, pi *model.PartitionInfo, scatter bool) { - // Max partition count is 4096, should we sample and just choose some of the partition to split? + // Max partition count is 8192, should we sample and just choose some partitions to split? 
regionIDs := make([]uint64, 0, len(pi.Definitions)) ctxWithTimeout, cancel := context.WithTimeout(context.Background(), ctx.GetSessionVars().GetSplitRegionTimeout()) defer cancel() diff --git a/ddl/tiflash_replica_test.go b/ddl/tiflash_replica_test.go index 08a91c1086caa..abd7275e4669b 100644 --- a/ddl/tiflash_replica_test.go +++ b/ddl/tiflash_replica_test.go @@ -303,7 +303,7 @@ func TestCreateTableWithLike2(t *testing.T) { } onceChecker.Store(job.ID, true) - go backgroundExec(store, "create table t2 like t1", doneCh) + go backgroundExec(store, "test", "create table t2 like t1", doneCh) } } originalHook := dom.DDL().GetHook() diff --git a/domain/domain.go b/domain/domain.go index b4a7a2770afd3..e4f1a761d6b58 100644 --- a/domain/domain.go +++ b/domain/domain.go @@ -208,6 +208,7 @@ func (do *Domain) loadInfoSchema(startTS uint64) (infoschema.InfoSchema, bool, i zap.Int64("currentSchemaVersion", currentSchemaVersion), zap.Int64("neededSchemaVersion", neededSchemaVersion), zap.Duration("start time", time.Since(startTime)), + zap.Int64("gotSchemaVersion", is.SchemaMetaVersion()), zap.Int64s("phyTblIDs", relatedChanges.PhyTblIDS), zap.Uint64s("actionTypes", relatedChanges.ActionTypes)) return is, false, currentSchemaVersion, relatedChanges, nil diff --git a/executor/adapter.go b/executor/adapter.go index 022caf555535f..7702f2e05875a 100644 --- a/executor/adapter.go +++ b/executor/adapter.go @@ -214,7 +214,7 @@ type TelemetryInfo struct { UseNonRecursive bool UseRecursive bool UseMultiSchemaChange bool - UesExchangePartition bool + UseExchangePartition bool UseFlashbackToCluster bool PartitionTelemetry *PartitionTelemetryInfo AccountLockTelemetry *AccountLockTelemetryInfo @@ -238,6 +238,7 @@ type PartitionTelemetryInfo struct { UseAddIntervalPartition bool UseDropIntervalPartition bool UseCompactTablePartition bool + UseReorganizePartition bool } // AccountLockTelemetryInfo records account lock/unlock information during execution diff --git a/executor/builder.go b/executor/builder.go index 213e21ad52daf..132ae66272c69 100644 --- a/executor/builder.go +++ b/executor/builder.go @@ -1119,7 +1119,12 @@ func (b *executorBuilder) setTelemetryInfo(v *plannercore.DDL) { } b.Ti.PartitionTelemetry.UseAddIntervalPartition = true case ast.AlterTableExchangePartition: - b.Ti.UesExchangePartition = true + b.Ti.UseExchangePartition = true + case ast.AlterTableReorganizePartition: + if b.Ti.PartitionTelemetry == nil { + b.Ti.PartitionTelemetry = &PartitionTelemetryInfo{} + } + b.Ti.PartitionTelemetry.UseReorganizePartition = true } } case *ast.CreateTableStmt: @@ -3560,10 +3565,14 @@ func getPartitionKeyColOffsets(keyColIDs []int64, pt table.PartitionedTable) []i keyColOffsets[i] = offset } - pe, err := pt.(interface { - PartitionExpr() (*tables.PartitionExpr, error) - }).PartitionExpr() - if err != nil { + t, ok := pt.(interface { + PartitionExpr() *tables.PartitionExpr + }) + if !ok { + return nil + } + pe := t.PartitionExpr() + if pe == nil { return nil } diff --git a/infoschema/builder.go b/infoschema/builder.go index 9ec5277e5c79c..3240e1c5c5c49 100644 --- a/infoschema/builder.go +++ b/infoschema/builder.go @@ -220,11 +220,13 @@ func (b *Builder) ApplyDiff(m *meta.Meta, diff *model.SchemaDiff) ([]int64, erro case model.ActionTruncateTablePartition, model.ActionTruncateTable: return b.applyTruncateTableOrPartition(m, diff) case model.ActionDropTable, model.ActionDropTablePartition: - return b.applyDropTableOrParition(m, diff) + return b.applyDropTableOrPartition(m, diff) case model.ActionRecoverTable: return 
b.applyRecoverTable(m, diff) case model.ActionCreateTables: return b.applyCreateTables(m, diff) + case model.ActionReorganizePartition: + return b.applyReorganizePartition(m, diff) case model.ActionFlashbackCluster: return []int64{-1}, nil default: @@ -275,7 +277,7 @@ func (b *Builder) applyTruncateTableOrPartition(m *meta.Meta, diff *model.Schema return tblIDs, nil } -func (b *Builder) applyDropTableOrParition(m *meta.Meta, diff *model.SchemaDiff) ([]int64, error) { +func (b *Builder) applyDropTableOrPartition(m *meta.Meta, diff *model.SchemaDiff) ([]int64, error) { tblIDs, err := b.applyTableUpdate(m, diff) if err != nil { return nil, errors.Trace(err) @@ -287,6 +289,22 @@ func (b *Builder) applyDropTableOrParition(m *meta.Meta, diff *model.SchemaDiff) return tblIDs, nil } +func (b *Builder) applyReorganizePartition(m *meta.Meta, diff *model.SchemaDiff) ([]int64, error) { + tblIDs, err := b.applyTableUpdate(m, diff) + if err != nil { + return nil, errors.Trace(err) + } + for _, opt := range diff.AffectedOpts { + if opt.OldTableID != 0 { + b.deleteBundle(b.is, opt.OldTableID) + } + if opt.TableID != 0 { + b.markTableBundleShouldUpdate(opt.TableID) + } + } + return tblIDs, nil +} + func (b *Builder) applyRecoverTable(m *meta.Meta, diff *model.SchemaDiff) ([]int64, error) { tblIDs, err := b.applyTableUpdate(m, diff) if err != nil { @@ -696,6 +714,8 @@ func (b *Builder) applyCreateTable(m *meta.Meta, dbInfo *model.DBInfo, tableID i switch tp { case model.ActionDropTablePartition: case model.ActionTruncateTablePartition: + // ReorganizePartition handle the bundles in applyReorganizePartition + case model.ActionReorganizePartition: default: pi := tblInfo.GetPartitionInfo() if pi != nil { diff --git a/infoschema/perfschema/tables.go b/infoschema/perfschema/tables.go index 9a5a36235cfa4..6889d61c3be85 100644 --- a/infoschema/perfschema/tables.go +++ b/infoschema/perfschema/tables.go @@ -207,6 +207,11 @@ func (vt *perfSchemaTable) Indices() []table.Index { return vt.indices } +// GetPartitionedTable implements table.Table GetPartitionedTable interface. +func (vt *perfSchemaTable) GetPartitionedTable() table.PartitionedTable { + return nil +} + // initTableIndices initializes the indices of the perfSchemaTable. func initTableIndices(t *perfSchemaTable) error { tblInfo := t.meta diff --git a/infoschema/tables.go b/infoschema/tables.go index 7a2631b4abe13..3a2711eb57147 100644 --- a/infoschema/tables.go +++ b/infoschema/tables.go @@ -2166,6 +2166,11 @@ func (it *infoschemaTable) Type() table.Type { return it.tp } +// GetPartitionedTable implements table.Table GetPartitionedTable interface. +func (it *infoschemaTable) GetPartitionedTable() table.PartitionedTable { + return nil +} + // VirtualTable is a dummy table.Table implementation. type VirtualTable struct{} diff --git a/metrics/ddl.go b/metrics/ddl.go index 5efe636c8a038..c97077ed0aea0 100644 --- a/metrics/ddl.go +++ b/metrics/ddl.go @@ -147,6 +147,8 @@ const ( LblAddIndex = "add_index" LblAddIndexMerge = "add_index_merge_tmp" LblModifyColumn = "modify_column" + + LblReorgPartition = "reorganize_partition" ) // GenerateReorgLabel returns the label with schema name and table name. 
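For reference, a minimal sketch of how a caller might use the new table.Table GetPartitionedTable accessor introduced by this patch (see the perfschema/infoschema stubs above and table/table.go below); the physicalIDs helper is hypothetical and not part of the patch:

```go
package example

import "github.com/pingcap/tidb/table"

// physicalIDs returns every physical table ID of tbl: the partition IDs when the
// table is partitioned, otherwise just the table's own ID. GetPartitionedTable
// returns nil for non-partitioned (and virtual) tables.
func physicalIDs(tbl table.Table) []int64 {
	if pt := tbl.GetPartitionedTable(); pt != nil {
		return pt.GetAllPartitionIDs()
	}
	return []int64{tbl.Meta().ID}
}
```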
diff --git a/metrics/telemetry.go b/metrics/telemetry.go index 69093252079c3..39838853e123d 100644 --- a/metrics/telemetry.go +++ b/metrics/telemetry.go @@ -169,6 +169,13 @@ var ( Name: "compact_partition_usage", Help: "Counter of compact table partition", }) + TelemetryReorganizePartitionCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: "tidb", + Subsystem: "telemetry", + Name: "reorganize_partition_usage", + Help: "Counter of alter table reorganize partition", + }) TelemetryDistReorgCnt = prometheus.NewCounter( prometheus.CounterOpts{ Namespace: "tidb", @@ -304,6 +311,7 @@ type TablePartitionUsageCounter struct { TablePartitionAddIntervalPartitionsCnt int64 `json:"table_partition_add_interval_partitions_cnt"` TablePartitionDropIntervalPartitionsCnt int64 `json:"table_partition_drop_interval_partitions_cnt"` TablePartitionComactCnt int64 `json:"table_TablePartitionComactCnt"` + TablePartitionReorganizePartitionCnt int64 `json:"table_reorganize_partition_cnt"` } // ExchangePartitionUsageCounter records the usages of exchange partition. @@ -342,22 +350,24 @@ func (c TablePartitionUsageCounter) Cal(rhs TablePartitionUsageCounter) TablePar TablePartitionAddIntervalPartitionsCnt: c.TablePartitionAddIntervalPartitionsCnt - rhs.TablePartitionAddIntervalPartitionsCnt, TablePartitionDropIntervalPartitionsCnt: c.TablePartitionDropIntervalPartitionsCnt - rhs.TablePartitionDropIntervalPartitionsCnt, TablePartitionComactCnt: c.TablePartitionComactCnt - rhs.TablePartitionComactCnt, + TablePartitionReorganizePartitionCnt: c.TablePartitionReorganizePartitionCnt - rhs.TablePartitionReorganizePartitionCnt, } } // ResetTablePartitionCounter gets the TxnCommitCounter. func ResetTablePartitionCounter(pre TablePartitionUsageCounter) TablePartitionUsageCounter { return TablePartitionUsageCounter{ - TablePartitionCnt: readCounter(TelemetryTablePartitionCnt), - TablePartitionListCnt: readCounter(TelemetryTablePartitionListCnt), - TablePartitionRangeCnt: readCounter(TelemetryTablePartitionRangeCnt), - TablePartitionHashCnt: readCounter(TelemetryTablePartitionHashCnt), - TablePartitionRangeColumnsCnt: readCounter(TelemetryTablePartitionRangeColumnsCnt), - TablePartitionRangeColumnsGt1Cnt: readCounter(TelemetryTablePartitionRangeColumnsGt1Cnt), - TablePartitionRangeColumnsGt2Cnt: readCounter(TelemetryTablePartitionRangeColumnsGt2Cnt), - TablePartitionRangeColumnsGt3Cnt: readCounter(TelemetryTablePartitionRangeColumnsGt3Cnt), - TablePartitionListColumnsCnt: readCounter(TelemetryTablePartitionListColumnsCnt), - TablePartitionMaxPartitionsCnt: mathutil.Max(readCounter(TelemetryTablePartitionMaxPartitionsCnt)-pre.TablePartitionMaxPartitionsCnt, pre.TablePartitionMaxPartitionsCnt), + TablePartitionCnt: readCounter(TelemetryTablePartitionCnt), + TablePartitionListCnt: readCounter(TelemetryTablePartitionListCnt), + TablePartitionRangeCnt: readCounter(TelemetryTablePartitionRangeCnt), + TablePartitionHashCnt: readCounter(TelemetryTablePartitionHashCnt), + TablePartitionRangeColumnsCnt: readCounter(TelemetryTablePartitionRangeColumnsCnt), + TablePartitionRangeColumnsGt1Cnt: readCounter(TelemetryTablePartitionRangeColumnsGt1Cnt), + TablePartitionRangeColumnsGt2Cnt: readCounter(TelemetryTablePartitionRangeColumnsGt2Cnt), + TablePartitionRangeColumnsGt3Cnt: readCounter(TelemetryTablePartitionRangeColumnsGt3Cnt), + TablePartitionListColumnsCnt: readCounter(TelemetryTablePartitionListColumnsCnt), + TablePartitionMaxPartitionsCnt: 
mathutil.Max(readCounter(TelemetryTablePartitionMaxPartitionsCnt)-pre.TablePartitionMaxPartitionsCnt, pre.TablePartitionMaxPartitionsCnt), + TablePartitionReorganizePartitionCnt: readCounter(TelemetryReorganizePartitionCnt), } } @@ -378,6 +388,7 @@ func GetTablePartitionCounter() TablePartitionUsageCounter { TablePartitionAddIntervalPartitionsCnt: readCounter(TelemetryTablePartitionAddIntervalPartitionsCnt), TablePartitionDropIntervalPartitionsCnt: readCounter(TelemetryTablePartitionDropIntervalPartitionsCnt), TablePartitionComactCnt: readCounter(TelemetryCompactPartitionCnt), + TablePartitionReorganizePartitionCnt: readCounter(TelemetryReorganizePartitionCnt), } } diff --git a/parser/model/model.go b/parser/model/model.go index 838b74ca82930..d747b1d9fc4b7 100644 --- a/parser/model/model.go +++ b/parser/model/model.go @@ -768,6 +768,9 @@ func (t *TableInfo) Clone() *TableInfo { nt.ForeignKeys[i] = t.ForeignKeys[i].Clone() } + if t.Partition != nil { + nt.Partition = t.Partition.Clone() + } if t.TTLInfo != nil { nt.TTLInfo = t.TTLInfo.Clone() } @@ -1204,6 +1207,8 @@ type PartitionInfo struct { DroppingDefinitions []PartitionDefinition `json:"dropping_definitions"` States []PartitionState `json:"states"` Num uint64 `json:"num"` + // Only used during ReorganizePartition so far + DDLState SchemaState `json:"ddl_state"` } // Clone clones itself. @@ -1338,15 +1343,15 @@ func (ci *PartitionDefinition) MemoryUsage() (sum int64) { } // FindPartitionDefinitionByName finds PartitionDefinition by name. -func (t *TableInfo) FindPartitionDefinitionByName(partitionDefinitionName string) *PartitionDefinition { +func (pi *PartitionInfo) FindPartitionDefinitionByName(partitionDefinitionName string) int { lowConstrName := strings.ToLower(partitionDefinitionName) - definitions := t.Partition.Definitions + definitions := pi.Definitions for i := range definitions { if definitions[i].Name.L == lowConstrName { - return &t.Partition.Definitions[i] + return i } } - return nil + return -1 } // IndexColumn provides index column info. diff --git a/planner/core/point_get_plan.go b/planner/core/point_get_plan.go index 1b285a83a1596..e80fd32fc3414 100644 --- a/planner/core/point_get_plan.go +++ b/planner/core/point_get_plan.go @@ -1898,12 +1898,7 @@ func getPartitionExpr(ctx sessionctx.Context, tbl *model.TableInfo) *tables.Part } // PartitionExpr don't need columns and names for hash partition. - partitionExpr, err := partTable.PartitionExpr() - if err != nil { - return nil - } - - return partitionExpr + return partTable.PartitionExpr() } func getHashPartitionColumnName(ctx sessionctx.Context, tbl *model.TableInfo) *ast.ColumnName { @@ -1920,10 +1915,7 @@ func getHashPartitionColumnName(ctx sessionctx.Context, tbl *model.TableInfo) *a return nil } // PartitionExpr don't need columns and names for hash partition. - partitionExpr, err := table.(partitionTable).PartitionExpr() - if err != nil { - return nil - } + partitionExpr := table.(partitionTable).PartitionExpr() expr := partitionExpr.OrigExpr col, ok := expr.(*ast.ColumnNameExpr) if !ok { diff --git a/planner/core/rule_partition_processor.go b/planner/core/rule_partition_processor.go index 81a43935b056f..5982bcb32d6ba 100644 --- a/planner/core/rule_partition_processor.go +++ b/planner/core/rule_partition_processor.go @@ -110,7 +110,7 @@ func (s *partitionProcessor) rewriteDataSource(lp LogicalPlan, opt *logicalOptim // partitionTable is for those tables which implement partition. 
type partitionTable interface { - PartitionExpr() (*tables.PartitionExpr, error) + PartitionExpr() *tables.PartitionExpr } func generateHashPartitionExpr(ctx sessionctx.Context, pi *model.PartitionInfo, columns []*expression.Column, names types.NameSlice) (expression.Expression, error) { @@ -595,13 +595,11 @@ func (l *listPartitionPruner) findUsedListPartitions(conds []expression.Expressi func (s *partitionProcessor) findUsedListPartitions(ctx sessionctx.Context, tbl table.Table, partitionNames []model.CIStr, conds []expression.Expression) ([]int, error) { pi := tbl.Meta().Partition - partExpr, err := tbl.(partitionTable).PartitionExpr() - if err != nil { - return nil, err - } + partExpr := tbl.(partitionTable).PartitionExpr() listPruner := newListPartitionPruner(ctx, tbl, partitionNames, s, conds, partExpr.ForListPruning) var used map[int]struct{} + var err error if partExpr.ForListPruning.ColPrunes == nil { used, err = listPruner.findUsedListPartitions(conds) } else { @@ -826,10 +824,7 @@ func intersectionRange(start, end, newStart, newEnd int) (int, int) { func (s *partitionProcessor) pruneRangePartition(ctx sessionctx.Context, pi *model.PartitionInfo, tbl table.PartitionedTable, conds []expression.Expression, columns []*expression.Column, names types.NameSlice) (partitionRangeOR, error) { - partExpr, err := tbl.(partitionTable).PartitionExpr() - if err != nil { - return nil, err - } + partExpr := tbl.(partitionTable).PartitionExpr() // Partition by range columns. if len(pi.Columns) > 0 { diff --git a/session/bootstrap.go b/session/bootstrap.go index 9a9e96b4acd63..69a5345e40791 100644 --- a/session/bootstrap.go +++ b/session/bootstrap.go @@ -2563,7 +2563,7 @@ func oldPasswordUpgrade(pass string) (string, error) { // rebuildAllPartitionValueMapAndSorted rebuilds all value map and sorted info for list column partitions with InfoSchema. 
func rebuildAllPartitionValueMapAndSorted(s *session) { type partitionExpr interface { - PartitionExpr() (*tables.PartitionExpr, error) + PartitionExpr() *tables.PartitionExpr } p := parser.New() @@ -2575,12 +2575,9 @@ func rebuildAllPartitionValueMapAndSorted(s *session) { continue } - pe, err := t.(partitionExpr).PartitionExpr() - if err != nil { - panic("partition table gets partition expression failed") - } + pe := t.(partitionExpr).PartitionExpr() for _, cp := range pe.ColPrunes { - if err = cp.RebuildPartitionValueMapAndSorted(p); err != nil { + if err := cp.RebuildPartitionValueMapAndSorted(p, pi.Definitions); err != nil { logutil.BgLogger().Warn("build list column partition value map and sorted failed") break } diff --git a/session/session.go b/session/session.go index bd99f44604802..84c95bdb3ef9c 100644 --- a/session/session.go +++ b/session/session.go @@ -148,6 +148,7 @@ var ( telemetryTablePartitionDropIntervalUsage = metrics.TelemetryTablePartitionDropIntervalPartitionsCnt telemetryExchangePartitionUsage = metrics.TelemetryExchangePartitionCnt telemetryTableCompactPartitionUsage = metrics.TelemetryCompactPartitionCnt + telemetryReorganizePartitionUsage = metrics.TelemetryReorganizePartitionCnt telemetryLockUserUsage = metrics.TelemetryAccountLockCnt.WithLabelValues("lockUser") telemetryUnlockUserUsage = metrics.TelemetryAccountLockCnt.WithLabelValues("unlockUser") @@ -3999,7 +4000,7 @@ func (s *session) updateTelemetryMetric(es *executor.ExecStmt) { telemetryFlashbackClusterUsage.Inc() } - if ti.UesExchangePartition { + if ti.UseExchangePartition { telemetryExchangePartitionUsage.Inc() } @@ -4044,6 +4045,9 @@ func (s *session) updateTelemetryMetric(es *executor.ExecStmt) { if ti.PartitionTelemetry.UseCompactTablePartition { telemetryTableCompactPartitionUsage.Inc() } + if ti.PartitionTelemetry.UseReorganizePartition { + telemetryReorganizePartitionUsage.Inc() + } } if ti.AccountLockTelemetry != nil { diff --git a/statistics/handle/ddl.go b/statistics/handle/ddl.go index 1f93447b55c14..ed09b2d5660c3 100644 --- a/statistics/handle/ddl.go +++ b/statistics/handle/ddl.go @@ -61,6 +61,21 @@ func (h *Handle) HandleDDLEvent(t *util.Event) error { return err } } + case model.ActionReorganizePartition: + for _, def := range t.PartInfo.Definitions { + // TODO: Should we trigger analyze instead of adding 0s? 
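+ // For now, create empty statistics entries for each newly created partition.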
+ if err := h.insertTableStats2KV(t.TableInfo, def.ID); err != nil { + return err + } + } + // Update global stats, even though it should not have changed, + // the updated statistics from the newly reorganized partitions may be better + pruneMode := h.CurrentPruneMode() + if pruneMode == variable.Dynamic && t.PartInfo != nil { + if err := h.updateGlobalStats(t.TableInfo); err != nil { + return err + } + } case model.ActionFlashbackCluster: return h.updateStatsVersion() } diff --git a/store/gcworker/gc_worker.go b/store/gcworker/gc_worker.go index 474096fbca7ec..104e5ee7f2dc3 100644 --- a/store/gcworker/gc_worker.go +++ b/store/gcworker/gc_worker.go @@ -2138,6 +2138,10 @@ func (w *GCWorker) doGCPlacementRules(se session.Session, safePoint uint64, dr u if err = historyJob.DecodeArgs(&physicalTableIDs); err != nil { return } + case model.ActionReorganizePartition: + if err = historyJob.DecodeArgs(&physicalTableIDs); err != nil { + return + } } if len(physicalTableIDs) == 0 { diff --git a/table/table.go b/table/table.go index 8b316a048be55..d71c24fc64b18 100644 --- a/table/table.go +++ b/table/table.go @@ -196,6 +196,9 @@ type Table interface { // Type returns the type of table Type() Type + + // GetPartitionedTable returns nil if not partitioned + GetPartitionedTable() PartitionedTable } // AllocAutoIncrementValue allocates an auto_increment value for a new row. @@ -243,6 +246,7 @@ type PartitionedTable interface { GetPartition(physicalID int64) PhysicalTable GetPartitionByRow(sessionctx.Context, []types.Datum) (PhysicalTable, error) GetAllPartitionIDs() []int64 + GetPartitionColumnIDs() []int64 GetPartitionColumnNames() []model.CIStr CheckForExchangePartition(ctx sessionctx.Context, pi *model.PartitionInfo, r []types.Datum, pid int64) error } diff --git a/table/tables/partition.go b/table/tables/partition.go index 6a0b315b856e9..79cc3a3f361a4 100644 --- a/table/tables/partition.go +++ b/table/tables/partition.go @@ -40,6 +40,7 @@ import ( "github.com/pingcap/tidb/util" "github.com/pingcap/tidb/util/chunk" "github.com/pingcap/tidb/util/codec" + "github.com/pingcap/tidb/util/dbterror" "github.com/pingcap/tidb/util/hack" "github.com/pingcap/tidb/util/logutil" "github.com/pingcap/tidb/util/mock" @@ -67,6 +68,7 @@ var _ table.PartitionedTable = &partitionedTable{} // partition also implements the table.Table interface. type partition struct { TableCommon + table *partitionedTable } // GetPhysicalID implements table.Table GetPhysicalID interface. @@ -74,6 +76,16 @@ func (p *partition) GetPhysicalID() int64 { return p.physicalTableID } +// GetPartitionedTable implements table.Table GetPartitionedTable interface. +func (p *partition) GetPartitionedTable() table.PartitionedTable { + return p.table +} + +// GetPartitionedTable implements table.Table GetPartitionedTable interface. +func (t *partitionedTable) GetPartitionedTable() table.PartitionedTable { + return t +} + // partitionedTable implements the table.PartitionedTable interface. // partitionedTable is a table, it contains many Partitions. 
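// During REORGANIZE PARTITION it also tracks the not-yet-visible partitions that writes must be duplicated into; see the reorganizePartitions and doubleWritePartitions fields below.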
type partitionedTable struct { @@ -82,11 +94,24 @@ type partitionedTable struct { partitions map[int64]*partition evalBufferTypes []*types.FieldType evalBufferPool sync.Pool + + // Only used during Reorganize partition + // reorganizePartitions are the currently used partitions that are being reorganized + reorganizePartitions map[int64]interface{} + // doubleWritePartitions are the partitions that are not yet visible, but that we should double write to + doubleWritePartitions map[int64]interface{} + reorgPartitionExpr *PartitionExpr } -func newPartitionedTable(tbl *TableCommon, tblInfo *model.TableInfo) (table.Table, error) { +// TODO: Check which data structures can be shared between all partitions and which +// need to be copied +func newPartitionedTable(tbl *TableCommon, tblInfo *model.TableInfo) (table.PartitionedTable, error) { + pi := tblInfo.GetPartitionInfo() + if pi == nil || len(pi.Definitions) == 0 { + return nil, table.ErrUnknownPartition + } ret := &partitionedTable{TableCommon: *tbl} - partitionExpr, err := newPartitionExpr(tblInfo) + partitionExpr, err := newPartitionExpr(tblInfo, pi.Definitions) if err != nil { return nil, errors.Trace(err) } @@ -100,7 +125,6 @@ func newPartitionedTable(tbl *TableCommon, tblInfo *model.TableInfo) (table.Tabl if err := initTableIndices(&ret.TableCommon); err != nil { return nil, errors.Trace(err) } - pi := tblInfo.GetPartitionInfo() partitions := make(map[int64]*partition, len(pi.Definitions)) for _, p := range pi.Definitions { var t partition @@ -108,13 +132,101 @@ func newPartitionedTable(tbl *TableCommon, tblInfo *model.TableInfo) (table.Tabl if err != nil { return nil, errors.Trace(err) } + t.table = ret partitions[p.ID] = &t } ret.partitions = partitions + // In StateWriteReorganization we are using the 'old' partition definitions + // and if any new change happens in DroppingDefinitions, it needs to be done + // also in AddingDefinitions (with new evaluation of the new expression) + // In StateDeleteReorganization we are using the 'new' partition definitions + // and if any new change happens in AddingDefinitions, it needs to be done + // also in DroppingDefinitions (since sessions running on schema version -1 + // should also see the changes) + if pi.DDLState == model.StateDeleteReorganization { + origIdx := setIndexesState(ret, pi.DDLState) + defer unsetIndexesState(ret, origIdx) + ret.reorgPartitionExpr, err = newPartitionExpr(tblInfo, pi.DroppingDefinitions) + if err != nil { + return nil, errors.Trace(err) + } + ret.reorganizePartitions = make(map[int64]interface{}, len(pi.AddingDefinitions)) + for _, def := range pi.AddingDefinitions { + ret.reorganizePartitions[def.ID] = nil + } + ret.doubleWritePartitions = make(map[int64]interface{}, len(pi.DroppingDefinitions)) + for _, def := range pi.DroppingDefinitions { + p, err := initPartition(ret, def) + if err != nil { + return nil, err + } + partitions[def.ID] = p + ret.doubleWritePartitions[def.ID] = nil + } + } else { + if len(pi.AddingDefinitions) > 0 { + origIdx := setIndexesState(ret, pi.DDLState) + defer unsetIndexesState(ret, origIdx) + ret.reorgPartitionExpr, err = newPartitionExpr(tblInfo, pi.AddingDefinitions) + if err != nil { + return nil, errors.Trace(err) + } + ret.doubleWritePartitions = make(map[int64]interface{}, len(pi.AddingDefinitions)) + for _, def := range pi.AddingDefinitions { + ret.doubleWritePartitions[def.ID] = nil + p, err := initPartition(ret, def) + if err != nil { + return nil, err + } + partitions[def.ID] = p + } + } + if len(pi.DroppingDefinitions) > 0 { +
ret.reorganizePartitions = make(map[int64]interface{}, len(pi.DroppingDefinitions)) + for _, def := range pi.DroppingDefinitions { + ret.reorganizePartitions[def.ID] = nil + } + } + } return ret, nil } -func newPartitionExpr(tblInfo *model.TableInfo) (*PartitionExpr, error) { +func setIndexesState(t *partitionedTable, state model.SchemaState) []*model.IndexInfo { + orig := t.meta.Indices + t.meta.Indices = make([]*model.IndexInfo, 0, len(orig)) + for i := range orig { + t.meta.Indices = append(t.meta.Indices, orig[i].Clone()) + if t.meta.Indices[i].State == model.StatePublic { + switch state { + case model.StateDeleteOnly, model.StateNone: + t.meta.Indices[i].State = model.StateDeleteOnly + case model.StatePublic: + // Keep as is + default: + // use the 'StateWriteReorganization' here, since StateDeleteReorganization + // would skip index writes. + t.meta.Indices[i].State = model.StateWriteReorganization + } + } + } + return orig +} + +func unsetIndexesState(t *partitionedTable, orig []*model.IndexInfo) { + t.meta.Indices = orig +} + +func initPartition(t *partitionedTable, def model.PartitionDefinition) (*partition, error) { + var newPart partition + err := initTableCommonWithIndices(&newPart.TableCommon, t.meta, def.ID, t.Columns, t.allocs) + if err != nil { + return nil, err + } + newPart.table = t + return &newPart, nil +} + +func newPartitionExpr(tblInfo *model.TableInfo, defs []model.PartitionDefinition) (*PartitionExpr, error) { // a partitioned table cannot rely on session context/sql modes, so use a default one! ctx := mock.NewContext() dbName := model.NewCIStr(ctx.GetSessionVars().CurrentDB) @@ -125,11 +237,11 @@ func newPartitionExpr(tblInfo *model.TableInfo) (*PartitionExpr, error) { pi := tblInfo.GetPartitionInfo() switch pi.Type { case model.PartitionTypeRange: - return generateRangePartitionExpr(ctx, pi, columns, names) + return generateRangePartitionExpr(ctx, pi, defs, columns, names) case model.PartitionTypeHash: return generateHashPartitionExpr(ctx, pi, columns, names) case model.PartitionTypeList: - return generateListPartitionExpr(ctx, tblInfo, columns, names) + return generateListPartitionExpr(ctx, tblInfo, defs, columns, names) } panic("cannot reach here") } @@ -148,8 +260,6 @@ type PartitionExpr struct { *ForRangeColumnsPruning // ColOffset is the offsets of partition columns. ColumnOffset []int - // InValues: x in (1,2); x in (3,4); x in (5,6), used for list partition. 
- InValues []expression.Expression *ForListPruning } @@ -182,19 +292,19 @@ type ForRangeColumnsPruning struct { LessThan [][]*expression.Expression } -func dataForRangeColumnsPruning(ctx sessionctx.Context, pi *model.PartitionInfo, schema *expression.Schema, names []*types.FieldName, p *parser.Parser) (*ForRangeColumnsPruning, error) { +func dataForRangeColumnsPruning(ctx sessionctx.Context, defs []model.PartitionDefinition, schema *expression.Schema, names []*types.FieldName, p *parser.Parser) (*ForRangeColumnsPruning, error) { var res ForRangeColumnsPruning - res.LessThan = make([][]*expression.Expression, 0, len(pi.Definitions)) - for i := 0; i < len(pi.Definitions); i++ { - lessThanCols := make([]*expression.Expression, 0, len(pi.Columns)) - for j := range pi.Definitions[i].LessThan { - if strings.EqualFold(pi.Definitions[i].LessThan[j], "MAXVALUE") { + res.LessThan = make([][]*expression.Expression, 0, len(defs)) + for i := 0; i < len(defs); i++ { + lessThanCols := make([]*expression.Expression, 0, len(defs[i].LessThan)) + for j := range defs[i].LessThan { + if strings.EqualFold(defs[i].LessThan[j], "MAXVALUE") { // Use a nil pointer instead of math.MaxInt64 to avoid the corner cases. lessThanCols = append(lessThanCols, nil) // No column after MAXVALUE matters break } - tmp, err := parseSimpleExprWithNames(p, ctx, pi.Definitions[i].LessThan[j], schema, names) + tmp, err := parseSimpleExprWithNames(p, ctx, defs[i].LessThan[j], schema, names) if err != nil { return nil, err } @@ -426,29 +536,29 @@ type ForRangePruning struct { Unsigned bool } -// dataForRangePruning extracts the less than parts from 'partition p0 less than xx ... partitoin p1 less than ...' -func dataForRangePruning(sctx sessionctx.Context, pi *model.PartitionInfo) (*ForRangePruning, error) { +// dataForRangePruning extracts the less than parts from 'partition p0 less than xx ... partition p1 less than ...' +func dataForRangePruning(sctx sessionctx.Context, defs []model.PartitionDefinition) (*ForRangePruning, error) { var maxValue bool var unsigned bool - lessThan := make([]int64, len(pi.Definitions)) - for i := 0; i < len(pi.Definitions); i++ { - if strings.EqualFold(pi.Definitions[i].LessThan[0], "MAXVALUE") { + lessThan := make([]int64, len(defs)) + for i := 0; i < len(defs); i++ { + if strings.EqualFold(defs[i].LessThan[0], "MAXVALUE") { // Use a bool flag instead of math.MaxInt64 to avoid the corner cases. 
maxValue = true } else { var err error - lessThan[i], err = strconv.ParseInt(pi.Definitions[i].LessThan[0], 10, 64) + lessThan[i], err = strconv.ParseInt(defs[i].LessThan[0], 10, 64) var numErr *strconv.NumError if stderr.As(err, &numErr) && numErr.Err == strconv.ErrRange { var tmp uint64 - tmp, err = strconv.ParseUint(pi.Definitions[i].LessThan[0], 10, 64) + tmp, err = strconv.ParseUint(defs[i].LessThan[0], 10, 64) lessThan[i] = int64(tmp) unsigned = true } if err != nil { - val, ok := fixOldVersionPartitionInfo(sctx, pi.Definitions[i].LessThan[0]) + val, ok := fixOldVersionPartitionInfo(sctx, defs[i].LessThan[0]) if !ok { - logutil.BgLogger().Error("wrong partition definition", zap.String("less than", pi.Definitions[i].LessThan[0])) + logutil.BgLogger().Error("wrong partition definition", zap.String("less than", defs[i].LessThan[0])) return nil, errors.WithStack(err) } lessThan[i] = val @@ -490,40 +600,14 @@ func rangePartitionExprStrings(pi *model.PartitionInfo) []string { } func generateRangePartitionExpr(ctx sessionctx.Context, pi *model.PartitionInfo, - columns []*expression.Column, names types.NameSlice) (*PartitionExpr, error) { + defs []model.PartitionDefinition, columns []*expression.Column, names types.NameSlice) (*PartitionExpr, error) { // The caller should assure partition info is not nil. - locateExprs := make([]expression.Expression, 0, len(pi.Definitions)) - var buf bytes.Buffer p := parser.New() schema := expression.NewSchema(columns...) partStrs := rangePartitionExprStrings(pi) - for i := 0; i < len(pi.Definitions); i++ { - if strings.EqualFold(pi.Definitions[i].LessThan[0], "MAXVALUE") { - // Expr less than maxvalue is always true. - fmt.Fprintf(&buf, "true") - } else { - maxValueFound := false - for j := range partStrs[1:] { - if strings.EqualFold(pi.Definitions[i].LessThan[j+1], "MAXVALUE") { - // if any column will be less than MAXVALUE, so change < to <= of the previous prefix of columns - fmt.Fprintf(&buf, "((%s) <= (%s))", strings.Join(partStrs[:j+1], ","), strings.Join(pi.Definitions[i].LessThan[:j+1], ",")) - maxValueFound = true - break - } - } - if !maxValueFound { - fmt.Fprintf(&buf, "((%s) < (%s))", strings.Join(partStrs, ","), strings.Join(pi.Definitions[i].LessThan, ",")) - } - } - - expr, err := parseSimpleExprWithNames(p, ctx, buf.String(), schema, names) - if err != nil { - // If it got an error here, ddl may hang forever, so this error log is important. 
- logutil.BgLogger().Error("wrong table partition expression", zap.String("expression", buf.String()), zap.Error(err)) - return nil, errors.Trace(err) - } - locateExprs = append(locateExprs, expr) - buf.Reset() + locateExprs, err := getRangeLocateExprs(ctx, p, defs, partStrs, schema, names) + if err != nil { + return nil, errors.Trace(err) } ret := &PartitionExpr{ UpperBounds: locateExprs, @@ -536,14 +620,14 @@ func generateRangePartitionExpr(ctx sessionctx.Context, pi *model.PartitionInfo, ret.ColumnOffset = offset if len(pi.Columns) < 1 { - tmp, err := dataForRangePruning(ctx, pi) + tmp, err := dataForRangePruning(ctx, defs) if err != nil { return nil, errors.Trace(err) } ret.Expr = partExpr ret.ForRangePruning = tmp } else { - tmp, err := dataForRangeColumnsPruning(ctx, pi, schema, names, p) + tmp, err := dataForRangeColumnsPruning(ctx, defs, schema, names, p) if err != nil { return nil, errors.Trace(err) } @@ -552,6 +636,40 @@ func generateRangePartitionExpr(ctx sessionctx.Context, pi *model.PartitionInfo, return ret, nil } +func getRangeLocateExprs(ctx sessionctx.Context, p *parser.Parser, defs []model.PartitionDefinition, partStrs []string, schema *expression.Schema, names types.NameSlice) ([]expression.Expression, error) { + var buf bytes.Buffer + locateExprs := make([]expression.Expression, 0, len(defs)) + for i := 0; i < len(defs); i++ { + if strings.EqualFold(defs[i].LessThan[0], "MAXVALUE") { + // Expr less than maxvalue is always true. + fmt.Fprintf(&buf, "true") + } else { + maxValueFound := false + for j := range partStrs[1:] { + if strings.EqualFold(defs[i].LessThan[j+1], "MAXVALUE") { + // if any column will be less than MAXVALUE, so change < to <= of the previous prefix of columns + fmt.Fprintf(&buf, "((%s) <= (%s))", strings.Join(partStrs[:j+1], ","), strings.Join(defs[i].LessThan[:j+1], ",")) + maxValueFound = true + break + } + } + if !maxValueFound { + fmt.Fprintf(&buf, "((%s) < (%s))", strings.Join(partStrs, ","), strings.Join(defs[i].LessThan, ",")) + } + } + + expr, err := parseSimpleExprWithNames(p, ctx, buf.String(), schema, names) + if err != nil { + // If it got an error here, ddl may hang forever, so this error log is important. + logutil.BgLogger().Error("wrong table partition expression", zap.String("expression", buf.String()), zap.Error(err)) + return nil, errors.Trace(err) + } + locateExprs = append(locateExprs, expr) + buf.Reset() + } + return locateExprs, nil +} + func getColumnsOffset(cols, columns []*expression.Column) []int { colsOffset := make([]int, len(cols)) for i, col := range columns { @@ -603,7 +721,7 @@ func extractPartitionExprColumns(ctx sessionctx.Context, pi *model.PartitionInfo } func generateListPartitionExpr(ctx sessionctx.Context, tblInfo *model.TableInfo, - columns []*expression.Column, names types.NameSlice) (*PartitionExpr, error) { + defs []model.PartitionDefinition, columns []*expression.Column, names types.NameSlice) (*PartitionExpr, error) { // The caller should assure partition info is not nil. 
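+ // defs is passed in explicitly so that, during REORGANIZE PARTITION, the same code can build the pruner over the adding or dropping definitions instead of the current ones.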
pi := tblInfo.GetPartitionInfo() partExpr, exprCols, offset, err := extractPartitionExprColumns(ctx, pi, columns, names) @@ -612,9 +730,9 @@ func generateListPartitionExpr(ctx sessionctx.Context, tblInfo *model.TableInfo, } listPrune := &ForListPruning{} if len(pi.Columns) == 0 { - err = listPrune.buildListPruner(ctx, tblInfo, exprCols, columns, names) + err = listPrune.buildListPruner(ctx, tblInfo, defs, exprCols, columns, names) } else { - err = listPrune.buildListColumnsPruner(ctx, tblInfo, columns, names) + err = listPrune.buildListColumnsPruner(ctx, tblInfo, defs, columns, names) } if err != nil { return nil, err @@ -627,7 +745,7 @@ func generateListPartitionExpr(ctx sessionctx.Context, tblInfo *model.TableInfo, return ret, nil } -func (lp *ForListPruning) buildListPruner(ctx sessionctx.Context, tblInfo *model.TableInfo, exprCols []*expression.Column, +func (lp *ForListPruning) buildListPruner(ctx sessionctx.Context, tblInfo *model.TableInfo, defs []model.PartitionDefinition, exprCols []*expression.Column, columns []*expression.Column, names types.NameSlice) error { pi := tblInfo.GetPartitionInfo() schema := expression.NewSchema(columns...) @@ -638,7 +756,7 @@ func (lp *ForListPruning) buildListPruner(ctx sessionctx.Context, tblInfo *model logutil.BgLogger().Error("wrong table partition expression", zap.String("expression", pi.Expr), zap.Error(err)) return errors.Trace(err) } - // Since need to change the column index of the expresion, clone the expression first. + // Since need to change the column index of the expression, clone the expression first. lp.LocateExpr = expr.Clone() lp.PruneExprCols = exprCols lp.PruneExpr = expr.Clone() @@ -650,14 +768,15 @@ func (lp *ForListPruning) buildListPruner(ctx sessionctx.Context, tblInfo *model } c.Index = idx } - err = lp.buildListPartitionValueMap(ctx, tblInfo, schema, names, p) + err = lp.buildListPartitionValueMap(ctx, defs, schema, names, p) if err != nil { return err } return nil } -func (lp *ForListPruning) buildListColumnsPruner(ctx sessionctx.Context, tblInfo *model.TableInfo, +func (lp *ForListPruning) buildListColumnsPruner(ctx sessionctx.Context, + tblInfo *model.TableInfo, defs []model.PartitionDefinition, columns []*expression.Column, names types.NameSlice) error { pi := tblInfo.GetPartitionInfo() schema := expression.NewSchema(columns...) @@ -683,7 +802,7 @@ func (lp *ForListPruning) buildListColumnsPruner(ctx sessionctx.Context, tblInfo valueMap: make(map[string]ListPartitionLocation), sorted: btree.NewG[*btreeListColumnItem](btreeDegree, lessBtreeListColumnItem), } - err := colPrune.buildPartitionValueMapAndSorted(p) + err := colPrune.buildPartitionValueMapAndSorted(p, defs) if err != nil { return err } @@ -696,12 +815,11 @@ func (lp *ForListPruning) buildListColumnsPruner(ctx sessionctx.Context, tblInfo // buildListPartitionValueMap builds list partition value map. // The map is column value -> partition index. // colIdx is the column index in the list columns. 
-func (lp *ForListPruning) buildListPartitionValueMap(ctx sessionctx.Context, tblInfo *model.TableInfo, +func (lp *ForListPruning) buildListPartitionValueMap(ctx sessionctx.Context, defs []model.PartitionDefinition, schema *expression.Schema, names types.NameSlice, p *parser.Parser) error { - pi := tblInfo.GetPartitionInfo() lp.valueMap = map[int64]int{} lp.nullPartitionIdx = -1 - for partitionIdx, def := range pi.Definitions { + for partitionIdx, def := range defs { for _, vs := range def.InValues { expr, err := parseSimpleExprWithNames(p, ctx, vs[0], schema, names) if err != nil { @@ -770,26 +888,27 @@ func (lp *ForListPruning) locateListColumnsPartitionByRow(ctx sessionctx.Context // buildPartitionValueMapAndSorted builds list columns partition value map for the specified column. // It also builds list columns partition value btree for the specified column. // colIdx is the specified column index in the list columns. -func (lp *ForListColumnPruning) buildPartitionValueMapAndSorted(p *parser.Parser) error { +func (lp *ForListColumnPruning) buildPartitionValueMapAndSorted(p *parser.Parser, + defs []model.PartitionDefinition) error { l := len(lp.valueMap) if l != 0 { return nil } - return lp.buildListPartitionValueMapAndSorted(p) + return lp.buildListPartitionValueMapAndSorted(p, defs) } // RebuildPartitionValueMapAndSorted rebuilds list columns partition value map for the specified column. -func (lp *ForListColumnPruning) RebuildPartitionValueMapAndSorted(p *parser.Parser) error { +func (lp *ForListColumnPruning) RebuildPartitionValueMapAndSorted(p *parser.Parser, + defs []model.PartitionDefinition) error { lp.valueMap = make(map[string]ListPartitionLocation, len(lp.valueMap)) lp.sorted.Clear(false) - return lp.buildListPartitionValueMapAndSorted(p) + return lp.buildListPartitionValueMapAndSorted(p, defs) } -func (lp *ForListColumnPruning) buildListPartitionValueMapAndSorted(p *parser.Parser) error { - pi := lp.tblInfo.GetPartitionInfo() +func (lp *ForListColumnPruning) buildListPartitionValueMapAndSorted(p *parser.Parser, defs []model.PartitionDefinition) error { sc := lp.ctx.GetSessionVars().StmtCtx - for partitionIdx, def := range pi.Definitions { + for partitionIdx, def := range defs { for groupIdx, vs := range def.InValues { keyBytes, err := lp.genConstExprKey(lp.ctx, sc, vs[lp.colIdx], lp.schema, lp.names, p) if err != nil { @@ -935,15 +1054,24 @@ func generateHashPartitionExpr(ctx sessionctx.Context, pi *model.PartitionInfo, } // PartitionExpr returns the partition expression. 
-func (t *partitionedTable) PartitionExpr() (*PartitionExpr, error) { - return t.partitionExpr, nil +func (t *partitionedTable) PartitionExpr() *PartitionExpr { + return t.partitionExpr } -func (t *partitionedTable) GetPartitionColumnNames() []model.CIStr { +func (t *partitionedTable) GetPartitionColumnIDs() []int64 { // PARTITION BY {LIST|RANGE} COLUMNS uses columns directly without expressions pi := t.Meta().Partition if len(pi.Columns) > 0 { - return pi.Columns + colIDs := make([]int64, 0, len(pi.Columns)) + for _, name := range pi.Columns { + col := table.FindColLowerCase(t.Cols(), name.L) + if col == nil { + // For safety, should not happen + continue + } + colIDs = append(colIDs, col.ID) + } + return colIDs } partitionCols := expression.ExtractColumns(t.partitionExpr.Expr) @@ -951,7 +1079,16 @@ func (t *partitionedTable) GetPartitionColumnNames() []model.CIStr { for _, col := range partitionCols { colIDs = append(colIDs, col.ID) } - colNames := make([]model.CIStr, 0, len(partitionCols)) + return colIDs +} + +func (t *partitionedTable) GetPartitionColumnNames() []model.CIStr { + pi := t.Meta().Partition + if len(pi.Columns) > 0 { + return pi.Columns + } + colIDs := t.GetPartitionColumnIDs() + colNames := make([]model.CIStr, 0, len(colIDs)) for _, colID := range colIDs { for _, col := range t.Cols() { if col.ID == colID { @@ -969,7 +1106,7 @@ func PartitionRecordKey(pid int64, handle int64) kv.Key { } func (t *partitionedTable) CheckForExchangePartition(ctx sessionctx.Context, pi *model.PartitionInfo, r []types.Datum, pid int64) error { - defID, err := t.locatePartition(ctx, pi, r) + defID, err := t.locatePartition(ctx, r) if err != nil { return err } @@ -979,36 +1116,59 @@ func (t *partitionedTable) CheckForExchangePartition(ctx sessionctx.Context, pi return nil } -// locatePartition returns the partition ID of the input record. -func (t *partitionedTable) locatePartition(ctx sessionctx.Context, pi *model.PartitionInfo, r []types.Datum) (int64, error) { +// locatePartitionCommon returns the partition idx of the input record. 
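+// It is shared by locatePartition and locateReorgPartition below, which pass in the current and the reorganized PartitionExpr respectively.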
+func (t *partitionedTable) locatePartitionCommon(ctx sessionctx.Context, pi *model.PartitionInfo, partitionExpr *PartitionExpr, r []types.Datum) (int, error) { var err error var idx int switch t.meta.Partition.Type { case model.PartitionTypeRange: if len(pi.Columns) == 0 { - idx, err = t.locateRangePartition(ctx, pi, r) + idx, err = t.locateRangePartition(ctx, partitionExpr, r) } else { - idx, err = t.locateRangeColumnPartition(ctx, pi, r) + idx, err = t.locateRangeColumnPartition(ctx, partitionExpr, r) } case model.PartitionTypeHash: + // Note that only LIST and RANGE supports REORGANIZE PARTITION + // TODO: Add support for ADD PARTITION and COALESCE PARTITION for HASH idx, err = t.locateHashPartition(ctx, pi, r) case model.PartitionTypeList: - idx, err = t.locateListPartition(ctx, pi, r) + idx, err = t.locateListPartition(ctx, partitionExpr, r) } if err != nil { return 0, errors.Trace(err) } + return idx, nil +} + +func (t *partitionedTable) locatePartition(ctx sessionctx.Context, r []types.Datum) (int64, error) { + pi := t.Meta().GetPartitionInfo() + idx, err := t.locatePartitionCommon(ctx, pi, t.partitionExpr, r) + if err != nil { + return 0, errors.Trace(err) + } return pi.Definitions[idx].ID, nil } -func (t *partitionedTable) locateRangeColumnPartition(ctx sessionctx.Context, pi *model.PartitionInfo, r []types.Datum) (int, error) { +func (t *partitionedTable) locateReorgPartition(ctx sessionctx.Context, r []types.Datum) (int64, error) { + pi := t.Meta().GetPartitionInfo() + idx, err := t.locatePartitionCommon(ctx, pi, t.reorgPartitionExpr, r) + if err != nil { + return 0, errors.Trace(err) + } + if pi.DDLState == model.StateDeleteReorganization { + return pi.DroppingDefinitions[idx].ID, nil + } + return pi.AddingDefinitions[idx].ID, nil +} + +func (t *partitionedTable) locateRangeColumnPartition(ctx sessionctx.Context, partitionExpr *PartitionExpr, r []types.Datum) (int, error) { + upperBounds := partitionExpr.UpperBounds var lastError error - partitionExprs := t.partitionExpr.UpperBounds evalBuffer := t.evalBufferPool.Get().(*chunk.MutRow) defer t.evalBufferPool.Put(evalBuffer) - idx := sort.Search(len(partitionExprs), func(i int) bool { + idx := sort.Search(len(upperBounds), func(i int) bool { evalBuffer.SetDatums(r...) - ret, isNull, err := partitionExprs[i].EvalInt(ctx, evalBuffer.ToRow()) + ret, isNull, err := upperBounds[i].EvalInt(ctx, evalBuffer.ToRow()) if err != nil { lastError = err return true // Does not matter, will propagate the last error anyway. @@ -1023,11 +1183,11 @@ func (t *partitionedTable) locateRangeColumnPartition(ctx sessionctx.Context, pi if lastError != nil { return 0, errors.Trace(lastError) } - if idx >= len(partitionExprs) { + if idx >= len(upperBounds) { // The data does not belong to any of the partition returns `table has no partition for value %s`. 
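+ // Evaluate the partition expression for the row so that the error message can include the offending value.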
var valueMsg string - if pi.Expr != "" { - e, err := expression.ParseSimpleExprWithTableInfo(ctx, pi.Expr, t.meta) + if t.meta.Partition.Expr != "" { + e, err := expression.ParseSimpleExprWithTableInfo(ctx, t.meta.Partition.Expr, t.meta) if err == nil { val, _, err := e.EvalInt(ctx, chunk.MutRowFromDatums(r).ToRow()) if err == nil { @@ -1043,15 +1203,15 @@ func (t *partitionedTable) locateRangeColumnPartition(ctx sessionctx.Context, pi return idx, nil } -func (t *partitionedTable) locateListPartition(ctx sessionctx.Context, pi *model.PartitionInfo, r []types.Datum) (int, error) { - lp := t.partitionExpr.ForListPruning +func (t *partitionedTable) locateListPartition(ctx sessionctx.Context, partitionExpr *PartitionExpr, r []types.Datum) (int, error) { + lp := partitionExpr.ForListPruning if len(lp.ColPrunes) == 0 { return lp.locateListPartitionByRow(ctx, r) } return lp.locateListColumnsPartitionByRow(ctx, r) } -func (t *partitionedTable) locateRangePartition(ctx sessionctx.Context, pi *model.PartitionInfo, r []types.Datum) (int, error) { +func (t *partitionedTable) locateRangePartition(ctx sessionctx.Context, partitionExpr *PartitionExpr, r []types.Datum) (int, error) { var ( ret int64 val int64 @@ -1074,7 +1234,7 @@ func (t *partitionedTable) locateRangePartition(ctx sessionctx.Context, pi *mode ret = val } unsigned := mysql.HasUnsignedFlag(t.partitionExpr.Expr.GetType().GetFlag()) - ranges := t.partitionExpr.ForRangePruning + ranges := partitionExpr.ForRangePruning length := len(ranges.LessThan) pos := sort.Search(length, func(i int) bool { if isNull { @@ -1088,8 +1248,8 @@ func (t *partitionedTable) locateRangePartition(ctx sessionctx.Context, pi *mode if pos < 0 || pos >= length { // The data does not belong to any of the partition returns `table has no partition for value %s`. var valueMsg string - if pi.Expr != "" { - e, err := expression.ParseSimpleExprWithTableInfo(ctx, pi.Expr, t.meta) + if t.meta.Partition.Expr != "" { + e, err := expression.ParseSimpleExprWithTableInfo(ctx, t.meta.Partition.Expr, t.meta) if err == nil { val, _, err := e.EvalInt(ctx, chunk.MutRowFromDatums(r).ToRow()) if err == nil { @@ -1147,16 +1307,39 @@ func (t *partitionedTable) locateHashPartition(ctx sessionctx.Context, pi *model func (t *partitionedTable) GetPartition(pid int64) table.PhysicalTable { // Attention, can't simply use `return t.partitions[pid]` here. // Because A nil of type *partition is a kind of `table.PhysicalTable` - p, ok := t.partitions[pid] + part, ok := t.partitions[pid] if !ok { + // Should never happen! return nil } - return p + return part +} + +// GetReorganizedPartitionedTable returns the same table +// but only with the AddingDefinitions used. +func GetReorganizedPartitionedTable(t table.Table) (table.PartitionedTable, error) { + // This is used during Reorganize partitions; All data from DroppingDefinitions + // will be copied to AddingDefinitions, so only setup with AddingDefinitions! + + // Do not change any Definitions of t, but create a new struct. 
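+ // Only partitioned tables can be reorganized; anything else is unsupported.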
+ if t.GetPartitionedTable() == nil { + return nil, dbterror.ErrUnsupportedReorganizePartition.GenWithStackByArgs() + } + tblInfo := t.Meta().Clone() + tblInfo.Partition.Definitions = tblInfo.Partition.AddingDefinitions + tblInfo.Partition.AddingDefinitions = nil + tblInfo.Partition.DroppingDefinitions = nil + var tc TableCommon + initTableCommon(&tc, tblInfo, tblInfo.ID, t.Cols(), t.Allocators(nil)) + + // and rebuild the partitioning structure + + return newPartitionedTable(&tc, tblInfo) } // GetPartitionByRow returns a Table, which is actually a Partition. func (t *partitionedTable) GetPartitionByRow(ctx sessionctx.Context, r []types.Datum) (table.PhysicalTable, error) { - pid, err := t.locatePartition(ctx, t.Meta().GetPartitionInfo(), r) + pid, err := t.locatePartition(ctx, r) if err != nil { return nil, errors.Trace(err) } @@ -1165,7 +1348,7 @@ func (t *partitionedTable) GetPartitionByRow(ctx sessionctx.Context, r []types.D // GetPartitionByRow returns a Table, which is actually a Partition. func (t *partitionTableWithGivenSets) GetPartitionByRow(ctx sessionctx.Context, r []types.Datum) (table.PhysicalTable, error) { - pid, err := t.locatePartition(ctx, t.Meta().GetPartitionInfo(), r) + pid, err := t.locatePartition(ctx, r) if err != nil { return nil, errors.Trace(err) } @@ -1181,8 +1364,7 @@ func (t *partitionedTable) AddRecord(ctx sessionctx.Context, r []types.Datum, op } func partitionedTableAddRecord(ctx sessionctx.Context, t *partitionedTable, r []types.Datum, partitionSelection map[int64]struct{}, opts []table.AddRecordOption) (recordID kv.Handle, err error) { - partitionInfo := t.meta.GetPartitionInfo() - pid, err := t.locatePartition(ctx, partitionInfo, r) + pid, err := t.locatePartition(ctx, r) if err != nil { return nil, errors.Trace(err) } @@ -1193,7 +1375,23 @@ func partitionedTableAddRecord(ctx sessionctx.Context, t *partitionedTable, r [] } } tbl := t.GetPartition(pid) - return tbl.AddRecord(ctx, r, opts...) + recordID, err = tbl.AddRecord(ctx, r, opts...) + if err != nil { + return + } + if _, ok := t.reorganizePartitions[pid]; ok { + // Double write to the ongoing reorganized partition + pid, err = t.locateReorgPartition(ctx, r) + if err != nil { + return nil, errors.Trace(err) + } + tbl = t.GetPartition(pid) + recordID, err = tbl.AddRecord(ctx, r, opts...) + if err != nil { + return + } + } + return } // partitionTableWithGivenSets is used for this kind of grammar: partition (p0,p1) @@ -1230,19 +1428,37 @@ func (t *partitionTableWithGivenSets) GetAllPartitionIDs() []int64 { // RemoveRecord implements table.Table RemoveRecord interface. 
func (t *partitionedTable) RemoveRecord(ctx sessionctx.Context, h kv.Handle, r []types.Datum) error { - partitionInfo := t.meta.GetPartitionInfo() - pid, err := t.locatePartition(ctx, partitionInfo, r) + pid, err := t.locatePartition(ctx, r) if err != nil { return errors.Trace(err) } tbl := t.GetPartition(pid) - return tbl.RemoveRecord(ctx, h, r) + err = tbl.RemoveRecord(ctx, h, r) + if err != nil { + return errors.Trace(err) + } + + if _, ok := t.reorganizePartitions[pid]; ok { + pid, err = t.locateReorgPartition(ctx, r) + if err != nil { + return errors.Trace(err) + } + tbl = t.GetPartition(pid) + err = tbl.RemoveRecord(ctx, h, r) + if err != nil { + return errors.Trace(err) + } + } + return nil } func (t *partitionedTable) GetAllPartitionIDs() []int64 { ptIDs := make([]int64, 0, len(t.partitions)) for id := range t.partitions { + if _, ok := t.doubleWritePartitions[id]; ok { + continue + } ptIDs = append(ptIDs, id) } return ptIDs @@ -1260,12 +1476,11 @@ func (t *partitionTableWithGivenSets) UpdateRecord(ctx context.Context, sctx ses } func partitionedTableUpdateRecord(gctx context.Context, ctx sessionctx.Context, t *partitionedTable, h kv.Handle, currData, newData []types.Datum, touched []bool, partitionSelection map[int64]struct{}) error { - partitionInfo := t.meta.GetPartitionInfo() - from, err := t.locatePartition(ctx, partitionInfo, currData) + from, err := t.locatePartition(ctx, currData) if err != nil { return errors.Trace(err) } - to, err := t.locatePartition(ctx, partitionInfo, newData) + to, err := t.locatePartition(ctx, newData) if err != nil { return errors.Trace(err) } @@ -1297,11 +1512,82 @@ func partitionedTableUpdateRecord(gctx context.Context, ctx sessionctx.Context, logutil.BgLogger().Error("update partition record fails", zap.String("message", "new record inserted while old record is not removed"), zap.Error(err)) return errors.Trace(err) } + newTo, newFrom := int64(0), int64(0) + if _, ok := t.reorganizePartitions[to]; ok { + newTo, err = t.locateReorgPartition(ctx, newData) + // There might be valid cases when errors should be accepted? + if err != nil { + return errors.Trace(err) + } + } + if _, ok := t.reorganizePartitions[from]; ok { + newFrom, err = t.locateReorgPartition(ctx, currData) + // There might be valid cases when errors should be accepted? + if err != nil { + return errors.Trace(err) + } + } + if newTo == newFrom && newTo != 0 { + // Update needs to be done in StateDeleteOnly as well + tbl := t.GetPartition(newTo) + return tbl.UpdateRecord(gctx, ctx, h, currData, newData, touched) + } + if newTo != 0 && t.Meta().GetPartitionInfo().DDLState != model.StateDeleteOnly { + tbl := t.GetPartition(newTo) + _, err = tbl.AddRecord(ctx, newData) + if err != nil { + return errors.Trace(err) + } + } + if newFrom != 0 { + tbl := t.GetPartition(newFrom) + err = tbl.RemoveRecord(ctx, h, currData) + // TODO: Can this happen? When the data is not yet backfilled? 
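+ // For now, propagate any error from this delete instead of ignoring it.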
+ if err != nil { + return errors.Trace(err) + } + } return nil } - tbl := t.GetPartition(to) - return tbl.UpdateRecord(gctx, ctx, h, currData, newData, touched) + err = tbl.UpdateRecord(gctx, ctx, h, currData, newData, touched) + if err != nil { + return errors.Trace(err) + } + if _, ok := t.reorganizePartitions[to]; ok { + // Even if to == from, in the reorganized partitions they may differ, + // as in the case of a split + newTo, err := t.locateReorgPartition(ctx, newData) + if err != nil { + return errors.Trace(err) + } + newFrom, err := t.locateReorgPartition(ctx, currData) + if err != nil { + return errors.Trace(err) + } + if newTo == newFrom { + // Update needs to be done in StateDeleteOnly as well + tbl = t.GetPartition(newTo) + err = tbl.UpdateRecord(gctx, ctx, h, currData, newData, touched) + if err != nil { + return errors.Trace(err) + } + return nil + } + if t.Meta().GetPartitionInfo().DDLState != model.StateDeleteOnly { + tbl = t.GetPartition(newTo) + _, err = tbl.AddRecord(ctx, newData) + if err != nil { + return errors.Trace(err) + } + } + tbl = t.GetPartition(newFrom) + err = tbl.RemoveRecord(ctx, h, currData) + if err != nil { + return errors.Trace(err) + } + } + return nil } // FindPartitionByName finds partition in table meta by name. diff --git a/table/tables/partition_test.go b/table/tables/partition_test.go index cc8dd90a44737..0bac493aa7f35 100644 --- a/table/tables/partition_test.go +++ b/table/tables/partition_test.go @@ -273,10 +273,9 @@ func TestGeneratePartitionExpr(t *testing.T) { tbl, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t1")) require.NoError(t, err) type partitionExpr interface { - PartitionExpr() (*tables.PartitionExpr, error) + PartitionExpr() *tables.PartitionExpr } - pe, err := tbl.(partitionExpr).PartitionExpr() - require.NoError(t, err) + pe := tbl.(partitionExpr).PartitionExpr() upperBounds := []string{ "lt(t1.id, 4)", diff --git a/table/tables/tables.go b/table/tables/tables.go index 3e4a673cc77fb..25b1301d7bb00 100644 --- a/table/tables/tables.go +++ b/table/tables/tables.go @@ -57,6 +57,7 @@ import ( // TableCommon is shared by both Table and partition. type TableCommon struct { + // TODO: Why do we need tableID, when it is already in meta.ID? tableID int64 // physicalTableID is a unique int64 to identify a physical table. physicalTableID int64 @@ -235,6 +236,11 @@ func (t *TableCommon) GetPhysicalID() int64 { return t.physicalTableID } +// GetPartitionedTable implements table.Table GetPartitionedTable interface. +func (t *TableCommon) GetPartitionedTable() table.PartitionedTable { + return nil +} + type getColsMode int64 const ( @@ -1305,9 +1311,32 @@ func (t *TableCommon) removeRowData(ctx sessionctx.Context, h kv.Handle) error { } } }) - err = txn.SetAssertion(key, kv.SetAssertExist) - if err != nil { - return err + doAssert := true + p := t.Meta().Partition + if p != nil { + // This disables asserting during Reorganize Partition. + switch ctx.GetSessionVars().AssertionLevel { + case variable.AssertionLevelFast: + // Fast option, just skip assertion for all partitions. + if p.DDLState != model.StateNone && p.DDLState != model.StatePublic { + doAssert = false + } + case variable.AssertionLevelStrict: + // Strict, only disable assertion for intermediate partitions. + // If there were an easy way to get from a TableCommon back to the partitioned table...
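+ // ...we could ask it directly; instead, match this physical table ID against the AddingDefinitions.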
+ for i := range p.AddingDefinitions { + if t.physicalTableID == p.AddingDefinitions[i].ID { + doAssert = false + break + } + } + } + } + if doAssert { + err = txn.SetAssertion(key, kv.SetAssertExist) + if err != nil { + return err + } } return txn.Delete(key) } diff --git a/telemetry/data_feature_usage_test.go b/telemetry/data_feature_usage_test.go index af746b1b7b44d..278ef894e3474 100644 --- a/telemetry/data_feature_usage_test.go +++ b/telemetry/data_feature_usage_test.go @@ -253,6 +253,7 @@ func TestTablePartition(t *testing.T) { require.Equal(t, int64(0), usage.TablePartition.TablePartitionCreateIntervalPartitionsCnt) require.Equal(t, int64(0), usage.TablePartition.TablePartitionAddIntervalPartitionsCnt) require.Equal(t, int64(0), usage.TablePartition.TablePartitionDropIntervalPartitionsCnt) + require.Equal(t, int64(0), usage.TablePartition.TablePartitionReorganizePartitionCnt) telemetry.PostReportTelemetryDataForTest() tk.MustExec("drop table if exists pt1") @@ -264,6 +265,7 @@ func TestTablePartition(t *testing.T) { "partition p4 values less than (15))") tk.MustExec("alter table pt1 first partition less than (9)") tk.MustExec("alter table pt1 last partition less than (21)") + tk.MustExec("alter table pt1 reorganize partition p4 into (partition p4 values less than (13), partition p5 values less than (15))") tk.MustExec("drop table if exists pt1") tk.MustExec("create table pt1 (d datetime primary key, v varchar(255)) partition by range columns(d)" + " interval (1 day) first partition less than ('2022-01-01') last partition less than ('2022-02-22')") @@ -288,6 +290,7 @@ func TestTablePartition(t *testing.T) { require.Equal(t, int64(1), usage.TablePartition.TablePartitionCreateIntervalPartitionsCnt) require.Equal(t, int64(1), usage.TablePartition.TablePartitionAddIntervalPartitionsCnt) require.Equal(t, int64(1), usage.TablePartition.TablePartitionDropIntervalPartitionsCnt) + require.Equal(t, int64(1), usage.TablePartition.TablePartitionReorganizePartitionCnt) tk.MustExec("drop table if exists pt2") tk.MustExec("create table pt2 (a int,b int) partition by range(a) (" +