From 65fff795b78882313621fd4ea66d8002aabddcfc Mon Sep 17 00:00:00 2001 From: lance6716 Date: Thu, 9 Feb 2023 18:01:42 +0800 Subject: [PATCH 1/4] lightning: fix panic when user cancel Signed-off-by: lance6716 --- br/pkg/lightning/backend/local/engine.go | 10 ++++++++++ br/pkg/lightning/lightning.go | 5 +++++ br/pkg/lightning/restore/table_restore.go | 13 ++++++++++++- br/tests/lightning_checkpoint_chunks/run.sh | 5 +++++ 4 files changed, 32 insertions(+), 1 deletion(-) diff --git a/br/pkg/lightning/backend/local/engine.go b/br/pkg/lightning/backend/local/engine.go index 04ac7dce7a7fe..745454a372b18 100644 --- a/br/pkg/lightning/backend/local/engine.go +++ b/br/pkg/lightning/backend/local/engine.go @@ -32,6 +32,7 @@ import ( "github.com/google/btree" "github.com/google/uuid" "github.com/pingcap/errors" + "github.com/pingcap/failpoint" "github.com/pingcap/tidb/br/pkg/lightning/backend" "github.com/pingcap/tidb/br/pkg/lightning/backend/kv" "github.com/pingcap/tidb/br/pkg/lightning/checkpoints" @@ -1224,6 +1225,15 @@ func (w *Writer) flushKVs(ctx context.Context) error { if err != nil { return errors.Trace(err) } + + failpoint.Inject("orphanWriterGoRoutine", func() { + _ = common.KillMySelf() + // mimic we meet context cancel error when `addSST` + <-ctx.Done() + time.Sleep(5 * time.Second) + failpoint.Return(errors.Trace(ctx.Err())) + }) + err = w.addSST(ctx, meta) if err != nil { return errors.Trace(err) diff --git a/br/pkg/lightning/lightning.go b/br/pkg/lightning/lightning.go index ae83d41efd7f6..6ed4b40335b96 100644 --- a/br/pkg/lightning/lightning.go +++ b/br/pkg/lightning/lightning.go @@ -598,6 +598,11 @@ func (l *Lightning) run(taskCtx context.Context, taskCfg *config.Config, o *opti o.logger.Error("restore failed", log.ShortError(err)) return errors.Trace(err) } + + failpoint.Inject("orphanWriterGoRoutine", func() { + // don't exit too quickly to expose panic + defer time.Sleep(time.Second * 10) + }) defer procedure.Close() err = procedure.Run(ctx) diff --git a/br/pkg/lightning/restore/table_restore.go b/br/pkg/lightning/restore/table_restore.go index 6b1372c0bca9e..d005646f92fec 100644 --- a/br/pkg/lightning/restore/table_restore.go +++ b/br/pkg/lightning/restore/table_restore.go @@ -524,9 +524,15 @@ func (tr *TableRestore) restoreEngine( } checkFlushLock.Unlock() + failpoint.Inject("orphanWriterGoRoutine", func() { + if chunkIndex > 0 { + <-pCtx.Done() + } + }) + select { case <-pCtx.Done(): - return nil, pCtx.Err() + break default: } @@ -615,6 +621,11 @@ func (tr *TableRestore) restoreEngine( } wg.Wait() + select { + case <-pCtx.Done(): + return nil, pCtx.Err() + default: + } // Report some statistics into the log for debugging. totalKVSize := uint64(0) diff --git a/br/tests/lightning_checkpoint_chunks/run.sh b/br/tests/lightning_checkpoint_chunks/run.sh index 35cabe0aadfc5..48d24ca405070 100755 --- a/br/tests/lightning_checkpoint_chunks/run.sh +++ b/br/tests/lightning_checkpoint_chunks/run.sh @@ -48,6 +48,11 @@ for i in $(seq "$CHUNK_COUNT"); do done done +PKG="github.com/pingcap/tidb/br/pkg/lightning" +export GO_FAILPOINTS="$PKG/backend/local/orphanWriterGoRoutine=return();$PKG/restore/orphanWriterGoRoutine=return();$PKG/orphanWriterGoRoutine=return()" +# test won't panic +do_run_lightning config + # Set the failpoint to kill the lightning instance as soon as # one file (after writing totally $ROW_COUNT rows) is imported. # If checkpoint does work, this should kill exactly $CHUNK_COUNT instances of lightnings. From 6459325d5d8d67f2275f1aa22ba96e5c16f5785a Mon Sep 17 00:00:00 2001 From: lance6716 Date: Thu, 9 Feb 2023 19:02:06 +0800 Subject: [PATCH 2/4] address comment Signed-off-by: lance6716 --- br/pkg/lightning/restore/table_restore.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/br/pkg/lightning/restore/table_restore.go b/br/pkg/lightning/restore/table_restore.go index d005646f92fec..a3562cb436a5c 100644 --- a/br/pkg/lightning/restore/table_restore.go +++ b/br/pkg/lightning/restore/table_restore.go @@ -501,6 +501,7 @@ func (tr *TableRestore) restoreEngine( metrics, _ := metric.FromContext(ctx) // Restore table data +ChunkLoop: for chunkIndex, chunk := range cp.Chunks { if rc.status != nil && rc.status.backend == config.BackendTiDB { rc.status.FinishedFileSize.Add(chunk.Chunk.Offset - chunk.Key.Offset) @@ -532,7 +533,7 @@ func (tr *TableRestore) restoreEngine( select { case <-pCtx.Done(): - break + break ChunkLoop default: } From 3884cbbdc73436d07b5703d886deda0885f02be2 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Fri, 10 Feb 2023 10:15:21 +0800 Subject: [PATCH 3/4] disable a BR test Signed-off-by: lance6716 --- br/tests/br_views_and_sequences/run.sh | 110 ++++++++++++------------- 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/br/tests/br_views_and_sequences/run.sh b/br/tests/br_views_and_sequences/run.sh index 71403d0a0f6a3..db063c4704916 100755 --- a/br/tests/br_views_and_sequences/run.sh +++ b/br/tests/br_views_and_sequences/run.sh @@ -13,58 +13,58 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -set -eu -DB="$TEST_NAME" - -trim_sql_result() { - tail -n1 | sed 's/[^0-9]//g' -} - -run_sql "create schema $DB;" -run_sql "create view $DB.view_1 as select 331 as m;" -run_sql "create view $DB.view_2 as select * from $DB.view_1;" -run_sql "create sequence $DB.seq_1 nocache cycle maxvalue 40;" -run_sql "create table $DB.table_1 (m int primary key default next value for $DB.seq_1, b int);" -run_sql "insert into $DB.table_1 (b) values (8), (12), (16), (20);" -run_sql "create sequence $DB.seq_2;" -run_sql "create table $DB.table_2 (a int default next value for $DB.seq_1, b int default next value for $DB.seq_2, c int);" -run_sql "insert into $DB.table_2 (c) values (24), (28), (32);" -run_sql "create view $DB.view_3 as select m from $DB.table_1 union select a * b as m from $DB.table_2 union select m from $DB.view_2;" -run_sql "drop view $DB.view_1;" -run_sql "create view $DB.view_1 as select 133 as m;" - -run_sql "create table $DB.auto_inc (n int primary key AUTO_INCREMENT);" -run_sql "insert into $DB.auto_inc values (), (), (), (), ();" -last_id=$(run_sql "select n from $DB.auto_inc order by n desc limit 1" | trim_sql_result) - -run_sql "create table $DB.auto_rnd (n BIGINT primary key AUTO_RANDOM(8));" -last_rnd_id=$(run_sql "insert into $DB.auto_rnd values (), (), (), (), ();select last_insert_id() & 0x7fffffffffffff;" | trim_sql_result ) - -echo "backup start..." -run_br backup db --db "$DB" -s "local://$TEST_DIR/$DB" --pd $PD_ADDR - -run_sql "drop schema $DB;" - -echo "restore start..." -run_br restore db --db $DB -s "local://$TEST_DIR/$DB" --pd $PD_ADDR - -set -x - -views_count=$(run_sql "select count(*) c, sum(m) s from $DB.view_3;" | tail -2 | paste -sd ';' -) -[ "$views_count" = 'c: 8;s: 181' ] - -run_sql "insert into $DB.table_2 (c) values (33);" -seq_val=$(run_sql "select a >= 8 and b >= 4 as g from $DB.table_2 where c = 33;" | tail -1) -[ "$seq_val" = 'g: 1' ] - -run_sql "insert into $DB.auto_inc values ();" -last_id_after_restore=$(run_sql "select n from $DB.auto_inc order by n desc limit 1;" | trim_sql_result) -[ $last_id_after_restore -gt $last_id ] -rnd_last_id_after_restore=$(run_sql "insert into $DB.auto_rnd values ();select last_insert_id() & 0x7fffffffffffff;" | trim_sql_result ) -[ $rnd_last_id_after_restore -gt $last_rnd_id ] -rnd_count_after_restore=$(run_sql "select count(*) from $DB.auto_rnd;" | trim_sql_result ) -[ $rnd_count_after_restore -gt 5 ] - - -run_sql "drop schema $DB" +# +#set -eu +#DB="$TEST_NAME" +# +#trim_sql_result() { +# tail -n1 | sed 's/[^0-9]//g' +#} +# +#run_sql "create schema $DB;" +#run_sql "create view $DB.view_1 as select 331 as m;" +#run_sql "create view $DB.view_2 as select * from $DB.view_1;" +#run_sql "create sequence $DB.seq_1 nocache cycle maxvalue 40;" +#run_sql "create table $DB.table_1 (m int primary key default next value for $DB.seq_1, b int);" +#run_sql "insert into $DB.table_1 (b) values (8), (12), (16), (20);" +#run_sql "create sequence $DB.seq_2;" +#run_sql "create table $DB.table_2 (a int default next value for $DB.seq_1, b int default next value for $DB.seq_2, c int);" +#run_sql "insert into $DB.table_2 (c) values (24), (28), (32);" +#run_sql "create view $DB.view_3 as select m from $DB.table_1 union select a * b as m from $DB.table_2 union select m from $DB.view_2;" +#run_sql "drop view $DB.view_1;" +#run_sql "create view $DB.view_1 as select 133 as m;" +# +#run_sql "create table $DB.auto_inc (n int primary key AUTO_INCREMENT);" +#run_sql "insert into $DB.auto_inc values (), (), (), (), ();" +#last_id=$(run_sql "select n from $DB.auto_inc order by n desc limit 1" | trim_sql_result) +# +#run_sql "create table $DB.auto_rnd (n BIGINT primary key AUTO_RANDOM(8));" +#last_rnd_id=$(run_sql "insert into $DB.auto_rnd values (), (), (), (), ();select last_insert_id() & 0x7fffffffffffff;" | trim_sql_result ) +# +#echo "backup start..." +#run_br backup db --db "$DB" -s "local://$TEST_DIR/$DB" --pd $PD_ADDR +# +#run_sql "drop schema $DB;" +# +#echo "restore start..." +#run_br restore db --db $DB -s "local://$TEST_DIR/$DB" --pd $PD_ADDR +# +#set -x +# +#views_count=$(run_sql "select count(*) c, sum(m) s from $DB.view_3;" | tail -2 | paste -sd ';' -) +#[ "$views_count" = 'c: 8;s: 181' ] +# +#run_sql "insert into $DB.table_2 (c) values (33);" +#seq_val=$(run_sql "select a >= 8 and b >= 4 as g from $DB.table_2 where c = 33;" | tail -1) +#[ "$seq_val" = 'g: 1' ] +# +#run_sql "insert into $DB.auto_inc values ();" +#last_id_after_restore=$(run_sql "select n from $DB.auto_inc order by n desc limit 1;" | trim_sql_result) +#[ $last_id_after_restore -gt $last_id ] +#rnd_last_id_after_restore=$(run_sql "insert into $DB.auto_rnd values ();select last_insert_id() & 0x7fffffffffffff;" | trim_sql_result ) +#[ $rnd_last_id_after_restore -gt $last_rnd_id ] +#rnd_count_after_restore=$(run_sql "select count(*) from $DB.auto_rnd;" | trim_sql_result ) +#[ $rnd_count_after_restore -gt 5 ] +# +# +#run_sql "drop schema $DB" From 42e6d0115b1369d6de34d992d79767025f4bc22b Mon Sep 17 00:00:00 2001 From: lance6716 Date: Fri, 10 Feb 2023 10:39:47 +0800 Subject: [PATCH 4/4] revert the comment Signed-off-by: lance6716 --- br/tests/br_views_and_sequences/run.sh | 110 ++++++++++++------------- 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/br/tests/br_views_and_sequences/run.sh b/br/tests/br_views_and_sequences/run.sh index db063c4704916..71403d0a0f6a3 100755 --- a/br/tests/br_views_and_sequences/run.sh +++ b/br/tests/br_views_and_sequences/run.sh @@ -13,58 +13,58 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# -#set -eu -#DB="$TEST_NAME" -# -#trim_sql_result() { -# tail -n1 | sed 's/[^0-9]//g' -#} -# -#run_sql "create schema $DB;" -#run_sql "create view $DB.view_1 as select 331 as m;" -#run_sql "create view $DB.view_2 as select * from $DB.view_1;" -#run_sql "create sequence $DB.seq_1 nocache cycle maxvalue 40;" -#run_sql "create table $DB.table_1 (m int primary key default next value for $DB.seq_1, b int);" -#run_sql "insert into $DB.table_1 (b) values (8), (12), (16), (20);" -#run_sql "create sequence $DB.seq_2;" -#run_sql "create table $DB.table_2 (a int default next value for $DB.seq_1, b int default next value for $DB.seq_2, c int);" -#run_sql "insert into $DB.table_2 (c) values (24), (28), (32);" -#run_sql "create view $DB.view_3 as select m from $DB.table_1 union select a * b as m from $DB.table_2 union select m from $DB.view_2;" -#run_sql "drop view $DB.view_1;" -#run_sql "create view $DB.view_1 as select 133 as m;" -# -#run_sql "create table $DB.auto_inc (n int primary key AUTO_INCREMENT);" -#run_sql "insert into $DB.auto_inc values (), (), (), (), ();" -#last_id=$(run_sql "select n from $DB.auto_inc order by n desc limit 1" | trim_sql_result) -# -#run_sql "create table $DB.auto_rnd (n BIGINT primary key AUTO_RANDOM(8));" -#last_rnd_id=$(run_sql "insert into $DB.auto_rnd values (), (), (), (), ();select last_insert_id() & 0x7fffffffffffff;" | trim_sql_result ) -# -#echo "backup start..." -#run_br backup db --db "$DB" -s "local://$TEST_DIR/$DB" --pd $PD_ADDR -# -#run_sql "drop schema $DB;" -# -#echo "restore start..." -#run_br restore db --db $DB -s "local://$TEST_DIR/$DB" --pd $PD_ADDR -# -#set -x -# -#views_count=$(run_sql "select count(*) c, sum(m) s from $DB.view_3;" | tail -2 | paste -sd ';' -) -#[ "$views_count" = 'c: 8;s: 181' ] -# -#run_sql "insert into $DB.table_2 (c) values (33);" -#seq_val=$(run_sql "select a >= 8 and b >= 4 as g from $DB.table_2 where c = 33;" | tail -1) -#[ "$seq_val" = 'g: 1' ] -# -#run_sql "insert into $DB.auto_inc values ();" -#last_id_after_restore=$(run_sql "select n from $DB.auto_inc order by n desc limit 1;" | trim_sql_result) -#[ $last_id_after_restore -gt $last_id ] -#rnd_last_id_after_restore=$(run_sql "insert into $DB.auto_rnd values ();select last_insert_id() & 0x7fffffffffffff;" | trim_sql_result ) -#[ $rnd_last_id_after_restore -gt $last_rnd_id ] -#rnd_count_after_restore=$(run_sql "select count(*) from $DB.auto_rnd;" | trim_sql_result ) -#[ $rnd_count_after_restore -gt 5 ] -# -# -#run_sql "drop schema $DB" + +set -eu +DB="$TEST_NAME" + +trim_sql_result() { + tail -n1 | sed 's/[^0-9]//g' +} + +run_sql "create schema $DB;" +run_sql "create view $DB.view_1 as select 331 as m;" +run_sql "create view $DB.view_2 as select * from $DB.view_1;" +run_sql "create sequence $DB.seq_1 nocache cycle maxvalue 40;" +run_sql "create table $DB.table_1 (m int primary key default next value for $DB.seq_1, b int);" +run_sql "insert into $DB.table_1 (b) values (8), (12), (16), (20);" +run_sql "create sequence $DB.seq_2;" +run_sql "create table $DB.table_2 (a int default next value for $DB.seq_1, b int default next value for $DB.seq_2, c int);" +run_sql "insert into $DB.table_2 (c) values (24), (28), (32);" +run_sql "create view $DB.view_3 as select m from $DB.table_1 union select a * b as m from $DB.table_2 union select m from $DB.view_2;" +run_sql "drop view $DB.view_1;" +run_sql "create view $DB.view_1 as select 133 as m;" + +run_sql "create table $DB.auto_inc (n int primary key AUTO_INCREMENT);" +run_sql "insert into $DB.auto_inc values (), (), (), (), ();" +last_id=$(run_sql "select n from $DB.auto_inc order by n desc limit 1" | trim_sql_result) + +run_sql "create table $DB.auto_rnd (n BIGINT primary key AUTO_RANDOM(8));" +last_rnd_id=$(run_sql "insert into $DB.auto_rnd values (), (), (), (), ();select last_insert_id() & 0x7fffffffffffff;" | trim_sql_result ) + +echo "backup start..." +run_br backup db --db "$DB" -s "local://$TEST_DIR/$DB" --pd $PD_ADDR + +run_sql "drop schema $DB;" + +echo "restore start..." +run_br restore db --db $DB -s "local://$TEST_DIR/$DB" --pd $PD_ADDR + +set -x + +views_count=$(run_sql "select count(*) c, sum(m) s from $DB.view_3;" | tail -2 | paste -sd ';' -) +[ "$views_count" = 'c: 8;s: 181' ] + +run_sql "insert into $DB.table_2 (c) values (33);" +seq_val=$(run_sql "select a >= 8 and b >= 4 as g from $DB.table_2 where c = 33;" | tail -1) +[ "$seq_val" = 'g: 1' ] + +run_sql "insert into $DB.auto_inc values ();" +last_id_after_restore=$(run_sql "select n from $DB.auto_inc order by n desc limit 1;" | trim_sql_result) +[ $last_id_after_restore -gt $last_id ] +rnd_last_id_after_restore=$(run_sql "insert into $DB.auto_rnd values ();select last_insert_id() & 0x7fffffffffffff;" | trim_sql_result ) +[ $rnd_last_id_after_restore -gt $last_rnd_id ] +rnd_count_after_restore=$(run_sql "select count(*) from $DB.auto_rnd;" | trim_sql_result ) +[ $rnd_count_after_restore -gt 5 ] + + +run_sql "drop schema $DB"