From 7dbf1a5033a8af125a082bdc93743a8ccb67bf32 Mon Sep 17 00:00:00 2001 From: YangKeao Date: Mon, 9 Jan 2023 05:12:22 -0500 Subject: [PATCH] ttl: add ttl goroutine exit log (#40416) close pingcap/tidb#40415 --- domain/domain.go | 4 ++++ ttl/ttlworker/del.go | 5 ++++- ttl/ttlworker/job_manager.go | 4 +++- ttl/ttlworker/scan.go | 5 ++++- ttl/ttlworker/worker.go | 5 +++++ 5 files changed, 20 insertions(+), 3 deletions(-) diff --git a/domain/domain.go b/domain/domain.go index d01b900cdf444..06fd9ff4a62a9 100644 --- a/domain/domain.go +++ b/domain/domain.go @@ -2479,6 +2479,10 @@ func (do *Domain) serverIDKeeper() { // StartTTLJobManager creates and starts the ttl job manager func (do *Domain) StartTTLJobManager() { do.wg.Run(func() { + defer func() { + logutil.BgLogger().Info("ttlJobManager exited.") + }() + ttlJobManager := ttlworker.NewJobManager(do.ddl.GetID(), do.sysSessionPool, do.store) do.ttlJobManager = ttlJobManager ttlJobManager.Start() diff --git a/ttl/ttlworker/del.go b/ttl/ttlworker/del.go index 8f66fb7fad246..5236bcc2275e6 100644 --- a/ttl/ttlworker/del.go +++ b/ttl/ttlworker/del.go @@ -255,7 +255,10 @@ func newDeleteWorker(delCh <-chan *ttlDeleteTask, sessPool sessionPool) *ttlDele func (w *ttlDeleteWorker) loop() error { tracer := metrics.NewDeleteWorkerPhaseTracer() - defer tracer.EndPhase() + defer func() { + tracer.EndPhase() + logutil.BgLogger().Info("ttlDeleteWorker loop exited.") + }() tracer.EnterPhase(metrics.PhaseOther) se, err := getSession(w.sessionPool) diff --git a/ttl/ttlworker/job_manager.go b/ttl/ttlworker/job_manager.go index bb52457e484e5..f0d88a6e5668d 100644 --- a/ttl/ttlworker/job_manager.go +++ b/ttl/ttlworker/job_manager.go @@ -117,6 +117,7 @@ func (m *JobManager) jobLoop() error { defer func() { err = multierr.Combine(err, multierr.Combine(m.resizeScanWorkers(0), m.resizeDelWorkers(0))) se.Close() + logutil.Logger(m.ctx).Info("ttlJobManager loop exited.") }() scheduleTicker := time.Tick(jobManagerLoopTickerInterval) @@ -247,7 
+248,8 @@ func (m *JobManager) resizeWorkers(workers []worker, count int, factory func() w } var errs error - ctx, cancel := context.WithTimeout(m.ctx, 30*time.Second) + // don't use `m.ctx` here: when the server is shutting down, `m.ctx` has already been cancelled + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) for _, w := range workers[count:] { err := w.WaitStopped(ctx, 30*time.Second) if err != nil { diff --git a/ttl/ttlworker/scan.go b/ttl/ttlworker/scan.go index 38a4fd544535d..48686ef87d2f3 100644 --- a/ttl/ttlworker/scan.go +++ b/ttl/ttlworker/scan.go @@ -284,7 +284,10 @@ func (w *ttlScanWorker) PollTaskResult() *ttlScanTaskExecResult { func (w *ttlScanWorker) loop() error { ctx := w.baseWorker.ctx tracer := metrics.NewScanWorkerPhaseTracer() - defer tracer.EndPhase() + defer func() { + tracer.EndPhase() + logutil.BgLogger().Info("ttlScanWorker loop exited.") + }() ticker := time.Tick(time.Second * 5) for w.Status() == workerStatusRunning { diff --git a/ttl/ttlworker/worker.go b/ttl/ttlworker/worker.go index 783384862cacf..68ea0d9a1b952 100644 --- a/ttl/ttlworker/worker.go +++ b/ttl/ttlworker/worker.go @@ -20,6 +20,8 @@ import ( "time" "github.com/pingcap/tidb/util" + "github.com/pingcap/tidb/util/logutil" + "go.uber.org/zap" ) type workerStatus int @@ -122,6 +124,9 @@ func (w *baseWorker) Send() chan<- interface{} { func (w *baseWorker) loop() { var err error defer func() { + if r := recover(); r != nil { + logutil.BgLogger().Info("ttl worker panic", zap.Any("recover", r)) + } w.Lock() w.toStopped(err) w.Unlock()