From 5bdfa717f2c3c8a70cd5b502b79e10a659a62c63 Mon Sep 17 00:00:00 2001
From: Hu#
Date: Mon, 13 Mar 2023 15:06:41 +0800
Subject: [PATCH] *: add defer to log the panic reason and stack (#6123)

ref tikv/pd#6099

add a defer to log the panic reason and stack

Signed-off-by: husharp
Co-authored-by: Ti Chi Robot
---
 pkg/cache/ttl.go                           | 2 ++
 pkg/election/lease.go                      | 3 +++
 pkg/gctuner/memory_limit_tuner.go          | 2 ++
 pkg/mcs/discovery/register.go              | 2 ++
 pkg/mcs/resource_manager/server/manager.go | 7 ++++++-
 pkg/mcs/resource_manager/server/server.go  | 4 ++++
 pkg/mcs/tso/server/grpc_service.go         | 3 +++
 pkg/mcs/tso/server/server.go               | 5 +++++
 pkg/schedule/region_scatterer.go           | 5 +----
 pkg/systimemon/systimemon.go               | 2 ++
 pkg/tso/allocator_manager.go               | 4 ++++
 pkg/tso/global_allocator.go                | 2 ++
 server/region_syncer/client.go             | 2 ++
 13 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/pkg/cache/ttl.go b/pkg/cache/ttl.go
index e1a91a15f72..14adf072dad 100644
--- a/pkg/cache/ttl.go
+++ b/pkg/cache/ttl.go
@@ -19,6 +19,7 @@ import (
 	"time"
 
 	"github.com/pingcap/log"
+	"github.com/tikv/pd/pkg/utils/logutil"
 	"github.com/tikv/pd/pkg/utils/syncutil"
 	"go.uber.org/zap"
 )
@@ -142,6 +143,7 @@ func (c *ttlCache) Clear() {
 }
 
 func (c *ttlCache) doGC() {
+	defer logutil.LogPanic()
 	ticker := time.NewTicker(c.gcInterval)
 	defer ticker.Stop()
 
diff --git a/pkg/election/lease.go b/pkg/election/lease.go
index 6df418fd697..99936a39ad1 100644
--- a/pkg/election/lease.go
+++ b/pkg/election/lease.go
@@ -22,6 +22,7 @@ import (
 	"github.com/pingcap/log"
 	"github.com/tikv/pd/pkg/errs"
 	"github.com/tikv/pd/pkg/utils/etcdutil"
+	"github.com/tikv/pd/pkg/utils/logutil"
 	"github.com/tikv/pd/pkg/utils/typeutil"
 	"go.etcd.io/etcd/clientv3"
 	"go.uber.org/zap"
@@ -129,6 +130,7 @@ func (l *lease) keepAliveWorker(ctx context.Context, interval time.Duration) <-c
 	ch := make(chan time.Time)
 
 	go func() {
+		defer logutil.LogPanic()
 		ticker := time.NewTicker(interval)
 		defer ticker.Stop()
 
@@ -137,6 +139,7 @@ func (l *lease) keepAliveWorker(ctx context.Context, interval time.Duration) <-c
 
 		for {
 			go func() {
+				defer logutil.LogPanic()
 				start := time.Now()
 				ctx1, cancel := context.WithTimeout(ctx, l.leaseTimeout)
 				defer cancel()
diff --git a/pkg/gctuner/memory_limit_tuner.go b/pkg/gctuner/memory_limit_tuner.go
index 59200368c63..77a8f9db4c6 100644
--- a/pkg/gctuner/memory_limit_tuner.go
+++ b/pkg/gctuner/memory_limit_tuner.go
@@ -23,6 +23,7 @@ import (
 	"github.com/pingcap/log"
 	util "github.com/tikv/pd/pkg/gogc"
 	"github.com/tikv/pd/pkg/memory"
+	"github.com/tikv/pd/pkg/utils/logutil"
 	atomicutil "go.uber.org/atomic"
 	"go.uber.org/zap"
 )
@@ -72,6 +73,7 @@ func (t *memoryLimitTuner) tuning() {
 	if float64(r.HeapInuse)*ratio > float64(setMemoryLimit(-1)) {
 		if t.nextGCTriggeredByMemoryLimit.Load() && t.waitingReset.CompareAndSwap(false, true) {
 			go func() {
+				defer logutil.LogPanic()
 				memory.MemoryLimitGCLast.Store(time.Now())
 				memory.MemoryLimitGCTotal.Add(1)
 				setMemoryLimit(t.calcMemoryLimit(fallbackPercentage))
diff --git a/pkg/mcs/discovery/register.go b/pkg/mcs/discovery/register.go
index e7e0254a5c4..fd99f3fcca7 100644
--- a/pkg/mcs/discovery/register.go
+++ b/pkg/mcs/discovery/register.go
@@ -20,6 +20,7 @@ import (
 	"time"
 
 	"github.com/pingcap/log"
+	"github.com/tikv/pd/pkg/utils/logutil"
 	"go.etcd.io/etcd/clientv3"
 	"go.uber.org/zap"
 )
@@ -70,6 +71,7 @@ func (sr *ServiceRegister) Register() error {
 		return fmt.Errorf("keepalive failed: %v", err)
 	}
 	go func() {
+		defer logutil.LogPanic()
 		for {
 			select {
 			case <-sr.ctx.Done():
diff --git a/pkg/mcs/resource_manager/server/manager.go b/pkg/mcs/resource_manager/server/manager.go
index 414ee392f82..25439ad74e9 100644
--- a/pkg/mcs/resource_manager/server/manager.go
+++ b/pkg/mcs/resource_manager/server/manager.go
@@ -29,6 +29,7 @@ import (
 	bs "github.com/tikv/pd/pkg/basicserver"
 	"github.com/tikv/pd/pkg/storage/endpoint"
 	"github.com/tikv/pd/pkg/storage/kv"
+	"github.com/tikv/pd/pkg/utils/logutil"
 	"go.uber.org/zap"
 )
 
@@ -121,7 +122,10 @@ func (m *Manager) Init(ctx context.Context) {
 	m.storage.LoadResourceGroupStates(tokenHandler)
 	// Start the background metrics flusher.
 	go m.backgroundMetricsFlush(ctx)
-	go m.persistLoop(ctx)
+	go func() {
+		defer logutil.LogPanic()
+		m.persistLoop(ctx)
+	}()
 	log.Info("resource group manager finishes initialization")
 }
 
@@ -249,6 +253,7 @@ func (m *Manager) persistResourceGroupRunningState() {
 
 // Receive the consumption and flush it to the metrics.
 func (m *Manager) backgroundMetricsFlush(ctx context.Context) {
+	defer logutil.LogPanic()
 	ticker := time.NewTicker(metricsCleanupInterval)
 	defer ticker.Stop()
 	for {
diff --git a/pkg/mcs/resource_manager/server/server.go b/pkg/mcs/resource_manager/server/server.go
index a53c23c54ab..e526ab8ae2f 100644
--- a/pkg/mcs/resource_manager/server/server.go
+++ b/pkg/mcs/resource_manager/server/server.go
@@ -271,6 +271,7 @@ func (s *Server) initClient() error {
 }
 
 func (s *Server) startGRPCServer(l net.Listener) {
+	defer logutil.LogPanic()
 	defer s.serverLoopWg.Done()
 
 	gs := grpc.NewServer()
@@ -282,6 +283,7 @@ func (s *Server) startGRPCServer(l net.Listener) {
 	// it doesn't happen in a reasonable amount of time.
 	done := make(chan struct{})
 	go func() {
+		defer logutil.LogPanic()
 		log.Info("try to gracefully stop the server now")
 		gs.GracefulStop()
 		close(done)
@@ -300,6 +302,7 @@ func (s *Server) startGRPCServer(l net.Listener) {
 }
 
 func (s *Server) startHTTPServer(l net.Listener) {
+	defer logutil.LogPanic()
 	defer s.serverLoopWg.Done()
 
 	handler, _ := SetUpRestHandler(s.service)
@@ -326,6 +329,7 @@ func (s *Server) startHTTPServer(l net.Listener) {
 }
 
 func (s *Server) startGRPCAndHTTPServers(l net.Listener) {
+	defer logutil.LogPanic()
 	defer s.serverLoopWg.Done()
 
 	mux := cmux.New(l)
diff --git a/pkg/mcs/tso/server/grpc_service.go b/pkg/mcs/tso/server/grpc_service.go
index d8c399b2960..c650c4910ad 100644
--- a/pkg/mcs/tso/server/grpc_service.go
+++ b/pkg/mcs/tso/server/grpc_service.go
@@ -29,6 +29,7 @@ import (
 	"github.com/tikv/pd/pkg/mcs/registry"
 	"github.com/tikv/pd/pkg/utils/apiutil"
 	"github.com/tikv/pd/pkg/utils/grpcutil"
+	"github.com/tikv/pd/pkg/utils/logutil"
 	"go.uber.org/zap"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/codes"
@@ -191,6 +192,7 @@ func (s *Service) dispatchTSORequest(ctx context.Context, request *tsoRequest, f
 }
 
 func (s *Service) handleDispatcher(ctx context.Context, forwardedHost string, tsoRequestCh <-chan *tsoRequest, tsDeadlineCh chan<- deadline, doneCh <-chan struct{}, errCh chan<- error) {
+	defer logutil.LogPanic()
 	dispatcherCtx, ctxCancel := context.WithCancel(ctx)
 	defer ctxCancel()
 	defer s.tsoDispatcher.Delete(forwardedHost)
@@ -330,6 +332,7 @@ type deadline struct {
 }
 
 func watchTSDeadline(ctx context.Context, tsDeadlineCh <-chan deadline) {
+	defer logutil.LogPanic()
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel()
 	for {
diff --git a/pkg/mcs/tso/server/server.go b/pkg/mcs/tso/server/server.go
index bb2e29fae04..fbaba395007 100644
--- a/pkg/mcs/tso/server/server.go
+++ b/pkg/mcs/tso/server/server.go
@@ -440,6 +440,7 @@ func (s *Server) SetExternalTS(externalTS uint64) error {
 }
 
 func checkStream(streamCtx context.Context, cancel context.CancelFunc, done chan struct{}) {
+	defer logutil.LogPanic()
 	select {
 	case <-done:
 		return
@@ -479,6 +480,7 @@ func (s *Server) initClient() error {
 }
 
 func (s *Server) startGRPCServer(l net.Listener) {
+	defer logutil.LogPanic()
 	defer s.serverLoopWg.Done()
 
 	gs := grpc.NewServer()
@@ -491,6 +493,7 @@ func (s *Server) startGRPCServer(l net.Listener) {
 	// it doesn't happen in a reasonable amount of time.
 	done := make(chan struct{})
 	go func() {
+		defer logutil.LogPanic()
 		log.Info("try to gracefully stop the server now")
 		gs.GracefulStop()
 		close(done)
@@ -510,6 +513,7 @@ func (s *Server) startGRPCServer(l net.Listener) {
 }
 
 func (s *Server) startHTTPServer(l net.Listener) {
+	defer logutil.LogPanic()
 	defer s.serverLoopWg.Done()
 
 	handler, _ := SetUpRestHandler(s.service)
@@ -536,6 +540,7 @@ func (s *Server) startHTTPServer(l net.Listener) {
 }
 
 func (s *Server) startGRPCAndHTTPServers(l net.Listener) {
+	defer logutil.LogPanic()
 	defer s.serverLoopWg.Done()
 
 	mux := cmux.New(l)
diff --git a/pkg/schedule/region_scatterer.go b/pkg/schedule/region_scatterer.go
index b5c671a3752..877dc8b2af4 100644
--- a/pkg/schedule/region_scatterer.go
+++ b/pkg/schedule/region_scatterer.go
@@ -407,10 +407,7 @@ func allowLeader(fit *placement.RegionFit, peer *metapb.Peer) bool {
 		return false
 	}
 	peerFit := fit.GetRuleFit(peer.GetId())
-	if peerFit == nil || peerFit.Rule == nil {
-		return false
-	}
-	if peerFit.Rule.IsWitness {
+	if peerFit == nil || peerFit.Rule == nil || peerFit.Rule.IsWitness {
 		return false
 	}
 	switch peerFit.Rule.Role {
diff --git a/pkg/systimemon/systimemon.go b/pkg/systimemon/systimemon.go
index a3124312fa2..75fc5e68d8b 100644
--- a/pkg/systimemon/systimemon.go
+++ b/pkg/systimemon/systimemon.go
@@ -20,11 +20,13 @@ import (
 
 	"github.com/pingcap/log"
 	"github.com/tikv/pd/pkg/errs"
+	"github.com/tikv/pd/pkg/utils/logutil"
 	"go.uber.org/zap"
 )
 
 // StartMonitor calls systimeErrHandler if system time jump backward.
 func StartMonitor(ctx context.Context, now func() time.Time, systimeErrHandler func()) {
+	defer logutil.LogPanic()
 	log.Info("start system time monitor")
 	tick := time.NewTicker(100 * time.Millisecond)
 	defer tick.Stop()
diff --git a/pkg/tso/allocator_manager.go b/pkg/tso/allocator_manager.go
index 45fb771b9d6..a1ec8b50195 100644
--- a/pkg/tso/allocator_manager.go
+++ b/pkg/tso/allocator_manager.go
@@ -35,6 +35,7 @@ import (
 	"github.com/tikv/pd/pkg/storage/kv"
 	"github.com/tikv/pd/pkg/utils/etcdutil"
 	"github.com/tikv/pd/pkg/utils/grpcutil"
+	"github.com/tikv/pd/pkg/utils/logutil"
 	"github.com/tikv/pd/pkg/utils/syncutil"
 	"go.etcd.io/etcd/clientv3"
 	"go.uber.org/zap"
@@ -411,6 +412,7 @@ func (am *AllocatorManager) getLocalTSOAllocatorPath() string {
 
 // similar logic with leaderLoop in server/server.go
 func (am *AllocatorManager) allocatorLeaderLoop(ctx context.Context, allocator *LocalTSOAllocator) {
+	defer logutil.LogPanic()
 	defer log.Info("server is closed, return local tso allocator leader loop",
 		zap.String("dc-location", allocator.GetDCLocation()),
 		zap.String("local-tso-allocator-name", am.member.Member().Name))
@@ -662,6 +664,7 @@ func (am *AllocatorManager) allocatorUpdater() {
 
 // updateAllocator is used to update the allocator in the group.
 func (am *AllocatorManager) updateAllocator(ag *allocatorGroup) {
+	defer logutil.LogPanic()
 	defer am.wg.Done()
 	select {
 	case <-ag.ctx.Done():
@@ -712,6 +715,7 @@ func (am *AllocatorManager) allocatorPatroller(serverCtx context.Context) {
 // ClusterDCLocationChecker collects all dc-locations of a cluster, computes some related info
 // and stores them into the DCLocationInfo, then finally writes them into am.mu.clusterDCLocations.
 func (am *AllocatorManager) ClusterDCLocationChecker() {
+	defer logutil.LogPanic()
 	// Wait for the PD leader to be elected out.
 	if am.member.GetLeader() == nil {
 		return
diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go
index b86ea01e394..e19ae93aa58 100644
--- a/pkg/tso/global_allocator.go
+++ b/pkg/tso/global_allocator.go
@@ -28,6 +28,7 @@ import (
 	"github.com/tikv/pd/pkg/election"
 	"github.com/tikv/pd/pkg/errs"
 	"github.com/tikv/pd/pkg/slice"
+	"github.com/tikv/pd/pkg/utils/logutil"
 	"github.com/tikv/pd/pkg/utils/tsoutil"
 	"github.com/tikv/pd/pkg/utils/typeutil"
 	"go.uber.org/zap"
@@ -340,6 +341,7 @@ func (gta *GlobalTSOAllocator) SyncMaxTS(
 		// Send SyncMaxTSRequest to all allocator leaders concurrently.
 		wg.Add(1)
 		go func(ctx context.Context, conn *grpc.ClientConn, respCh chan<- *syncResp) {
+			defer logutil.LogPanic()
 			defer wg.Done()
 			syncMaxTSResp := &syncResp{}
 			syncCtx, cancel := context.WithTimeout(ctx, rpcTimeout)
diff --git a/server/region_syncer/client.go b/server/region_syncer/client.go
index 2c1dc71e772..42e552cb050 100644
--- a/server/region_syncer/client.go
+++ b/server/region_syncer/client.go
@@ -26,6 +26,7 @@ import (
 	"github.com/tikv/pd/pkg/errs"
 	"github.com/tikv/pd/pkg/storage"
 	"github.com/tikv/pd/pkg/utils/grpcutil"
+	"github.com/tikv/pd/pkg/utils/logutil"
 	"go.uber.org/zap"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/backoff"
@@ -117,6 +118,7 @@ func (s *RegionSyncer) StartSyncWithLeader(addr string) {
 	ctx := s.mu.clientCtx
 
 	go func() {
+		defer logutil.LogPanic()
 		defer s.wg.Done()
 		// used to load region from kv storage to cache storage.
 		bc := s.server.GetBasicCluster()
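
All of the hunks above apply the same pattern: each long-running or fire-and-forget goroutine defers a panic handler as its first statement. This matters because recover only works inside the goroutine that panicked, so a panic in an unguarded goroutine would bring the whole process down without leaving a structured log entry. The sketch below shows the general shape of such a helper and of the call-site wrapping used throughout this patch. It is an illustration only: the exact body of pd's logutil.LogPanic, the log message, and the field names are assumptions here, and doBackgroundWork is a hypothetical worker.

package logutil

import (
	"github.com/pingcap/log"
	"go.uber.org/zap"
)

// LogPanic recovers a panic raised in the current goroutine and logs the
// recovered value before the process exits; with a typical zap production
// configuration, error- and fatal-level entries also carry a stack trace,
// which is how the stack reaches the log.
// Intended use: `defer logutil.LogPanic()`.
func LogPanic() {
	if e := recover(); e != nil {
		log.Fatal("panic", zap.Reflect("recover", e))
	}
}

Typical call-site shape, matching the changes above (doBackgroundWork and ctx are hypothetical placeholders):

	go func() {
		defer logutil.LogPanic() // registered first, so later defers still run before the fatal log
		doBackgroundWork(ctx)
	}()

Because deferred calls run last-in-first-out, registering LogPanic first means it executes last during unwinding, so cleanup deferred afterwards in the same goroutine (ticker.Stop(), wg.Done(), and the like) still runs before the panic is reported.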