From 0be20889906a26aeecd78963ef8e8e7bfb8ea601 Mon Sep 17 00:00:00 2001 From: Michael MacDonald Date: Thu, 8 Apr 2021 17:24:51 -0400 Subject: [PATCH] DAOS-7198 control: Use join request context instead of timeout In the MS joinLoop, a short timeout was used to avoid blocking the loop if a join request handler exited before receiving the batched join response. Rather than relying on an arbitrary timeout value, we should instead pass in the join request's context so that we can correctly wait for the handler to receive the response or for the context to be canceled. Either way, it won't block the join loop indefinitely. Signed-off-by: Michael MacDonald --- src/control/server/mgmt_system.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/control/server/mgmt_system.go b/src/control/server/mgmt_system.go index c6b566a7db5..369f13a77c7 100644 --- a/src/control/server/mgmt_system.go +++ b/src/control/server/mgmt_system.go @@ -139,13 +139,13 @@ func getPeerListenAddr(ctx context.Context, listenAddrStr string) (*net.TCPAddr, const ( groupUpdateInterval = 500 * time.Millisecond batchJoinInterval = 250 * time.Millisecond - joinRespTimeout = 10 * time.Millisecond ) type ( batchJoinRequest struct { mgmtpb.JoinReq peerAddr *net.TCPAddr + joinCtx context.Context respCh chan *batchJoinResponse } @@ -217,12 +217,11 @@ func (svc *mgmtSvc) joinLoop(parent context.Context) { svc.log.Debugf("sending %d join responses", len(joinReqs)) for i, req := range joinReqs { - ctx, cancel := context.WithTimeout(parent, joinRespTimeout) - defer cancel() - select { - case <-ctx.Done(): - svc.log.Errorf("failed to send join response: %s", ctx.Err()) + case <-parent.Done(): + svc.log.Errorf("joinLoop shut down before response sent: %s", parent.Err()) + case <-req.joinCtx.Done(): + svc.log.Errorf("failed to send join response: %s", req.joinCtx.Err()) case req.respCh <- joinResps[i]: } } @@ -388,6 +387,7 @@ func (svc *mgmtSvc) Join(ctx context.Context, req
*mgmtpb.JoinReq) (*mgmtpb.Join bjr := &batchJoinRequest{ JoinReq: *req, peerAddr: replyAddr, + joinCtx: ctx, respCh: make(chan *batchJoinResponse), }