Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

service/dap: refine teardown logic #2414

Merged
merged 12 commits into from
Apr 21, 2021
208 changes: 118 additions & 90 deletions service/dap/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,28 +35,40 @@ import (
)

// Server implements a DAP server that can accept a single client for
// a single debug session. It does not support restarting.
// The server operates via two goroutines:
// a single debug session (for now). It does yet not support restarting.
// That means that in addition to explicit shutdown requests,
// program termination and failed or closed client connection
// would also result in stopping this single-use server.
//
// The DAP server operates via the following goroutines:
//
// (1) Main goroutine where the server is created via NewServer(),
// started via Run() and stopped via Stop().
// (2) Run goroutine started from Run() that accepts a client connection,
// started via Run() and stopped via Stop(). Once the server is
// started, this goroutine blocks until it receives a stop-server
// signal that can come from an OS interrupt (such as Ctrl-C) or
// config.DisconnectChan (passed to NewServer()) as a result of
// client connection failure or closure or a DAP disconnect request.
//
// (2) Run goroutine started from Run() that serves as both
// a listener and a client goroutine. It accepts a client connection,
// reads, decodes and processes each request, issuing commands to the
// underlying debugger and sending back events and responses.
// This gorouitne sends a stop-server signal via config.DisconnecChan
// when encounering a client connection error or responding to
// a DAP disconnect request.
//
// TODO(polina): add another layer of per-client goroutines to support multiple clients
// TODO(polina): make it asynchronous (i.e. launch goroutine per request)
type Server struct {
// config is all the information necessary to start the debugger and server.
config *service.Config
// listener is used to accept the client connection.
listener net.Listener
// conn is the accepted client connection.
conn net.Conn
// stopChan is closed when the server is Stop()-ed. This can be used to signal
// stopTriggered is closed when the server is Stop()-ed. This can be used to signal
// to goroutines run by the server that it's time to quit.
stopChan chan struct{}
stopTriggered chan struct{}
// reader is used to read requests from the connection.
reader *bufio.Reader
// debugger is the underlying debugger service.
debugger *debugger.Debugger
// log is used for structured logging.
log *logrus.Entry
// binaryToRemove is the compiled binary to be removed on disconnect.
Expand All @@ -71,8 +83,14 @@ type Server struct {
// args tracks special settings for handling debug session requests.
args launchAttachArgs

// mu synchronizes access to objects set on start-up (from run goroutine)
// and stopped on teardown (from main goroutine)
polinasok marked this conversation as resolved.
Show resolved Hide resolved
mu sync.Mutex

// conn is the accepted client connection.
conn net.Conn
// debugger is the underlying debugger service.
debugger *debugger.Debugger
// noDebugProcess is set for the noDebug launch process.
noDebugProcess *exec.Cmd
}
Expand Down Expand Up @@ -107,17 +125,18 @@ var DefaultLoadConfig = proc.LoadConfig{
}

// NewServer creates a new DAP Server. It takes an opened Listener
// via config and assumes its ownership. config.disconnectChan has to be set;
// it will be closed by the server when the client disconnects or requests
// shutdown. Once disconnectChan is closed, Server.Stop() must be called.
// via config and assumes its ownership. config.DisconnectChan has to be set;
// it will be closed by the server when the client fails to connect,
// disconnects or requests shutdown. Once config.DisconnectChan is closed,
// Server.Stop() must be called to shutdown this single-user server.
func NewServer(config *service.Config) *Server {
logger := logflags.DAPLogger()
logflags.WriteDAPListeningMessage(config.Listener.Addr().String())
logger.Debug("DAP server pid = ", os.Getpid())
return &Server{
config: config,
listener: config.Listener,
stopChan: make(chan struct{}),
stopTriggered: make(chan struct{}),
log: logger,
stackFrameHandles: newHandlesMap(),
variableHandles: newVariablesHandlesMap(),
Expand All @@ -144,53 +163,49 @@ func (s *Server) setLaunchAttachArgs(request dap.LaunchAttachRequest) {

// Stop stops the DAP debugger service, closes the listener and the client
// connection. It shuts down the underlying debugger and kills the target
// process if it was launched by it. This method mustn't be called more than
// once.
// process if it was launched by it or stops the noDebug process.
// This method mustn't be called more than once.
func (s *Server) Stop() {
close(s.stopTriggered)
_ = s.listener.Close()
close(s.stopChan)

s.mu.Lock()
defer s.mu.Unlock()
if s.conn != nil {
// Unless Stop() was called after serveDAPCodec()
// returned, this will result in closed connection error
// on next read, breaking out of the read loop and
// allowing the run goroutine to exit.
_ = s.conn.Close()
}

if s.debugger != nil {
kill := s.config.Debugger.AttachPid == 0
if err := s.debugger.Detach(kill); err != nil {
switch err.(type) {
case *proc.ErrProcessExited:
s.log.Debug(err)
default:
s.log.Error(err)
}
}
killProcess := s.config.Debugger.AttachPid == 0
s.stopDebugSession(killProcess)
} else {
s.stopNoDebugProcess()
}
}

// signalDisconnect closes config.DisconnectChan if not nil, which
// signals that the client disconnected or there was a client
// connection failure. Since the server currently services only one
// client, this can be used as a signal to the entire server via
// Stop(). The function safeguards agaist closing the channel more
// triggerServerStop closes config.DisconnectChan if not nil, which
// signals that client sent a disconnect request or there was connection
// failure or closure. Since the server currently services only one
// client, this is used as a signal to stop the entire server.
// The function safeguards agaist closing the channel more
// than once and can be called multiple times. It is not thread-safe
// and is currently only called from the run goroutine.
// TODO(polina): lock this when we add more goroutines that could call
// this when we support asynchronous request-response communication.
func (s *Server) signalDisconnect() {
func (s *Server) triggerServerStop() {
// Avoid accidentally closing the channel twice and causing a panic, when
// this function is called more than once. For example, we could have the
// following sequence of events:
// -- run goroutine: calls onDisconnectRequest()
// -- run goroutine: calls signalDisconnect()
// -- run goroutine: calls triggerServerStop()
// -- main goroutine: calls Stop()
// -- main goroutine: Stop() closes client connection
// -- main goroutine: Stop() closes client connection (or client closed it)
// -- run goroutine: serveDAPCodec() gets "closed network connection"
// -- run goroutine: serveDAPCodec() returns
// -- run goroutine: serveDAPCodec calls signalDisconnect()
// -- run goroutine: serveDAPCodec() returns and calls triggerServerStop()
if s.config.DisconnectChan != nil {
close(s.config.DisconnectChan)
s.config.DisconnectChan = nil
Expand All @@ -210,26 +225,27 @@ func (s *Server) signalDisconnect() {
// so the editor needs to launch delve only once?
func (s *Server) Run() {
go func() {
conn, err := s.listener.Accept()
conn, err := s.listener.Accept() // listener is closed in Stop()
if err != nil {
select {
case <-s.stopChan:
case <-s.stopTriggered:
default:
s.log.Errorf("Error accepting client connection: %s\n", err)
s.triggerServerStop()
}
s.signalDisconnect()
return
}
s.conn = conn
s.mu.Lock()
s.conn = conn // closed in Stop()
s.mu.Unlock()
s.serveDAPCodec()
}()
}

// serveDAPCodec reads and decodes requests from the client
// until it encounters an error or EOF, when it sends
// the disconnect signal and returns.
// a disconnect signal and returns.
func (s *Server) serveDAPCodec() {
defer s.signalDisconnect()
s.reader = bufio.NewReader(s.conn)
for {
request, err := dap.ReadProtocolMessage(s.reader)
Expand All @@ -241,14 +257,13 @@ func (s *Server) serveDAPCodec() {
// TODO(polina): to support this add Seq to
// dap.DecodeProtocolMessageFieldError.
if err != nil {
stopRequested := false
select {
case <-s.stopChan:
stopRequested = true
case <-s.stopTriggered:
default:
}
if err != io.EOF && !stopRequested {
s.log.Error("DAP error: ", err)
if err != io.EOF {
s.log.Error("DAP error: ", err)
}
s.triggerServerStop()
}
return
}
Expand Down Expand Up @@ -557,7 +572,9 @@ func (s *Server) onLaunchRequest(request *dap.LaunchRequest) {
}

if noDebug, ok := request.Arguments["noDebug"].(bool); ok && noDebug {
s.mu.Lock()
cmd, err := s.startNoDebugProcess(program, targetArgs, s.config.Debugger.WorkingDir)
s.mu.Unlock()
if err != nil {
s.sendErrorResponse(request.Request, FailedToLaunch, "Failed to launch", err.Error())
return
Expand All @@ -584,17 +601,24 @@ func (s *Server) onLaunchRequest(request *dap.LaunchRequest) {
}

var err error
if s.debugger, err = debugger.New(&s.config.Debugger, s.config.ProcessArgs); err != nil {
s.mu.Lock()
s.debugger, err = debugger.New(&s.config.Debugger, s.config.ProcessArgs)
s.mu.Unlock()
if err != nil {
s.sendErrorResponse(request.Request, FailedToLaunch, "Failed to launch", err.Error())
return
}

// Notify the client that the debugger is ready to start accepting
// configuration requests for setting breakpoints, etc. The client
// will end the configuration sequence with 'configurationDone'.
s.send(&dap.InitializedEvent{Event: *newEvent("initialized")})
s.send(&dap.LaunchResponse{Response: *newResponse(request.Request)})
}

// startNoDebugProcess is called from onLaunchRequest (run goroutine) and
// requires holding mu lock.
func (s *Server) startNoDebugProcess(program string, targetArgs []string, wd string) (*exec.Cmd, error) {
s.mu.Lock()
defer s.mu.Unlock()
if s.noDebugProcess != nil {
return nil, fmt.Errorf("another launch request is in progress")
}
Expand All @@ -607,16 +631,14 @@ func (s *Server) startNoDebugProcess(program string, targetArgs []string, wd str
return cmd, nil
}

// stopNoDebugProcess is called from Stop (main goroutine) and
// onDisconnectRequest (run goroutine) and requires holding mu lock.
func (s *Server) stopNoDebugProcess() {
s.mu.Lock()
p := s.noDebugProcess
s.noDebugProcess = nil
defer s.mu.Unlock()
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hyangah Was this meant to be regular unlock to minimize scope (hence the temp var p var)? With my new change, the entire function gets locked (so I shouldn't need p anymore). Not ideal? But at the same time, what else is there to do when we are existing?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Heh, this is a bug - I meant s.mu.Unlock(), not defer s.mu.Unlock. :-)

Yeah, the intention was to reduce the scope - so the goroutine that's blocked onLaunchRequest (waiting for the termination of s.nodebugProcess) can proceed as soon as it's unblocked when p's killed. But you're right. We don't need to be too clever here.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for clarifying. I remove the temp var "p".


// TODO(hyangah): gracefully terminate the process and its children processes.
if p != nil && !p.ProcessState.Exited() {
p.Process.Kill() // Don't check error. Process killing and self-termination may race.
if s.noDebugProcess != nil && !s.noDebugProcess.ProcessState.Exited() {
s.noDebugProcess.Process.Kill() // Don't check error. Process killing and self-termination may race.
}
s.noDebugProcess = nil
}

// TODO(polina): support "remote" mode
Expand All @@ -634,39 +656,43 @@ func isValidLaunchMode(launchMode interface{}) bool {
// (in our case this TCP server) can be terminated.
func (s *Server) onDisconnectRequest(request *dap.DisconnectRequest) {
s.send(&dap.DisconnectResponse{Response: *newResponse(request.Request)})
defer s.triggerServerStop()
s.mu.Lock()
defer s.mu.Unlock()
if s.debugger != nil {
_, err := s.debugger.Command(&api.DebuggerCommand{Name: api.Halt}, nil)
if err != nil {
switch err.(type) {
case *proc.ErrProcessExited:
s.log.Debug(err)
default:
s.log.Error(err)
}
}
// We always kill launched programs
kill := s.config.Debugger.AttachPid == 0
// We always kill launched programs.
// In case of attach, we leave the program
// running but default, which can be
// running by default, which can be
// overridden by an explicit request to terminate.
if request.Arguments.TerminateDebuggee {
kill = true
}
err = s.debugger.Detach(kill)
if err != nil {
switch err.(type) {
case *proc.ErrProcessExited:
s.log.Debug(err)
default:
s.log.Error(err)
}
}
killProcess := s.config.Debugger.AttachPid == 0 || request.Arguments.TerminateDebuggee
s.stopDebugSession(killProcess)
} else {
s.stopNoDebugProcess()
}
}

// TODO(polina): make thread-safe when handlers become asynchronous.
s.signalDisconnect()
// stopDebugSession is called from Stop (main goroutine) and
// onDisconnectRequest (run goroutine) and requires holding mu lock.
func (s *Server) stopDebugSession(killProcess bool) {
if s.debugger == nil {
return
}
_, err := s.debugger.Command(&api.DebuggerCommand{Name: api.Halt}, nil)
Copy link
Collaborator Author

@polinasok polinasok Apr 4, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why doesn't rpc server halt in Stop()?

func (s *ServerImpl) Stop() error {
if s.config.AcceptMulti {
close(s.stopChan)
s.listener.Close()
}
kill := s.config.Debugger.AttachPid == 0
return s.debugger.Detach(kill)
?
On a related note, the terminology collision here is unfortunate: https://github.com/go-delve/delve/blob/master/Documentation/api/ClientHowto.md#gracefully-ending-the-debug-session
From the RPC client point of view, it appears that

  • Detach = detach debugger from the process (maybe kill it) + close client connection
  • Disconnect = close client connection
    But the client how-to uses a phrase "disconnecting a running program" as a case where Detach is called. And then we have DAP where disconnect request is sent to "disconnect from the debuggee and to terminate the debug adapter". Sigh.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wild guess - isn't it because it's the rpc client who is supposed to call Halt (or Disconnect) command before the server stops. Not sure about the case where the debugging stops because client unexpectedly crashes or gets disconnected.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's precisely the case I was thinking about. The client takes care of Halt before Detach. But if there is an error reading rpc messages from client, the server will go into shutdown mode and call Stop() without halting.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think there's a particular reason, halting would probably be better. And the client howto should say detach where it says ""disconnecting a running program".

if err != nil {
s.log.Debug(err)
}
if err == proc.ErrProcessDetached {
return
}
err = s.debugger.Detach(killProcess)
if err != nil {
switch err.(type) {
case *proc.ErrProcessExited:
s.log.Debug(err)
default:
s.log.Error(err)
}
}
}

func (s *Server) onSetBreakpointsRequest(request *dap.SetBreakpointsRequest) {
Expand Down Expand Up @@ -840,9 +866,11 @@ func (s *Server) onAttachRequest(request *dap.AttachRequest) {
s.config.Debugger.AttachPid = int(pid)
s.setLaunchAttachArgs(request)
var err error
if s.debugger, err = debugger.New(&s.config.Debugger, nil); err != nil {
s.sendErrorResponse(request.Request,
FailedToAttach, "Failed to attach", err.Error())
s.mu.Lock()
s.debugger, err = debugger.New(&s.config.Debugger, nil)
s.mu.Unlock()
if err != nil {
s.sendErrorResponse(request.Request, FailedToAttach, "Failed to attach", err.Error())
return
}
} else {
Expand Down Expand Up @@ -1339,7 +1367,7 @@ func (s *Server) onEvaluateRequest(request *dap.EvaluateRequest) {
if retVars == nil {
// The call got interrupted by a stop (e.g. breakpoint in injected
// function call or in another goroutine)
s.resetHandlesForStop()
s.resetHandlesForStoppedEvent()
stopped := &dap.StoppedEvent{Event: *newEvent("stopped")}
stopped.Body.AllThreadsStopped = true
if state.SelectedGoroutine != nil {
Expand Down Expand Up @@ -1518,7 +1546,7 @@ func newEvent(event string) *dap.Event {
const BetterBadAccessError = `invalid memory address or nil pointer dereference [signal SIGSEGV: segmentation violation]
Unable to propagate EXC_BAD_ACCESS signal to target process and panic (see https://github.com/go-delve/delve/issues/852)`

func (s *Server) resetHandlesForStop() {
func (s *Server) resetHandlesForStoppedEvent() {
s.stackFrameHandles.reset()
s.variableHandles.reset()
}
Expand All @@ -1538,7 +1566,7 @@ func (s *Server) doCommand(command string) {
return
}

s.resetHandlesForStop()
s.resetHandlesForStoppedEvent()
stopped := &dap.StoppedEvent{Event: *newEvent("stopped")}
stopped.Body.AllThreadsStopped = true

Expand Down
Loading