Skip to content

Commit

Permalink
runtime: wake netpoller when dropping P, don't sleep too long in sysmon
Browse files Browse the repository at this point in the history
When dropping a P, if it has any timers, and if some thread is
sleeping in the netpoller, wake the netpoller to run the P's timers.
This mitigates races between the netpoller deciding how long to sleep
and a new timer being added.

In sysmon, if all P's are idle, check the timers to decide how long to sleep.
This avoids oversleeping if no thread is using the netpoller.
This can happen in particular if some threads use runtime.LockOSThread,
as those threads do not block in the netpoller.

Also, print the number of timers per P for GODEBUG=scheddetail=1.

Before this CL, TestLockedDeadlock2 would fail about 1% of the time.
With this CL, I ran it 150,000 times with no failures.

Updates #6239
Updates #27707
Fixes #35274
Fixes #35288

Change-Id: I7e5193e6c885e567f0b1ee023664aa3e2902fcd1
Reviewed-on: https://go-review.googlesource.com/c/go/+/204800
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
  • Loading branch information
ianlancetaylor committed Nov 4, 2019
1 parent 5a7c571 commit d80ab3e
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 23 deletions.
49 changes: 26 additions & 23 deletions src/runtime/proc.go
Original file line number Diff line number Diff line change
Expand Up @@ -1964,6 +1964,9 @@ func handoffp(_p_ *p) {
startm(_p_, false)
return
}
if when := nobarrierWakeTime(_p_); when != 0 {
wakeNetPoller(when)
}
pidleput(_p_)
unlock(&sched.lock)
}
Expand Down Expand Up @@ -4448,32 +4451,33 @@ func sysmon() {
delay = 10 * 1000
}
usleep(delay)
now := nanotime()
if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs)) {
lock(&sched.lock)
if atomic.Load(&sched.gcwaiting) != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs) {
atomic.Store(&sched.sysmonwait, 1)
unlock(&sched.lock)
// Make wake-up period small enough
// for the sampling to be correct.
maxsleep := forcegcperiod / 2
shouldRelax := true
if osRelaxMinNS > 0 {
next := timeSleepUntil()
now := nanotime()
if next-now < osRelaxMinNS {
shouldRelax = false
next := timeSleepUntil()
if next > now {
atomic.Store(&sched.sysmonwait, 1)
unlock(&sched.lock)
// Make wake-up period small enough
// for the sampling to be correct.
sleep := forcegcperiod / 2
if next-now < sleep {
sleep = next - now
}
shouldRelax := sleep >= osRelaxMinNS
if shouldRelax {
osRelax(true)
}
notetsleep(&sched.sysmonnote, sleep)
if shouldRelax {
osRelax(false)
}
now = nanotime()
lock(&sched.lock)
atomic.Store(&sched.sysmonwait, 0)
noteclear(&sched.sysmonnote)
}
if shouldRelax {
osRelax(true)
}
notetsleep(&sched.sysmonnote, maxsleep)
if shouldRelax {
osRelax(false)
}
lock(&sched.lock)
atomic.Store(&sched.sysmonwait, 0)
noteclear(&sched.sysmonnote)
idle = 0
delay = 20
}
Expand All @@ -4485,7 +4489,6 @@ func sysmon() {
}
// poll network if not polled for more than 10ms
lastpoll := int64(atomic.Load64(&sched.lastpoll))
now := nanotime()
if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now {
atomic.Cas64(&sched.lastpoll, uint64(lastpoll), uint64(now))
list := netpoll(0) // non-blocking - returns list of goroutines
Expand Down Expand Up @@ -4691,7 +4694,7 @@ func schedtrace(detailed bool) {
if mp != nil {
id = mp.id
}
print(" P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gFree.n, "\n")
print(" P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gFree.n, " timerslen=", len(_p_.timers), "\n")
} else {
// In non-detailed mode format lengths of per-P run queues as:
// [len1 len2 len3 len4]
Expand Down
21 changes: 21 additions & 0 deletions src/runtime/time.go
Original file line number Diff line number Diff line change
Expand Up @@ -1024,6 +1024,27 @@ func addAdjustedTimers(pp *p, moved []*timer) {
}
}

// nobarrierWakeTime looks at P's timers and returns the time when we
// should wake up the netpoller. It returns 0 if there are no timers.
// This function is invoked when dropping a P, and must run without
// any write barriers. Therefore, if there are any timers that needs
// to be moved earlier, it conservatively returns the current time.
// The netpoller M will wake up and adjust timers before sleeping again.
//go:nowritebarrierrec
func nobarrierWakeTime(pp *p) int64 {
lock(&pp.timersLock)
ret := int64(0)
if len(pp.timers) > 0 {
if atomic.Load(&pp.adjustTimers) > 0 {
ret = nanotime()
} else {
ret = pp.timers[0].when
}
}
unlock(&pp.timersLock)
return ret
}

// runtimer examines the first timer in timers. If it is ready based on now,
// it runs the timer and removes or updates it.
// Returns 0 if it ran a timer, -1 if there are no more timers, or the time
Expand Down

0 comments on commit d80ab3e

Please sign in to comment.