diff --git a/src/cmd/cgo/internal/testcarchive/carchive_test.go b/src/cmd/cgo/internal/testcarchive/carchive_test.go index a8eebead25dc9f..c263b82d5768f4 100644 --- a/src/cmd/cgo/internal/testcarchive/carchive_test.go +++ b/src/cmd/cgo/internal/testcarchive/carchive_test.go @@ -33,7 +33,7 @@ import ( "unicode" ) -var globalSkip = func(t *testing.T) {} +var globalSkip = func(t testing.TB) {} // Program to run. var bin []string @@ -59,12 +59,12 @@ func TestMain(m *testing.M) { func testMain(m *testing.M) int { if testing.Short() && os.Getenv("GO_BUILDER_NAME") == "" { - globalSkip = func(t *testing.T) { t.Skip("short mode and $GO_BUILDER_NAME not set") } + globalSkip = func(t testing.TB) { t.Skip("short mode and $GO_BUILDER_NAME not set") } return m.Run() } if runtime.GOOS == "linux" { if _, err := os.Stat("/etc/alpine-release"); err == nil { - globalSkip = func(t *testing.T) { t.Skip("skipping failing test on alpine - go.dev/issue/19938") } + globalSkip = func(t testing.TB) { t.Skip("skipping failing test on alpine - go.dev/issue/19938") } return m.Run() } } @@ -1291,8 +1291,8 @@ func TestPreemption(t *testing.T) { } } -// Issue 59294. Test calling Go function from C after using some -// stack space. +// Issue 59294 and 68285. Test calling Go function from C after with +// various stack space. func TestDeepStack(t *testing.T) { globalSkip(t) testenv.MustHaveGoBuild(t) @@ -1350,6 +1350,53 @@ func TestDeepStack(t *testing.T) { } } +func BenchmarkCgoCallbackMainThread(b *testing.B) { + // Benchmark for calling into Go fron C main thread. + // See issue #68587. + // + // It uses a subprocess, which is a C binary that calls + // Go on the main thread b.N times. There is some overhead + // for launching the subprocess. It is probably fine when + // b.N is large. + + globalSkip(b) + testenv.MustHaveGoBuild(b) + testenv.MustHaveCGO(b) + testenv.MustHaveBuildMode(b, "c-archive") + + if !testWork { + defer func() { + os.Remove("testp10" + exeSuffix) + os.Remove("libgo10.a") + os.Remove("libgo10.h") + }() + } + + cmd := exec.Command("go", "build", "-buildmode=c-archive", "-o", "libgo10.a", "./libgo10") + out, err := cmd.CombinedOutput() + b.Logf("%v\n%s", cmd.Args, out) + if err != nil { + b.Fatal(err) + } + + ccArgs := append(cc, "-o", "testp10"+exeSuffix, "main10.c", "libgo10.a") + out, err = exec.Command(ccArgs[0], ccArgs[1:]...).CombinedOutput() + b.Logf("%v\n%s", ccArgs, out) + if err != nil { + b.Fatal(err) + } + + argv := cmdToRun("./testp10") + argv = append(argv, fmt.Sprint(b.N)) + cmd = exec.Command(argv[0], argv[1:]...) + + b.ResetTimer() + err = cmd.Run() + if err != nil { + b.Fatal(err) + } +} + func TestSharedObject(t *testing.T) { // Test that we can put a Go c-archive into a C shared object. globalSkip(t) diff --git a/src/cmd/cgo/internal/testcarchive/testdata/libgo10/a.go b/src/cmd/cgo/internal/testcarchive/testdata/libgo10/a.go new file mode 100644 index 00000000000000..803a0fa5f1cb35 --- /dev/null +++ b/src/cmd/cgo/internal/testcarchive/testdata/libgo10/a.go @@ -0,0 +1,12 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import "C" + +//export GoF +func GoF() {} + +func main() {} diff --git a/src/cmd/cgo/internal/testcarchive/testdata/libgo9/a.go b/src/cmd/cgo/internal/testcarchive/testdata/libgo9/a.go index acb08d90ecd5bf..3528bef654ddb3 100644 --- a/src/cmd/cgo/internal/testcarchive/testdata/libgo9/a.go +++ b/src/cmd/cgo/internal/testcarchive/testdata/libgo9/a.go @@ -6,9 +6,29 @@ package main import "runtime" +// extern void callGoWithVariousStack(int); import "C" func main() {} //export GoF -func GoF() { runtime.GC() } +func GoF(p int32) { + runtime.GC() + if p != 0 { + panic("panic") + } +} + +//export callGoWithVariousStackAndGoFrame +func callGoWithVariousStackAndGoFrame(p int32) { + if p != 0 { + defer func() { + e := recover() + if e == nil { + panic("did not panic") + } + runtime.GC() + }() + } + C.callGoWithVariousStack(C.int(p)); +} diff --git a/src/cmd/cgo/internal/testcarchive/testdata/main10.c b/src/cmd/cgo/internal/testcarchive/testdata/main10.c new file mode 100644 index 00000000000000..53c3c83a99e35c --- /dev/null +++ b/src/cmd/cgo/internal/testcarchive/testdata/main10.c @@ -0,0 +1,22 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include +#include + +#include "libgo10.h" + +int main(int argc, char **argv) { + int n, i; + + if (argc != 2) { + perror("wrong arg"); + return 2; + } + n = atoi(argv[1]); + for (i = 0; i < n; i++) + GoF(); + + return 0; +} diff --git a/src/cmd/cgo/internal/testcarchive/testdata/main9.c b/src/cmd/cgo/internal/testcarchive/testdata/main9.c index 95ad4dea49fb1a..e641d8a8027a5f 100644 --- a/src/cmd/cgo/internal/testcarchive/testdata/main9.c +++ b/src/cmd/cgo/internal/testcarchive/testdata/main9.c @@ -6,19 +6,27 @@ void use(int *x) { (*x)++; } -void callGoFWithDeepStack() { +void callGoFWithDeepStack(int p) { int x[10000]; use(&x[0]); use(&x[9999]); - GoF(); + GoF(p); use(&x[0]); use(&x[9999]); } +void callGoWithVariousStack(int p) { + GoF(0); // call GoF without using much stack + callGoFWithDeepStack(p); // call GoF with a deep stack + GoF(0); // again on a shallow stack +} + int main() { - GoF(); // call GoF without using much stack - callGoFWithDeepStack(); // call GoF with a deep stack + callGoWithVariousStack(0); + + callGoWithVariousStackAndGoFrame(0); // normal execution + callGoWithVariousStackAndGoFrame(1); // panic and recover } diff --git a/src/runtime/cgo/gcc_stack_unix.c b/src/runtime/cgo/gcc_stack_unix.c index fcb03d0dea7e34..df0049a4f37ab3 100644 --- a/src/runtime/cgo/gcc_stack_unix.c +++ b/src/runtime/cgo/gcc_stack_unix.c @@ -31,10 +31,11 @@ x_cgo_getstackbound(uintptr bounds[2]) pthread_attr_get_np(pthread_self(), &attr); pthread_attr_getstack(&attr, &addr, &size); // low address #else - // We don't know how to get the current stacks, so assume they are the - // same as the default stack bounds. - pthread_attr_getstacksize(&attr, &size); - addr = __builtin_frame_address(0) + 4096 - size; + // We don't know how to get the current stacks, leave it as + // 0 and the caller will use an estimate based on the current + // SP. + addr = 0; + size = 0; #endif pthread_attr_destroy(&attr); diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go index 18a10041182356..0effcb8053a561 100644 --- a/src/runtime/cgocall.go +++ b/src/runtime/cgocall.go @@ -231,34 +231,6 @@ func cgocall(fn, arg unsafe.Pointer) int32 { func callbackUpdateSystemStack(mp *m, sp uintptr, signal bool) { g0 := mp.g0 - inBound := sp > g0.stack.lo && sp <= g0.stack.hi - if mp.ncgo > 0 && !inBound { - // ncgo > 0 indicates that this M was in Go further up the stack - // (it called C and is now receiving a callback). - // - // !inBound indicates that we were called with SP outside the - // expected system stack bounds (C changed the stack out from - // under us between the cgocall and cgocallback?). - // - // It is not safe for the C call to change the stack out from - // under us, so throw. - - // Note that this case isn't possible for signal == true, as - // that is always passing a new M from needm. - - // Stack is bogus, but reset the bounds anyway so we can print. - hi := g0.stack.hi - lo := g0.stack.lo - g0.stack.hi = sp + 1024 - g0.stack.lo = sp - 32*1024 - g0.stackguard0 = g0.stack.lo + stackGuard - g0.stackguard1 = g0.stackguard0 - - print("M ", mp.id, " procid ", mp.procid, " runtime: cgocallback with sp=", hex(sp), " out of bounds [", hex(lo), ", ", hex(hi), "]") - print("\n") - exit(2) - } - if !mp.isextra { // We allocated the stack for standard Ms. Don't replace the // stack bounds with estimated ones when we already initialized @@ -266,26 +238,37 @@ func callbackUpdateSystemStack(mp *m, sp uintptr, signal bool) { return } - // This M does not have Go further up the stack. However, it may have - // previously called into Go, initializing the stack bounds. Between - // that call returning and now the stack may have changed (perhaps the - // C thread is running a coroutine library). We need to update the - // stack bounds for this case. + inBound := sp > g0.stack.lo && sp <= g0.stack.hi + if inBound && mp.g0StackAccurate { + // This M has called into Go before and has the stack bounds + // initialized. We have the accurate stack bounds, and the SP + // is in bounds. We expect it continues to run within the same + // bounds. + return + } + + // We don't have an accurate stack bounds (either it never calls + // into Go before, or we couldn't get the accurate bounds), or the + // current SP is not within the previous bounds (the stack may have + // changed between calls). We need to update the stack bounds. // // N.B. we need to update the stack bounds even if SP appears to - // already be in bounds. Our "bounds" may actually be estimated dummy - // bounds (below). The actual stack bounds could have shifted but still - // have partial overlap with our dummy bounds. If we failed to update - // in that case, we could find ourselves seemingly called near the - // bottom of the stack bounds, where we quickly run out of space. + // already be in bounds, if our bounds are estimated dummy bounds + // (below). We may be in a different region within the same actual + // stack bounds, but our estimates were not accurate. Or the actual + // stack bounds could have shifted but still have partial overlap with + // our dummy bounds. If we failed to update in that case, we could find + // ourselves seemingly called near the bottom of the stack bounds, where + // we quickly run out of space. // Set the stack bounds to match the current stack. If we don't // actually know how big the stack is, like we don't know how big any // scheduling stack is, but we assume there's at least 32 kB. If we // can get a more accurate stack bound from pthread, use that, provided - // it actually contains SP.. + // it actually contains SP. g0.stack.hi = sp + 1024 g0.stack.lo = sp - 32*1024 + mp.g0StackAccurate = false if !signal && _cgo_getstackbound != nil { // Don't adjust if called from the signal handler. // We are on the signal stack, not the pthread stack. @@ -296,12 +279,16 @@ func callbackUpdateSystemStack(mp *m, sp uintptr, signal bool) { asmcgocall(_cgo_getstackbound, unsafe.Pointer(&bounds)) // getstackbound is an unsupported no-op on Windows. // + // On Unix systems, if the API to get accurate stack bounds is + // not available, it returns zeros. + // // Don't use these bounds if they don't contain SP. Perhaps we // were called by something not using the standard thread // stack. if bounds[0] != 0 && sp > bounds[0] && sp <= bounds[1] { g0.stack.lo = bounds[0] g0.stack.hi = bounds[1] + mp.g0StackAccurate = true } } g0.stackguard0 = g0.stack.lo + stackGuard @@ -319,6 +306,8 @@ func cgocallbackg(fn, frame unsafe.Pointer, ctxt uintptr) { } sp := gp.m.g0.sched.sp // system sp saved by cgocallback. + oldStack := gp.m.g0.stack + oldAccurate := gp.m.g0StackAccurate callbackUpdateSystemStack(gp.m, sp, false) // The call from C is on gp.m's g0 stack, so we must ensure @@ -380,6 +369,12 @@ func cgocallbackg(fn, frame unsafe.Pointer, ctxt uintptr) { reentersyscall(savedpc, uintptr(savedsp), uintptr(savedbp)) gp.m.winsyscall = winsyscall + + // Restore the old g0 stack bounds + gp.m.g0.stack = oldStack + gp.m.g0.stackguard0 = oldStack.lo + stackGuard + gp.m.g0.stackguard1 = gp.m.g0.stackguard0 + gp.m.g0StackAccurate = oldAccurate } func cgocallbackg1(fn, frame unsafe.Pointer, ctxt uintptr) { diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 7ff339ea465461..e2e6dbdd3f72ef 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -2550,6 +2550,7 @@ func dropm() { g0.stack.lo = 0 g0.stackguard0 = 0 g0.stackguard1 = 0 + mp.g0StackAccurate = false putExtraM(mp) diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 34aefd4c47badc..5e75e154e69df1 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -530,47 +530,48 @@ type m struct { _ uint32 // align next field to 8 bytes // Fields not known to debuggers. - procid uint64 // for debuggers, but offset not hard-coded - gsignal *g // signal-handling g - goSigStack gsignalStack // Go-allocated signal handling stack - sigmask sigset // storage for saved signal mask - tls [tlsSlots]uintptr // thread-local storage (for x86 extern register) - mstartfn func() - curg *g // current running goroutine - caughtsig guintptr // goroutine running during fatal signal - p puintptr // attached p for executing go code (nil if not executing go code) - nextp puintptr - oldp puintptr // the p that was attached before executing a syscall - id int64 - mallocing int32 - throwing throwType - preemptoff string // if != "", keep curg running on this m - locks int32 - dying int32 - profilehz int32 - spinning bool // m is out of work and is actively looking for work - blocked bool // m is blocked on a note - newSigstack bool // minit on C thread called sigaltstack - printlock int8 - incgo bool // m is executing a cgo call - isextra bool // m is an extra m - isExtraInC bool // m is an extra m that is not executing Go code - isExtraInSig bool // m is an extra m in a signal handler - freeWait atomic.Uint32 // Whether it is safe to free g0 and delete m (one of freeMRef, freeMStack, freeMWait) - needextram bool - traceback uint8 - ncgocall uint64 // number of cgo calls in total - ncgo int32 // number of cgo calls currently in progress - cgoCallersUse atomic.Uint32 // if non-zero, cgoCallers in use temporarily - cgoCallers *cgoCallers // cgo traceback if crashing in cgo call - park note - alllink *m // on allm - schedlink muintptr - lockedg guintptr - createstack [32]uintptr // stack that created this thread, it's used for StackRecord.Stack0, so it must align with it. - lockedExt uint32 // tracking for external LockOSThread - lockedInt uint32 // tracking for internal lockOSThread - nextwaitm muintptr // next m waiting for lock + procid uint64 // for debuggers, but offset not hard-coded + gsignal *g // signal-handling g + goSigStack gsignalStack // Go-allocated signal handling stack + sigmask sigset // storage for saved signal mask + tls [tlsSlots]uintptr // thread-local storage (for x86 extern register) + mstartfn func() + curg *g // current running goroutine + caughtsig guintptr // goroutine running during fatal signal + p puintptr // attached p for executing go code (nil if not executing go code) + nextp puintptr + oldp puintptr // the p that was attached before executing a syscall + id int64 + mallocing int32 + throwing throwType + preemptoff string // if != "", keep curg running on this m + locks int32 + dying int32 + profilehz int32 + spinning bool // m is out of work and is actively looking for work + blocked bool // m is blocked on a note + newSigstack bool // minit on C thread called sigaltstack + printlock int8 + incgo bool // m is executing a cgo call + isextra bool // m is an extra m + isExtraInC bool // m is an extra m that is not executing Go code + isExtraInSig bool // m is an extra m in a signal handler + freeWait atomic.Uint32 // Whether it is safe to free g0 and delete m (one of freeMRef, freeMStack, freeMWait) + needextram bool + g0StackAccurate bool // whether the g0 stack has accurate bounds + traceback uint8 + ncgocall uint64 // number of cgo calls in total + ncgo int32 // number of cgo calls currently in progress + cgoCallersUse atomic.Uint32 // if non-zero, cgoCallers in use temporarily + cgoCallers *cgoCallers // cgo traceback if crashing in cgo call + park note + alllink *m // on allm + schedlink muintptr + lockedg guintptr + createstack [32]uintptr // stack that created this thread, it's used for StackRecord.Stack0, so it must align with it. + lockedExt uint32 // tracking for external LockOSThread + lockedInt uint32 // tracking for internal lockOSThread + nextwaitm muintptr // next m waiting for lock mLockProfile mLockProfile // fields relating to runtime.lock contention profStack []uintptr // used for memory/block/mutex stack traces