Skip to content

Commit

Permalink
test: make cluster test more time tolerant on more platforms
Browse files Browse the repository at this point in the history
The problem
nodejs/node-v0.x-archive@f3f4e28
resolves also affects Alpine Linux (musl libc) so wait up to 1 second
for all platforms. But poll regularily so we don´t need to wait more
than necessary.

Also add some explaining comments on why it takes so long for the child
processes to die.

Signed-off-by: Natanael Copa <ncopa@alpinelinux.org>
  • Loading branch information
ncopa committed May 3, 2016
1 parent 51e7bc8 commit 6e7ef0e
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 18 deletions.
23 changes: 13 additions & 10 deletions test/parallel/test-cluster-master-error.js
Original file line number Diff line number Diff line change
Expand Up @@ -92,15 +92,15 @@ if (cluster.isWorker) {
// Check that the cluster died accidently
existMaster = !!code;

// Give the workers time to shut down
var timeout = 200;
if (common.isAix) {
// AIX needs more time due to default exit performance
timeout = 1000;
}
setTimeout(checkWorkers, timeout);

function checkWorkers() {
// Give the workers time to shut down.
// Since the parent process of the workers does not clean up after the
// forked children they will end up as zombie processes. pid 1 (init) will
// reparent those and eventually reap them. This takes normally <200ms but
// some init systems, like AIX and busybox init, needs a bit more.
var timeout = 1000;

var waitWorker = setInterval(function() {
timeout -= 10;
// When master is dead all workers should be dead to
var alive = false;
workers.forEach(function(pid) {
Expand All @@ -111,7 +111,10 @@ if (cluster.isWorker) {

// If a worker was alive this did not act as expected
existWorker = !alive;
}

if (!alive || (timeout <= 0))
clearInterval(waitWorker);
}, 10);
});

process.once('exit', function() {
Expand Down
24 changes: 16 additions & 8 deletions test/parallel/test-cluster-master-kill.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ if (cluster.isWorker) {
var isAlive = function(pid) {
try {
//this will throw an error if the process is dead
// note that the master does not clean up after the children so
// even dead children will return as 'alive' zombies here.
// when master exits pid 1 (init) will take over as parent for the
// zombie children and will eventually reap them.
process.kill(pid, 0);

return true;
Expand All @@ -58,21 +62,25 @@ if (cluster.isWorker) {
// make sure that the master died by purpose
assert.equal(code, 0);

// check worker process status
var timeout = 200;
if (common.isAix) {
// AIX needs more time due to default exit performance
timeout = 1000;
}
setTimeout(function() {
// wait for init (pid 1) to collect the worker process
// normally 200ms is enough, but it depends on the init (pid 1)
// implementation. AIX's init and busybox init need more. We wait
// up to 1 sec (1000ms) before we trigger error.
var timeout = 1000;

var waitWorker = setInterval(function() {
timeout -= 10;
alive = isAlive(pid);
}, timeout);
if (!alive || (timeout <= 0))
clearInterval(waitWorker);
}, 10);
});

process.once('exit', function() {
// cleanup: kill the worker if alive
if (alive) {
process.kill(pid);
// we need waitpid(pid) here to avoid a zombie worker
}

assert.equal(typeof pid, 'number', 'did not get worker pid info');
Expand Down

0 comments on commit 6e7ef0e

Please sign in to comment.