Skip to content

Commit

Permalink
avoid a deadlock in the idle watch reaper
Browse files Browse the repository at this point in the history
Summary:
here's an example of the deadlock:

```
Thread 53 (Thread 0x7f3f17fff700 (LWP 3247723)):
 #0  __lll_lock_wait () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135
 #1  0x00007f3f1dadbf2c in __GI___pthread_mutex_lock (mutex=mutex@entry=0x68f8e0 <root_lock>) at ../nptl/pthread_mutex_lock.c:80
 #2  0x0000000000417b66 in remove_root_from_watched (root=0x692380) at root.c:1872
 #3  w_root_stop_watch (root=root@entry=0x692380) at root.c:2251
 #4  0x000000000041854b in consider_reap (root=0x692380) at root.c:1601
 #5  notify_thread (root=0x692380) at root.c:1656
 #6  run_notify_thread (arg=0x692380) at root.c:2148
 #7  0x00007f3f1dad97f1 in start_thread (arg=0x7f3f17fff700) at pthread_create.c:310
 #8  0x00007f3f1d81046d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109

Thread 52 (Thread 0x7f3f175fd700 (LWP 3247724)):
 #0  __lll_lock_wait () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135
 #1  0x00007f3f1dadbfb0 in __GI___pthread_mutex_lock (mutex=mutex@entry=0x692398) at ../nptl/pthread_mutex_lock.c:115
 #2  0x0000000000415255 in w_root_lock (root=root@entry=0x692380) at root.c:313
 #3  0x0000000000418bdc in w_root_save_state (state=state@entry=0x7f3f080480d0) at root.c:2444
 #4  0x0000000000418ff8 in w_state_save () at state.c:98
 #5  0x0000000000417bef in w_root_stop_watch (root=root@entry=0x693780) at root.c:2255
 #6  0x000000000041854b in consider_reap (root=0x693780) at root.c:1601
 #7  notify_thread (root=0x693780) at root.c:1656
 #8  run_notify_thread (arg=0x693780) at root.c:2148
 #9  0x00007f3f1dad97f1 in start_thread (arg=0x7f3f175fd700) at pthread_create.c:310
 #10 0x00007f3f1d81046d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
```

Test Plan:
augmented an integration test, but I couldn't get this to trigger on demand :-/
In the problem case, both watches were established from the state file and aged at at
the exact same time.

Reviewers: sid0

Differential Revision: https://reviews.facebook.net/D44841
  • Loading branch information
wez committed Aug 20, 2015
1 parent 29f2df5 commit 59d2ad0
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 5 deletions.
13 changes: 9 additions & 4 deletions root.c
Original file line number Diff line number Diff line change
Expand Up @@ -1674,6 +1674,9 @@ static void consider_age_out(w_root_t *root)
w_root_perform_age_out(root, root->gc_age);
}

// This is a little tricky. We have to be called with root->lock
// held, but we must not call w_root_stop_watch with the lock held,
// so we return true if the caller should do that
static bool consider_reap(w_root_t *root) {
time_t now;

Expand All @@ -1694,7 +1697,6 @@ static bool consider_reap(w_root_t *root) {
"Set idle_reap_age_seconds in your .watchmanconfig to control "
"this behavior\n",
root->root_path->len, root->root_path->buf, root->idle_reap_age);
w_root_stop_watch(root);
return true;
}

Expand Down Expand Up @@ -1795,9 +1797,12 @@ static void io_thread(w_root_t *root)
w_root_lock(root);
process_subscriptions(root);
process_triggers(root);
if (!consider_reap(root)) {
consider_age_out(root);
if (consider_reap(root)) {
w_root_unlock(root);
w_root_stop_watch(root);
break;
}
consider_age_out(root);
w_root_unlock(root);

timeoutms = MIN(biggest_timeout, timeoutms * 2);
Expand Down Expand Up @@ -2006,7 +2011,7 @@ void watchman_watcher_dtor(void) {
watcher_ops->global_dtor(watcher);
}


// Must not be called with root->lock held :-/
static bool remove_root_from_watched(w_root_t *root)
{
bool removed = false;
Expand Down
13 changes: 12 additions & 1 deletion tests/integration/test_age_watch.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,16 @@

class TestAgeOutWatch(WatchmanTestCase.WatchmanTestCase):

def test_watchReap(self):
def makeRootAndConfig(self):
root = self.mkdtemp()
with open(os.path.join(root, '.watchmanconfig'), 'w') as f:
f.write(json.dumps({
'idle_reap_age_seconds': 1
}))
return root

def test_watchReap(self):
root = self.makeRootAndConfig()
self.watchmanCommand('watch', root)

# make sure that we don't reap when there are registered triggers
Expand Down Expand Up @@ -49,6 +53,13 @@ def test_watchReap(self):
self.assertEqual(self.normFileList(watch_list['roots']), expected)

if self.transport != 'cli':
# let's verify that we can safely reap two roots at once without
# causing a deadlock
second = self.makeRootAndConfig()
self.watchmanCommand('watch', second)
self.assertFileList(second, ['.watchmanconfig'])

# and unsubscribe from root and allow it to be reaped
self.watchmanCommand('unsubscribe', root, 's')

# and now we should be ready to reap
Expand Down

0 comments on commit 59d2ad0

Please sign in to comment.