Skip to content

Commit

Permalink
Making memory_watchdog not run by default (#785)
Browse files Browse the repository at this point in the history
Closes #778
  • Loading branch information
ankushduacodes authored Nov 10, 2020
1 parent 93ddf80 commit 65337e0
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 29 deletions.
56 changes: 29 additions & 27 deletions automation/TaskManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
BROWSER_MEMORY_LIMIT = 1500 # in MB

AGGREGATOR_QUEUE_LIMIT = 10000 # number of records in the queue
MEMORY_WATCHDOG = "memory_watchdog"


def load_default_params(
Expand Down Expand Up @@ -224,30 +225,31 @@ def _manager_watchdog(self) -> None:
time.sleep(10)

# Check browser memory usage
for browser in self.browsers:
try:
# Sum the memory used by the geckodriver process, the
# main Firefox process and all its child processes.
# Use the USS metric for child processes, to avoid
# double-counting memory shared with their parent.
geckodriver = psutil.Process(browser.geckodriver_pid)
mem_bytes = geckodriver.memory_info().rss
children = geckodriver.children()
if children:
firefox = children[0]
mem_bytes += firefox.memory_info().rss
for child in firefox.children():
mem_bytes += child.memory_full_info().uss
mem = mem_bytes / 2 ** 20
if mem > BROWSER_MEMORY_LIMIT:
self.logger.info(
"BROWSER %i: Memory usage: %iMB"
", exceeding limit of %iMB"
% (browser.browser_id, int(mem), BROWSER_MEMORY_LIMIT)
)
browser.restart_required = True
except psutil.NoSuchProcess:
pass
if self.manager_params[MEMORY_WATCHDOG]:
for browser in self.browsers:
try:
# Sum the memory used by the geckodriver process, the
# main Firefox process and all its child processes.
# Use the USS metric for child processes, to avoid
# double-counting memory shared with their parent.
geckodriver = psutil.Process(browser.geckodriver_pid)
mem_bytes = geckodriver.memory_info().rss
children = geckodriver.children()
if children:
firefox = children[0]
mem_bytes += firefox.memory_info().rss
for child in firefox.children():
mem_bytes += child.memory_full_info().uss
mem = mem_bytes / 2 ** 20
if mem > BROWSER_MEMORY_LIMIT:
self.logger.info(
"BROWSER %i: Memory usage: %iMB"
", exceeding limit of %iMB"
% (browser.browser_id, int(mem), BROWSER_MEMORY_LIMIT)
)
browser.restart_required = True
except psutil.NoSuchProcess:
pass

# Check for browsers or displays that were not closed correctly
# 300 second buffer to avoid killing freshly launched browsers
Expand All @@ -273,9 +275,9 @@ def _manager_watchdog(self) -> None:
)
):
self.logger.debug(
"Process: %s (pid: %i) with start "
"time %s found running but not in "
"browser process list. Killing."
"Process %s (pid: %i) with start "
"time %s isn't controlled by any BrowserManager."
"Killing it now."
% (process.name(), process.pid, process.create_time())
)
kill_process_and_children(process, self.logger)
Expand Down
3 changes: 2 additions & 1 deletion automation/default_manager_params.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@
"failure_limit": null,
"testing": false,
"s3_bucket": null,
"s3_directory": null
"s3_directory": null,
"memory_watchdog": false
}
1 change: 1 addition & 0 deletions demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
# Update TaskManager configuration (use this for crawl-wide settings)
manager_params["data_directory"] = "~/Desktop/"
manager_params["log_directory"] = "~/Desktop/"
manager_params["memory_watchdog"] = True

# Instantiates the measurement platform
# Commands time out by default after 60 seconds
Expand Down
3 changes: 3 additions & 0 deletions docs/Configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ of configuration dictionaries.
on-the-fly. Depending on where you would like to add test functionality,
you may need to propagate the flag.
* This is not something you should enable during normal crawls.
* `memory_watchdog`
* A watchdog that tries to ensure that no Firefox instance takes up too much memory. It is set to false by default.
* It is mostly useful for long-running cloud crawls.

# Browser Configuration Options

Expand Down
8 changes: 7 additions & 1 deletion docs/Platform-Architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,13 @@ to get the default parameters.

To learn more about the `manager_params` and `browser_params` have a look at [Configuration.md](Configuration.md)

`<process_watchdog>` is an optional parameter that can be passed to the `TaskManager` to create another thread that kills off all processes named `Xvfb` or `firefox` that haven't been spawned by OpenWPM.
## Watchdogs
In OpenWPM we have two so-called watchdogs, each of which tries to ensure one thing.
- `process_watchdog`
* It is an optional parameter that can be passed to the `TaskManager` to create another thread that kills off `GeckoDriver` (or `Xvfb`) instances that haven't been spawned by OpenWPM. (GeckoDriver is used by Selenium to control Firefox, and Xvfb is a "virtual display" that lets us simulate having graphics when running on a server.)
- `memory_watchdog`
* A watchdog that tries to ensure that no Firefox instance takes up too much memory. It is set to false by default.
* It is mostly useful for long-running cloud crawls.

## Issuing commands

Expand Down

0 comments on commit 65337e0

Please sign in to comment.