Warn if CPU usage is too high (>90%) #1161 #1236

Merged
merged 6 commits on Jan 22, 2020
Changes from 2 commits
7 changes: 6 additions & 1 deletion locust/main.py
@@ -570,7 +570,12 @@ def sig_term_handler():
logger.info("Starting Locust %s" % version)
main_greenlet.join()
code = 0
if len(runners.locust_runner.errors) or len(runners.locust_runner.exceptions):
lr = runners.locust_runner
if lr.cpu_threshold_exceeded:
logger.warning("CPU threshold was exceeded during the test")
if lr.slave_cpu_threshold_exceeded:
logger.warning("CPU usage threshold was exceeded on slaves during the test")
if len(lr.errors) or len(lr.exceptions) or lr.cpu_threshold_exceeded or lr.slave_cpu_threshold_exceeded:
code = options.exit_code_on_error
shutdown(code=code)
except KeyboardInterrupt as e:
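
A small usage sketch of what the exit-code change above means for scripted runs (the wrapper and the command-line flags here are assumptions based on Locust options of this era, not part of the patch): because an exceeded CPU threshold is now treated like errors and exceptions, a CI job can detect an overloaded load generator from the process exit code alone.

import subprocess

# hypothetical headless run; flags are assumptions, adjust to your own setup
result = subprocess.run([
    "locust", "-f", "locustfile.py", "--no-web",
    "-c", "100", "-r", "10", "--run-time", "1m",
])
if result.returncode != 0:
    # errors, exceptions, or the CPU threshold warning occurred during the run
    print("load test flagged problems (possibly an overloaded loadgen)")
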
30 changes: 26 additions & 4 deletions locust/runners.py
@@ -8,6 +8,7 @@
from time import time

import gevent
import psutil
import six
from gevent import GreenletExit
from gevent.pool import Group
@@ -40,6 +41,10 @@ def __init__(self, locust_classes, options):
self.state = STATE_INIT
self.hatching_greenlet = None
self.stepload_greenlet = None
self.current_cpu_usage = 0
self.cpu_threshold_exceeded = False
self.slave_cpu_threshold_exceeded = False
gevent.spawn(self.monitor_cpu)
Member
Hmm, since we don't store any reference to this greenlet, it won't get killed. We should probably change so that the LocalLocustRunner.greenlet is a gevent.pool.Group instance (just like MasterLocustRunner and SlaveLocustRunner) and then make sure the CPU monitor greenlet is spawned from this group.

Collaborator Author
Hmm... I'm not sure how to do that. LocustRunner is supposed to be a singleton, so it shouldn't really matter, right? (not that we want to be sloppy :)

If you think this is important, would you mind taking a look yourself?

Member
At the moment, it's a singleton when started normally (through main.py). But we do create multiple runner instances within the tests.

Also, it's been proposed, and I think it's a good idea, to work towards an API where one can run Locust programmatically, in which case I think the design will be much cleaner if we group the spawned greenlets together (so that one can join them) and make sure they are killed together.

I can definitely take a look!

Collaborator Author
Thanks!

Member
I've now changed so that we spawn all greenlets through the runner instance's greenlet attribute, which is a gevent.pool.Group instance.

I also added a test for the CPU warning (and changed so that we use a constant for the monitoring interval, so that I could decrease the run time of the test).

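A minimal sketch of the pattern described above (the class and method names are illustrative, not the actual Locust code): the runner owns a gevent.pool.Group, and the CPU monitor is spawned from that group so it can be joined and killed together with the runner's other greenlets.

import gevent
import psutil
from gevent.pool import Group

class RunnerSketch(object):
    def __init__(self):
        # the group owns every background greenlet this runner starts
        self.greenlet = Group()
        self.current_cpu_usage = 0
        # spawned through the group, so killing the group also kills the monitor
        self.greenlet.spawn(self.monitor_cpu)

    def monitor_cpu(self):
        process = psutil.Process()
        while True:
            # with no interval argument, cpu_percent() reports usage since the
            # previous call (the very first call returns 0.0 and sets the baseline)
            self.current_cpu_usage = process.cpu_percent()
            gevent.sleep(5.0)

    def quit(self):
        # terminates the CPU monitor along with any other spawned greenlets
        self.greenlet.kill(block=True)
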
self.exceptions = {}
self.stats = global_stats
self.step_load = options.step_load
@@ -180,11 +185,22 @@ def kill_locust_greenlets(self, greenlets):
else:
for g in greenlets:
self.locusts.killone(g)


def monitor_cpu(self):
process = psutil.Process()
while True:
self.current_cpu_usage = process.cpu_percent()
if self.current_cpu_usage > 90 and not self.cpu_threshold_exceeded:
logging.warning("Loadgen CPU usage above 90%! This may constrain your throughput and may even give inconsistent response time measurements! See https://docs.locust.io/en/stable/running-locust-distributed.html for how to distribute the load over multiple CPU cores or machines")
self.cpu_threshold_exceeded = True
gevent.sleep(5.0)

def start_hatching(self, locust_count=None, hatch_rate=None, wait=False):
if self.state != STATE_RUNNING and self.state != STATE_HATCHING:
self.stats.clear_all()
self.exceptions = {}
self.cpu_threshold_exceeded = False
self.slave_cpu_threshold_exceeded = False
events.locust_start_hatching.fire()

# Dynamically changing the locust count
@@ -293,6 +309,7 @@ def __init__(self, id, state=STATE_INIT, heartbeat_liveness=3):
self.state = state
self.user_count = 0
self.heartbeat = heartbeat_liveness
self.cpu_threshold_exceeded = False

class MasterLocustRunner(DistributedLocustRunner):
def __init__(self, *args, **kwargs):
@@ -423,8 +440,13 @@ def client_listener(self):
logger.info("Removing %s client from running clients" % (msg.node_id))
elif msg.type == "heartbeat":
if msg.node_id in self.clients:
self.clients[msg.node_id].heartbeat = self.heartbeat_liveness
self.clients[msg.node_id].state = msg.data['state']
c = self.clients[msg.node_id]
c.heartbeat = self.heartbeat_liveness
c.state = msg.data['state']
if not c.cpu_threshold_exceeded and msg.data['current_cpu_usage'] > 90:
c.cpu_threshold_exceeded = True
self.slave_cpu_threshold_exceeded = True
logger.warning("Slave %s exceeded cpu threshold" % (msg.node_id))
elif msg.type == "stats":
events.slave_report.fire(client_id=msg.node_id, data=msg.data)
elif msg.type == "hatching":
@@ -487,7 +509,7 @@ def on_locust_error(locust_instance, exception, tb):

def heartbeat(self):
while True:
self.client.send(Message('heartbeat', {'state': self.slave_state}, self.client_id))
self.client.send(Message('heartbeat', {'state': self.slave_state, 'current_cpu_usage': self.current_cpu_usage}, self.client_id))
gevent.sleep(self.heartbeat_interval)

def worker(self):
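
A compact, self-contained sketch (plain dicts and a tiny class stand in for Locust's Message and SlaveNode objects; values are made up) of the heartbeat flow added above: each slave reports its CPU usage alongside its state, and the master flags both that slave and the whole run the first time the 90% threshold is crossed.

class SlaveNodeSketch(object):
    cpu_threshold_exceeded = False

clients = {"slave-1": SlaveNodeSketch()}
slave_cpu_threshold_exceeded = False

# slave side: CPU usage rides along on the existing heartbeat payload
heartbeat_payload = {"state": "running", "current_cpu_usage": 95.0}

# master side: mirrors the client_listener logic in the diff above
c = clients["slave-1"]
if not c.cpu_threshold_exceeded and heartbeat_payload["current_cpu_usage"] > 90:
    c.cpu_threshold_exceeded = True
    slave_cpu_threshold_exceeded = True
    print("Slave %s exceeded cpu threshold" % "slave-1")
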
1 change: 1 addition & 0 deletions setup.py
@@ -51,6 +51,7 @@
"pyzmq>=16.0.2",
"geventhttpclient-wheels==1.3.1.dev2",
"ConfigArgParse==0.15.1",
"psutil==5.6.7",
],
test_suite="locust.test",
tests_require=['mock'],