Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add heapdump telemetry device #776

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/telemetry.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ You probably want to gain additional insights from a race. Therefore, we have ad
jit JIT Compiler Profiler Enables JIT compiler logs.
gc GC log Enables GC logs.
jfr Flight Recorder Enables Java Flight Recorder (requires an Oracle JDK or OpenJDK 11+)
heapdump Heap Dump Captures a heap dump.
node-stats Node Stats Regularly samples node stats
recovery-stats Recovery Stats Regularly samples shard recovery stats
ccr-stats CCR Stats Regularly samples Cross Cluster Replication (CCR) related stats
Expand Down Expand Up @@ -66,6 +67,11 @@ gc

The ``gc`` telemetry device enables GC logs for the benchmark candidate. You can use tools like `GCViewer <https://github.com/chewiebug/GCViewer>`_ to analyze the GC logs.

heapdump
--------

The ``heapdump`` telemetry device will capture a heap dump after a benchmark has finished and right before the node is shut down.

node-stats
----------

Expand Down
1 change: 1 addition & 0 deletions esrally/mechanic/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ def _start_node(self, node_configuration, node_count_on_host):
telemetry.FlightRecorder(telemetry_params, node_telemetry_dir, java_major_version),
telemetry.JitCompiler(node_telemetry_dir),
telemetry.Gc(node_telemetry_dir, java_major_version),
telemetry.Heapdump(node_telemetry_dir),
telemetry.DiskIo(self.metrics_store, node_count_on_host, node_telemetry_dir, node_name),
telemetry.IndexSize(data_paths, self.metrics_store),
telemetry.StartupTime(self.metrics_store),
Expand Down
24 changes: 22 additions & 2 deletions esrally/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,14 @@

from esrally import metrics, time, exceptions
from esrally.metrics import MetaInfoScope
from esrally.utils import io, sysstats, console, opts
from esrally.utils import io, sysstats, console, opts, process


def list_telemetry():
# Prints a table of the available telemetry devices (command, name and description), followed by a
# reminder that telemetry devices may add runtime overhead.
console.println("Available telemetry devices:\n")
# One table row per device class, built from its ``command``, ``human_name`` and ``help`` attributes.
# NOTE(review): the device list below appears twice (one line, then a two-line variant that adds
# Heapdump) - this looks like removed/added diff lines shown together; confirm which one is current.
devices = [[device.command, device.human_name, device.help] for device in [JitCompiler, Gc, FlightRecorder,
NodeStats, RecoveryStats, CcrStats]]
Heapdump, NodeStats, RecoveryStats,
CcrStats]]
console.println(tabulate.tabulate(devices, ["Command", "Name", "Description"]))
console.println("\nKeep in mind that each telemetry device may incur a runtime overhead which can skew results.")

Expand Down Expand Up @@ -243,6 +244,25 @@ def java_opts(self, log_file):
return ["-Xlog:gc*=info,safepoint=info,age*=trace:file={}:utctime,uptimemillis,level,tags:filecount=0".format(log_file)]


class Heapdump(TelemetryDevice):
    """
    Telemetry device that writes a heap dump of a node by invoking ``jmap`` when it is detached from that node.
    """
    internal = False
    command = "heapdump"
    human_name = "Heap Dump"
    help = "Captures a heap dump."

    def __init__(self, log_root):
        """
        :param log_root: Directory in which the heap dump file is created.
        """
        super().__init__()
        self.log_root = log_root

    def detach_from_node(self, node, running):
        """
        Dumps the heap of the provided node into ``log_root``.

        :param node: The node to dump; its ``pid`` is used for the file name and passed to ``jmap``.
        :param running: Whether the node is still running. Nothing happens if it is not.
        """
        if not running:
            return

        pid = node.pid
        dump_path = os.path.join(self.log_root, "heap_at_exit_{}.hprof".format(pid))
        console.info("{}: Writing heap dump to [{}]".format(self.human_name, dump_path), logger=self.logger)
        # a truthy (i.e. non-zero) return value is treated as a failed jmap invocation
        exit_code = process.run_subprocess_with_logging("jmap -dump:format=b,file={} {}".format(dump_path, pid))
        if exit_code:
            self.logger.warning("Could not write heap dump to [%s]", dump_path)


class CcrStats(TelemetryDevice):
internal = False
command = "ccr-stats"
Expand Down
12 changes: 12 additions & 0 deletions tests/telemetry_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,18 @@ def test_sets_options_for_java_9_or_above(self):
gc_java_opts)


class HeapdumpTests(TestCase):
    """Checks the command line that the heap dump telemetry device issues when it is detached from a running node."""

    @mock.patch("esrally.utils.process.run_subprocess_with_logging")
    def test_generates_heap_dump(self, run_subprocess_with_logging):
        # a zero return value is not treated as a failure by the device
        run_subprocess_with_logging.return_value = 0

        device = telemetry.Heapdump("/var/log")
        t = telemetry.Telemetry(enabled_devices=[device.command], devices=[device])
        node = cluster.Node(pid="1234", host_name="localhost", node_name="rally0", telemetry=t)

        t.attach_to_node(node)
        t.detach_from_node(node, running=True)

        expected_command = "jmap -dump:format=b,file=/var/log/heap_at_exit_1234.hprof 1234"
        run_subprocess_with_logging.assert_called_with(expected_command)


class CcrStatsTests(TestCase):
def test_negative_sample_interval_forbidden(self):
clients = {"default": Client(), "cluster_b": Client()}
Expand Down