Skip copying JFR recording into a temporary file #1242

Merged · 2 commits · May 8, 2023
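In short: instead of dumping each periodic JFR snapshot to a temporary file and re-reading it, the recorder now hands the snapshot's InputStream straight to the event-processing chain; files are written only when keeping recordings on disk is explicitly enabled. A minimal sketch of that streaming pattern, assuming the JDK 14+ `jdk.jfr` API (the class and handler names below are illustrative, not the agent's actual types):

```java
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.time.Instant;
import java.util.function.Consumer;
import jdk.jfr.FlightRecorder;
import jdk.jfr.Recording;

/** Illustrative sketch only: flush JFR snapshot data without a temporary file. */
class SnapshotStreamer {

  private volatile Instant snapshotStart = Instant.now();

  void flushSnapshot(Consumer<InputStream> onNewRecording) {
    // takeSnapshot() returns a point-in-time copy of the data recorded so far.
    try (Recording snap = FlightRecorder.getFlightRecorder().takeSnapshot()) {
      Instant snapshotEnd = snap.getStopTime();
      Instant start = snapshotStart;
      snapshotStart = snapshotEnd;
      // getStream(start, end) exposes the interval as an InputStream, so there is
      // no Recording.dump(Path) and no later re-read of a temporary file.
      try (InputStream in = snap.getStream(start, snapshotEnd)) {
        onNewRecording.accept(in);
      }
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }
}
```

In the diff below, JfrRecorder keeps the write-to-disk path only behind the new keepRecordingFiles flag, and JfrActivator drops the file deleter, directory cleanup, and recording-escape-hatch wiring that the temporary files needed.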
8 changes: 0 additions & 8 deletions profiler/README.md
@@ -105,14 +105,6 @@ The agent logs the profiling configuration at `INFO` during startup. You can gre
[otel.javaagent 2021-09-28 18:17:04:246 +0000] [main] INFO <snip> - -----------------------
```

### What about this escape hatch?

If the escape hatch becomes active, it will log with `com.splunk.opentelemetry.profiler.RecordingEscapeHatch`
(you can grep for this in the logs). You may also look for `"** THIS WILL RESULT IN LOSS OF PROFILING DATA **"`
as a big hint that things are not well.

You may need to free up some disk space and/or give the JVM more resources.

Comment on lines -108 to -115
@laurit @breedx-splk So this is gone? If that's the case, I'll remove it from the official troubleshooting.


Current text:

Loss of profiling data or gaps in profiling data
-------------------------------------------------------------

If there are less than 100 megabytes of space available for the Java Virtual Machine, AlwaysOn Profiling activates the recording escape hatch, which appears in the logs as ``com.splunk.opentelemetry.profiler.RecordingEscapeHatch``. The escape hatch drops all logs with profiling data until more space is available.

To avoid the loss of profiling data due to the escape hatch, provide enough resources to the JVM.
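For reference, a sketch of the kind of free-disk-space check the quoted text describes, with the 100 MB threshold taken from that text; this is illustrative only, not the deleted RecordingEscapeHatch implementation:

```java
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;

/** Illustrative sketch of a disk-space escape hatch; not the removed class. */
class FreeSpaceCheck {

  // Threshold quoted from the troubleshooting text above (100 MB).
  private static final long MIN_FREE_BYTES = 100L * 1024 * 1024;

  static boolean enoughSpaceFor(Path outputDir) {
    try {
      // Usable space on the file store backing the profiler's output directory.
      long usable = Files.getFileStore(outputDir).getUsableSpace();
      return usable >= MIN_FREE_BYTES;
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }
}
```

Since this change stops writing periodic recording files to disk, the escape hatch and the README section that documented it go away.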

### What if I'm on an unsupported JVM?

If your JVM does not support JFR, the profiler logs a warning at startup with the
JfrActivator.java
@@ -20,8 +20,6 @@
import static com.splunk.opentelemetry.profiler.Configuration.CONFIG_KEY_KEEP_FILES;
import static com.splunk.opentelemetry.profiler.Configuration.CONFIG_KEY_PROFILER_DIRECTORY;
import static com.splunk.opentelemetry.profiler.Configuration.CONFIG_KEY_RECORDING_DURATION;
import static com.splunk.opentelemetry.profiler.JfrFileLifecycleEvents.buildOnFileFinished;
import static com.splunk.opentelemetry.profiler.JfrFileLifecycleEvents.buildOnNewRecording;
import static com.splunk.opentelemetry.profiler.util.Runnables.logUncaught;
import static java.util.logging.Level.WARNING;

@@ -33,7 +31,6 @@
import com.splunk.opentelemetry.profiler.events.EventPeriods;
import com.splunk.opentelemetry.profiler.exporter.CpuEventExporter;
import com.splunk.opentelemetry.profiler.exporter.PprofCpuEventExporter;
import com.splunk.opentelemetry.profiler.util.FileDeleter;
import com.splunk.opentelemetry.profiler.util.HelpfulExecutors;
import io.opentelemetry.api.logs.Logger;
import io.opentelemetry.javaagent.extension.AgentListener;
@@ -52,7 +49,6 @@
import java.time.Duration;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.function.Consumer;

@AutoService(AgentListener.class)
public class JfrActivator implements AgentListener {
@@ -127,12 +123,6 @@ private void activateJfrAndRunForever(ConfigProperties config, Resource resource

// can't be null, default value is set in Configuration.getProperties
Duration recordingDuration = config.getDuration(CONFIG_KEY_RECORDING_DURATION, null);
RecordingEscapeHatch recordingEscapeHatch =
RecordingEscapeHatch.builder()
.namingConvention(namingConvention)
.configKeepsFilesOnDisk(keepFiles(config))
.recordingDuration(recordingDuration)
.build();
Map<String, String> jfrSettings = buildJfrSettings(config);

EventReader eventReader = new EventReader();
@@ -174,37 +164,27 @@ private void activateJfrAndRunForever(ConfigProperties config, Resource resource
EventProcessingChain eventProcessingChain =
new EventProcessingChain(
eventReader, spanContextualizer, threadDumpProcessor, tlabProcessor);
Consumer<Path> deleter = buildFileDeleter(config);
JfrDirCleanup dirCleanup = new JfrDirCleanup(deleter);

Consumer<Path> onFileFinished = buildOnFileFinished(deleter, dirCleanup);

JfrPathHandler jfrPathHandler =
JfrPathHandler.builder()
.eventProcessingChain(eventProcessingChain)
.onFileFinished(onFileFinished)
.build();

Consumer<Path> onNewRecording = buildOnNewRecording(jfrPathHandler, dirCleanup);
JfrRecordingHandler jfrRecordingHandler =
JfrRecordingHandler.builder().eventProcessingChain(eventProcessingChain).build();

JfrRecorder recorder =
JfrRecorder.builder()
.settings(jfrSettings)
.maxAgeDuration(recordingDuration.multipliedBy(10))
.jfr(JFR.instance)
.onNewRecording(jfrRecordingHandler)
.namingConvention(namingConvention)
.onNewRecordingFile(onNewRecording)
.keepRecordingFiles(keepFiles(config))
.build();

RecordingSequencer sequencer =
RecordingSequencer.builder()
.recordingDuration(recordingDuration)
.recordingEscapeHatch(recordingEscapeHatch)
.recorder(recorder)
.build();

sequencer.start();
dirCleanup.registerShutdownHook();
}

private Logger buildOtelLogger(LogRecordProcessor logProcessor, Resource resource) {
@@ -246,14 +226,6 @@ private Map<String, String> buildJfrSettings(ConfigProperties config) {
return overrides.apply(jfrSettings);
}

private Consumer<Path> buildFileDeleter(ConfigProperties config) {
if (keepFiles(config)) {
logger.log(WARNING, "{0} is enabled, leaving JFR files on disk.", CONFIG_KEY_KEEP_FILES);
return FileDeleter.noopFileDeleter();
}
return FileDeleter.newDeleter();
}

private boolean keepFiles(ConfigProperties config) {
return config.getBoolean(CONFIG_KEY_KEEP_FILES, false);
}

This file was deleted.

This file was deleted.

JfrRecorder.java
@@ -44,17 +44,19 @@ class JfrRecorder

private final Duration maxAgeDuration;
private final JFR jfr;
private final Consumer<Path> onNewRecordingFile;
private final Consumer<InputStream> onNewRecording;
private final RecordingFileNamingConvention namingConvention;
private final boolean keepRecordingFiles;
private volatile Recording recording;
private volatile Instant snapshotStart = Instant.now();

JfrRecorder(Builder builder) {
this.settings = requireNonNull(builder.settings);
this.maxAgeDuration = requireNonNull(builder.maxAgeDuration);
this.jfr = requireNonNull(builder.jfr);
this.onNewRecordingFile = requireNonNull(builder.onNewRecordingFile);
this.onNewRecording = requireNonNull(builder.onNewRecording);
this.namingConvention = requireNonNull(builder.namingConvention);
this.keepRecordingFiles = builder.keepRecordingFiles;
}

public void start() {
@@ -75,24 +77,33 @@ Recording newRecording()

public void flushSnapshot() {
try (Recording snap = jfr.takeSnapshot()) {
Path path = namingConvention.newOutputPath().toAbsolutePath();
logger.log(FINE, "Flushing a JFR snapshot: {0}", path);
Instant snapshotEnd = snap.getStopTime();
try (InputStream in = snap.getStream(snapshotStart, snapshotEnd)) {
try (OutputStream out = Files.newOutputStream(path)) {
copy(in, out);
if (logger.isLoggable(FINE)) {
logger.log(
FINE,
"Wrote JFR dump {0} with size {1}",
new Object[] {path, path.toFile().length()});
Instant start = snapshotStart;
snapshotStart = snapshotEnd;
if (keepRecordingFiles) {
Path path = namingConvention.newOutputPath().toAbsolutePath();
logger.log(FINE, "Flushing a JFR snapshot: {0}", path);
try (InputStream in = snap.getStream(start, snapshotEnd)) {
try (OutputStream out = Files.newOutputStream(path)) {
copy(in, out);
if (logger.isLoggable(FINE)) {
logger.log(
FINE,
"Wrote JFR dump {0} with size {1}",
new Object[] {path, path.toFile().length()});
}
}
}
try (InputStream in = Files.newInputStream(path)) {
onNewRecording.accept(in);
}
} else {
try (InputStream in = snap.getStream(start, snapshotEnd)) {
onNewRecording.accept(in);
}
}
snapshotStart = snapshotEnd;
onNewRecordingFile.accept(path);
} catch (IOException e) {
logger.log(SEVERE, "Error flushing JFR snapshot data to disk", e);
logger.log(SEVERE, "Error handling JFR recording", e);
}
}

@@ -122,7 +133,8 @@ public static class Builder {
private Map<String, String> settings;
private Duration maxAgeDuration;
private JFR jfr = JFR.instance;
private Consumer<Path> onNewRecordingFile;
private Consumer<InputStream> onNewRecording;
private boolean keepRecordingFiles;

public Builder settings(Map<String, String> settings) {
this.settings = settings;
@@ -139,8 +151,8 @@ public Builder jfr(JFR jfr) {
return this;
}

public Builder onNewRecordingFile(Consumer<Path> onNewRecordingFile) {
this.onNewRecordingFile = onNewRecordingFile;
public Builder onNewRecording(Consumer<InputStream> onNewRecording) {
this.onNewRecording = onNewRecording;
return this;
}

@@ -149,6 +161,11 @@ public Builder namingConvention(RecordingFileNamingConvention namingConvention)
return this;
}

public Builder keepRecordingFiles(boolean keepRecordingFiles) {
this.keepRecordingFiles = keepRecordingFiles;
return this;
}

public JfrRecorder build() {
return new JfrRecorder(this);
}