Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Support delayed trigger of local triage report #6479

Merged
merged 2 commits into from
Jan 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import datadog.trace.agent.tooling.usm.UsmExtractorImpl;
import datadog.trace.agent.tooling.usm.UsmMessageFactoryImpl;
import datadog.trace.api.InstrumenterConfig;
import datadog.trace.api.Platform;
import datadog.trace.api.ProductActivation;
import datadog.trace.api.telemetry.IntegrationsCollector;
import datadog.trace.bootstrap.FieldBackedContextAccessor;
Expand Down Expand Up @@ -198,7 +199,9 @@ public static ClassFileTransformer installBytebuddyAgent(
log.debug("Installed {} instrumenter(s)", installedCount);
}

InstrumenterFlare.register();
if (!Platform.isNativeImageBuilder()) {
InstrumenterFlare.register();
}

if (InstrumenterConfig.get().isTelemetryEnabled()) {
InstrumenterState.setObserver(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ public final class GeneralConfig {

// Boolean key for debug mode; read with a default of false, and its value is the default for
// triage mode when no triage report trigger is set.
public static final String TRACE_DEBUG = "trace.debug";
// Boolean key for triage mode; only consulted when no triage report trigger is configured.
public static final String TRACE_TRIAGE = "trace.triage";
// String key for the delay after which a local triage report is written: a whole number with an
// optional H/M/S unit (either case). Setting it forces triage mode on.
public static final String TRIAGE_REPORT_TRIGGER = "triage.report.trigger";
// String key for the directory the triage report is written to; only read when a trigger is set,
// defaulting to the "java.io.tmpdir" property.
public static final String TRIAGE_REPORT_DIR = "triage.report.dir";

// Boolean key controlling startup logs.
public static final String STARTUP_LOGS_ENABLED = "trace.startup.logs";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,13 @@
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collections;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipOutputStream;
import okhttp3.HttpUrl;
import okhttp3.MediaType;
Expand All @@ -35,8 +40,12 @@ final class TracerFlareService {

// Path segments appended to the agent URL to build the flare upload URL.
private static final String FLARE_ENDPOINT = "tracer_flare/v1";

// Common file-name prefix shared by uploaded flares and locally written triage reports.
private static final String REPORT_PREFIX = "dd-java-flare-";

// Media type of the zipped flare body sent to the agent.
private static final MediaType OCTET_STREAM = MediaType.get("application/octet-stream");

// Triage trigger syntax: one or more digits (group 1) followed by an optional unit (group 2),
// where the unit is H, M or S in either case.
private static final Pattern DELAY_TRIGGER = Pattern.compile("(\\d+)([HhMmSs]?)");

// Scheduler on which the triage preparation and report-writing tasks are queued.
private final AgentTaskScheduler scheduler = new AgentTaskScheduler(TRACER_FLARE);

private final Config config;
Expand All @@ -60,6 +69,49 @@ final class TracerFlareService {
this.okHttpClient = okHttpClient;
this.flareUrl = agentUrl.newBuilder().addPathSegments(FLARE_ENDPOINT).build();
this.tracer = tracer;

applyTriageReportTrigger(config.getTriageReportTrigger());
}

/**
 * Schedules a local triage report according to the configured trigger, if there is one.
 *
 * <p>The trigger is a whole number optionally followed by a unit: {@code H} for hours, {@code M}
 * for minutes or {@code S} for seconds (either case). A missing unit means seconds. Null or empty
 * triggers are ignored; triggers that cannot be interpreted are logged and ignored.
 *
 * @param triageTrigger the configured trigger; may be null or empty
 */
private void applyTriageReportTrigger(String triageTrigger) {
  if (null == triageTrigger || triageTrigger.isEmpty()) {
    return;
  }
  Matcher delayMatcher = DELAY_TRIGGER.matcher(triageTrigger);
  if (!delayMatcher.matches()) {
    log.info("Unrecognized triage trigger {}", triageTrigger);
    return;
  }
  long delay;
  try {
    // parse as long: the pattern accepts any number of digits, which can exceed the int range
    delay = Long.parseLong(delayMatcher.group(1));
  } catch (NumberFormatException e) {
    // too many digits even for a long; this runs during construction, so report it like any
    // other unusable trigger instead of letting the exception escape
    log.info("Unrecognized triage trigger {}", triageTrigger);
    return;
  }
  String unit = delayMatcher.group(2);
  if ("H".equalsIgnoreCase(unit)) {
    delay = TimeUnit.HOURS.toSeconds(delay);
  } else if ("M".equalsIgnoreCase(unit)) {
    delay = TimeUnit.MINUTES.toSeconds(delay);
  }
  // any other unit (S or none) is already in seconds
  scheduleTriageReport(delay);
}

/**
 * Schedules the preparation and the writing of a local triage report.
 *
 * <p>Two tasks are queued: one that calls {@code prepareForFlare("triage")} up to 10 minutes
 * before the report is due, and one that writes the flare zip into the configured triage
 * directory once the delay has elapsed. {@code cleanupAfterFlare()} runs after the write attempt
 * whether or not it succeeds.
 *
 * @param delayInSeconds time to wait, in seconds, before writing the report
 */
private void scheduleTriageReport(long delayInSeconds) {
  Path triagePath = Paths.get(config.getTriageReportDir());
  // prepare at most 10 minutes before collection of the report, to match remote flare behaviour;
  // clamp at zero so that shorter delays start preparing straight away rather than handing a
  // negative delay to the scheduler
  long prepareDelayInSeconds = Math.max(0L, delayInSeconds - 600);
  scheduler.schedule(() -> prepareForFlare("triage"), prepareDelayInSeconds, TimeUnit.SECONDS);
  scheduler.schedule(
      () -> {
        try {
          // timestamp the name so repeated reports in the same directory do not collide
          String reportName = REPORT_PREFIX + System.currentTimeMillis() + ".zip";
          Path reportPath = triagePath.resolve(reportName);
          log.info("Writing triage report to {}", reportPath);
          Files.write(reportPath, buildFlareZip(true));
        } catch (IOException e) {
          throw new RuntimeException(e);
        } finally {
          cleanupAfterFlare();
        }
      },
      delayInSeconds,
      TimeUnit.SECONDS);
}

public synchronized void prepareForFlare(String logLevel) {
Expand Down Expand Up @@ -112,7 +164,7 @@ public void sendFlare(String caseId, String email, String hostname) {
void doSend(String caseId, String email, String hostname, boolean dumpThreads) {
log.debug("Sending tracer flare");
try {
String flareName = "java-flare-" + caseId + "-" + System.currentTimeMillis() + ".zip";
String flareName = REPORT_PREFIX + caseId + "-" + System.currentTimeMillis() + ".zip";

RequestBody report = RequestBody.create(OCTET_STREAM, buildFlareZip(dumpThreads));

Expand Down
25 changes: 24 additions & 1 deletion internal-api/src/main/java/datadog/trace/api/Config.java
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,8 @@
import static datadog.trace.api.config.GeneralConfig.TRACER_METRICS_MAX_PENDING;
import static datadog.trace.api.config.GeneralConfig.TRACE_DEBUG;
import static datadog.trace.api.config.GeneralConfig.TRACE_TRIAGE;
import static datadog.trace.api.config.GeneralConfig.TRIAGE_REPORT_DIR;
import static datadog.trace.api.config.GeneralConfig.TRIAGE_REPORT_TRIGGER;
import static datadog.trace.api.config.GeneralConfig.VERSION;
import static datadog.trace.api.config.IastConfig.IAST_DEBUG_ENABLED;
import static datadog.trace.api.config.IastConfig.IAST_DETECTION_MODE;
Expand Down Expand Up @@ -825,6 +827,9 @@ static class HostNameHolder {

// Debug mode flag; also the default for triage mode when no report trigger is configured.
private final boolean debugEnabled;
// Triage mode flag; forced to true whenever a triage report trigger is configured.
private final boolean triageEnabled;
// Raw triage report trigger string as configured; null when not set.
private final String triageReportTrigger;
// Directory for triage reports; null unless a triage report trigger is configured.
private final String triageReportDir;

private final boolean startupLogsEnabled;
private final String configFileStatus;

Expand Down Expand Up @@ -1859,7 +1864,15 @@ private Config(final ConfigProvider configProvider, final InstrumenterConfig ins
servletAsyncTimeoutError = configProvider.getBoolean(SERVLET_ASYNC_TIMEOUT_ERROR, true);

debugEnabled = configProvider.getBoolean(TRACE_DEBUG, false);
triageEnabled = configProvider.getBoolean(TRACE_TRIAGE, debugEnabled); // debug implies triage
triageReportTrigger = configProvider.getString(TRIAGE_REPORT_TRIGGER);
if (null != triageReportTrigger) {
// setting a trigger implies the triage directory and triage mode should be enabled
triageReportDir = configProvider.getString(TRIAGE_REPORT_DIR, getProp("java.io.tmpdir"));
triageEnabled = true;
} else {
triageReportDir = null;
triageEnabled = configProvider.getBoolean(TRACE_TRIAGE, debugEnabled);
}

startupLogsEnabled =
configProvider.getBoolean(STARTUP_LOGS_ENABLED, DEFAULT_STARTUP_LOGS_ENABLED);
Expand Down Expand Up @@ -3124,6 +3137,14 @@ public boolean isTriageEnabled() {
return triageEnabled;
}

/**
 * Returns the raw triage report trigger as configured.
 *
 * @return the trigger string, or {@code null} when no trigger was configured
 */
public String getTriageReportTrigger() {
  return this.triageReportTrigger;
}

/**
 * Returns the directory that triage reports are written to.
 *
 * @return the triage report directory, or {@code null} when no triage report trigger was
 *     configured
 */
public String getTriageReportDir() {
  return this.triageReportDir;
}

/**
 * Reports whether startup logs are enabled.
 *
 * @return the configured startup-logs flag
 */
public boolean isStartupLogsEnabled() {
  return this.startupLogsEnabled;
}
Expand Down Expand Up @@ -4202,6 +4223,8 @@ public String toString() {
+ debugEnabled
+ ", triageEnabled="
+ triageEnabled
+ ", triageReportDir="
+ triageReportDir
+ ", startLogsEnabled="
+ startupLogsEnabled
+ ", configFile='"
Expand Down