From 721dd3bc2f159f58542653b56ae272f1504875f8 Mon Sep 17 00:00:00 2001 From: Artem Dergachev Date: Wed, 10 Jan 2024 18:45:27 -0800 Subject: [PATCH] [analyzer] NFC: Don't regenerate duplicate HTML reports. This is a performance optimization for HTML diagnostics output mode. Currently, HTML reports are generated incredibly inefficiently: * The HTMLRewriter is re-run from scratch on every file on every report. Each such re-run involves re-lexing the entire file and producing a syntax-highlighted webpage of the entire file, with text behind macros duplicated as pop-up macro expansion tooltips. Then, warning and note bubbles are injected into the page. Only the bubble part is different across reports; everything else can theoretically be cached. * Additionally, if duplicate reports are emitted (with the same issue hash), HTMLRewriter will be re-run even though the output file is going to be discarded due to filename collision. This is mostly an issue for path-insensitive bug reports because path-sensitive bug reports are already deduplicated by the BugReporter as part of searching for the shortest bug path. But on some translation units almost 80% of bug reports are dry-run here. We only get away with all this because there are usually very few reports emitted per file. But if loud checkers are enabled, such as `webkit.*`, this may explode in complexity and even cause the compiler to run over the 32-bit SourceLocation addressing limit. (We're re-lexing everything each time, remember?) This patch hotfixes the *second* problem. It also adds a FIXME for the first problem, which will require more yak shaving to solve. 
rdar://120801986 --- clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp index 69d25120dcd43b..86947b7929e9b6 100644 --- a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp @@ -68,6 +68,7 @@ class HTMLDiagnostics : public PathDiagnosticConsumer { bool noDir = false; const Preprocessor &PP; const bool SupportsCrossFileDiagnostics; + llvm::StringSet<> EmittedHashes; public: HTMLDiagnostics(PathDiagnosticConsumerOptions DiagOpts, @@ -301,6 +302,17 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D, } } + SmallString<32> IssueHash = getIssueHash(D, PP); + auto [It, IsNew] = EmittedHashes.insert(IssueHash); + if (!IsNew) { + // We've already emitted a duplicate issue. It'll get overwritten anyway. + return; + } + + // FIXME: This causes each file to be re-parsed and syntax-highlighted + // and macro-expanded separately for each report. We could cache such rewrites + // across all reports and only re-do the part that's actually different: + // the warning/note bubbles. std::string report = GenerateHTML(D, R, SMgr, path, declName.c_str()); if (report.empty()) { llvm::errs() << "warning: no diagnostics generated for main file.\n"; @@ -332,7 +344,7 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D, << declName.c_str() << "-" << offsetDecl << "-"; } - FileName << StringRef(getIssueHash(D, PP)).substr(0, 6).str() << ".html"; + FileName << StringRef(IssueHash).substr(0, 6).str() << ".html"; SmallString<128> ResultPath; llvm::sys::path::append(ResultPath, Directory, FileName.str());