diff --git a/clang/include/clang/Rewrite/Core/HTMLRewrite.h b/clang/include/clang/Rewrite/Core/HTMLRewrite.h index 340411e553479d..eecf589632746c 100644 --- a/clang/include/clang/Rewrite/Core/HTMLRewrite.h +++ b/clang/include/clang/Rewrite/Core/HTMLRewrite.h @@ -24,6 +24,15 @@ class RewriteBuffer; class Preprocessor; namespace html { + struct RelexRewriteCache; + using RelexRewriteCacheRef = std::shared_ptr; + + /// If you need to rewrite the same file multiple times, you can instantiate + /// a RelexRewriteCache and refer functions such as SyntaxHighlight() + /// and HighlightMacros() to it so that to avoid re-lexing the file each time. + /// The cache may outlive the rewriter as long as cached FileIDs and source + /// locations continue to make sense for the translation unit as a whole. + RelexRewriteCacheRef instantiateRelexRewriteCache(); /// HighlightRange - Highlight a range in the source code with the specified /// start/end tags. B/E must be in the same file. This ensures that @@ -67,13 +76,15 @@ namespace html { /// SyntaxHighlight - Relex the specified FileID and annotate the HTML with /// information about keywords, comments, etc. - void SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP); + void SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP, + RelexRewriteCacheRef Cache = nullptr); /// HighlightMacros - This uses the macro table state from the end of the /// file, to reexpand macros and insert (into the HTML) information about the /// macro expansions. This won't be perfectly perfect, but it will be /// reasonably close. - void HighlightMacros(Rewriter &R, FileID FID, const Preprocessor &PP); + void HighlightMacros(Rewriter &R, FileID FID, const Preprocessor &PP, + RelexRewriteCacheRef Cache = nullptr); } // end html namespace } // end clang namespace diff --git a/clang/lib/Rewrite/HTMLRewrite.cpp b/clang/lib/Rewrite/HTMLRewrite.cpp index 083a9c09297e1e..a96ca0764ae73d 100644 --- a/clang/lib/Rewrite/HTMLRewrite.cpp +++ b/clang/lib/Rewrite/HTMLRewrite.cpp @@ -21,8 +21,10 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include -using namespace clang; +using namespace clang; +using namespace llvm; +using namespace html; /// HighlightRange - Highlight a range in the source code with the specified /// start/end tags. B/E must be in the same file. This ensures that @@ -104,6 +106,32 @@ void html::HighlightRange(RewriteBuffer &RB, unsigned B, unsigned E, } } +namespace clang::html { +struct RelexRewriteCache { + // These structs mimic input arguments of HighlightRange(). + struct Highlight { + SourceLocation B, E; + std::string StartTag, EndTag; + bool IsTokenRange; + }; + struct RawHighlight { + unsigned B, E; + std::string StartTag, EndTag; + }; + + // SmallVector isn't appropriate because these vectors are almost never small. + using HighlightList = std::vector; + using RawHighlightList = std::vector; + + DenseMap SyntaxHighlights; + DenseMap MacroHighlights; +}; +} // namespace clang::html + +html::RelexRewriteCacheRef html::instantiateRelexRewriteCache() { + return std::make_shared(); +} + void html::EscapeText(Rewriter &R, FileID FID, bool EscapeSpaces, bool ReplaceTabs) { @@ -442,13 +470,18 @@ input.spoilerhider:checked + label + .spoiler{ /// information about keywords, macro expansions etc. This uses the macro /// table state from the end of the file, so it won't be perfectly perfect, /// but it will be reasonably close. -void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) { - RewriteBuffer &RB = R.getEditBuffer(FID); +static void SyntaxHighlightImpl( + Rewriter &R, FileID FID, const Preprocessor &PP, + llvm::function_ref + HighlightRangeCallback) { + RewriteBuffer &RB = R.getEditBuffer(FID); const SourceManager &SM = PP.getSourceManager(); llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID); + const char *BufferStart = FromFile.getBuffer().data(); + Lexer L(FID, FromFile, SM, PP.getLangOpts()); - const char *BufferStart = L.getBuffer().data(); // Inform the preprocessor that we want to retain comments as tokens, so we // can highlight them. @@ -475,13 +508,13 @@ void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) { // If this is a pp-identifier, for a keyword, highlight it as such. if (Tok.isNot(tok::identifier)) - HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart, - "", ""); + HighlightRangeCallback(RB, TokOffs, TokOffs + TokLen, BufferStart, + "", ""); break; } case tok::comment: - HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart, - "", ""); + HighlightRangeCallback(RB, TokOffs, TokOffs + TokLen, BufferStart, + "", ""); break; case tok::utf8_string_literal: // Chop off the u part of u8 prefix @@ -498,8 +531,8 @@ void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) { [[fallthrough]]; case tok::string_literal: // FIXME: Exclude the optional ud-suffix from the highlighted range. - HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart, - "", ""); + HighlightRangeCallback(RB, TokOffs, TokOffs + TokLen, BufferStart, + "", ""); break; case tok::hash: { // If this is a preprocessor directive, all tokens to end of line are too. @@ -516,8 +549,8 @@ void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) { } // Find end of line. This is a hack. - HighlightRange(RB, TokOffs, TokEnd, BufferStart, - "", ""); + HighlightRangeCallback(RB, TokOffs, TokEnd, BufferStart, + "", ""); // Don't skip the next token. continue; @@ -527,12 +560,43 @@ void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) { L.LexFromRawLexer(Tok); } } +void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP, + RelexRewriteCacheRef Cache) { + RewriteBuffer &RB = R.getEditBuffer(FID); + const SourceManager &SM = PP.getSourceManager(); + llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID); + const char *BufferStart = FromFile.getBuffer().data(); + + if (Cache) { + auto CacheIt = Cache->SyntaxHighlights.find(FID); + if (CacheIt != Cache->SyntaxHighlights.end()) { + for (const RelexRewriteCache::RawHighlight &H : CacheIt->second) { + HighlightRange(RB, H.B, H.E, BufferStart, H.StartTag.data(), + H.EndTag.data()); + } + return; + } + } + + // "Every time you would call HighlightRange, cache the inputs as well." + auto HighlightRangeCallback = [&](RewriteBuffer &RB, unsigned B, unsigned E, + const char *BufferStart, + const char *StartTag, const char *EndTag) { + HighlightRange(RB, B, E, BufferStart, StartTag, EndTag); + + if (Cache) + Cache->SyntaxHighlights[FID].push_back({B, E, StartTag, EndTag}); + }; + + SyntaxHighlightImpl(R, FID, PP, HighlightRangeCallback); +} + +static void HighlightMacrosImpl( + Rewriter &R, FileID FID, const Preprocessor &PP, + llvm::function_ref + HighlightRangeCallback) { -/// HighlightMacros - This uses the macro table state from the end of the -/// file, to re-expand macros and insert (into the HTML) information about the -/// macro expansions. This won't be perfectly perfect, but it will be -/// reasonably close. -void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor& PP) { // Re-lex the raw token stream into a token buffer. const SourceManager &SM = PP.getSourceManager(); std::vector TokenStream; @@ -659,11 +723,44 @@ void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor& PP) { // get highlighted. Expansion = "" + Expansion + ""; - HighlightRange(R, LLoc.getBegin(), LLoc.getEnd(), "", - Expansion.c_str(), LLoc.isTokenRange()); + HighlightRangeCallback(R, LLoc.getBegin(), LLoc.getEnd(), + "", Expansion.c_str(), + LLoc.isTokenRange()); } // Restore the preprocessor's old state. TmpPP.setDiagnostics(*OldDiags); TmpPP.setPragmasEnabled(PragmasPreviouslyEnabled); } + +/// HighlightMacros - This uses the macro table state from the end of the +/// file, to re-expand macros and insert (into the HTML) information about the +/// macro expansions. This won't be perfectly perfect, but it will be +/// reasonably close. +void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor &PP, + RelexRewriteCacheRef Cache) { + if (Cache) { + auto CacheIt = Cache->MacroHighlights.find(FID); + if (CacheIt != Cache->MacroHighlights.end()) { + for (const RelexRewriteCache::Highlight &H : CacheIt->second) { + HighlightRange(R, H.B, H.E, H.StartTag.data(), H.EndTag.data(), + H.IsTokenRange); + } + return; + } + } + + // "Every time you would call HighlightRange, cache the inputs as well." + auto HighlightRangeCallback = [&](Rewriter &R, SourceLocation B, + SourceLocation E, const char *StartTag, + const char *EndTag, bool isTokenRange) { + HighlightRange(R, B, E, StartTag, EndTag, isTokenRange); + + if (Cache) { + Cache->MacroHighlights[FID].push_back( + {B, E, StartTag, EndTag, isTokenRange}); + } + }; + + HighlightMacrosImpl(R, FID, PP, HighlightRangeCallback); +} diff --git a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp index 86947b7929e9b6..fb5030d373c2f1 100644 --- a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp @@ -69,6 +69,8 @@ class HTMLDiagnostics : public PathDiagnosticConsumer { const Preprocessor &PP; const bool SupportsCrossFileDiagnostics; llvm::StringSet<> EmittedHashes; + html::RelexRewriteCacheRef RewriterCache = + html::instantiateRelexRewriteCache(); public: HTMLDiagnostics(PathDiagnosticConsumerOptions DiagOpts, @@ -309,10 +311,6 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D, return; } - // FIXME: This causes each file to be re-parsed and syntax-highlighted - // and macro-expanded separately for each report. We could cache such rewrites - // across all reports and only re-do the part that's actually different: - // the warning/note bubbles. std::string report = GenerateHTML(D, R, SMgr, path, declName.c_str()); if (report.empty()) { llvm::errs() << "warning: no diagnostics generated for main file.\n"; @@ -882,8 +880,8 @@ void HTMLDiagnostics::RewriteFile(Rewriter &R, const PathPieces &path, // If we have a preprocessor, relex the file and syntax highlight. // We might not have a preprocessor if we come from a deserialized AST file, // for example. - html::SyntaxHighlight(R, FID, PP); - html::HighlightMacros(R, FID, PP); + html::SyntaxHighlight(R, FID, PP, RewriterCache); + html::HighlightMacros(R, FID, PP, RewriterCache); } void HTMLDiagnostics::HandlePiece(Rewriter &R, FileID BugFileID, diff --git a/clang/test/Analysis/html_diagnostics/counter.c b/clang/test/Analysis/html_diagnostics/counter.c new file mode 100644 index 00000000000000..22f722424372f7 --- /dev/null +++ b/clang/test/Analysis/html_diagnostics/counter.c @@ -0,0 +1,29 @@ +// RUN: rm -fR %t +// RUN: mkdir %t +// RUN: %clang_analyze_cc1 -analyzer-checker=core \ +// RUN: -analyzer-output=html -o %t -verify %s +// RUN: grep -v CHECK %t/report-*.html | FileCheck %s + + +void foo() { + int *x = 0; + *x = __COUNTER__; // expected-warning{{Dereference of null pointer (loaded from variable 'x')}} +} + +void bar() { + int *y; + *y = __COUNTER__; // expected-warning{{Dereference of undefined pointer value (loaded from variable 'y')}} +} + +// The checks below confirm that both reports have the same values for __COUNTER__. +// +// FIXME: The correct values are (0, 1, 0, 1). Because we re-lex the file in order +// to detect macro expansions for HTML report purposes, they turn into (2, 3, 2, 3) +// by the time we emit HTML. But at least it's better than (2, 3, 4, 5) +// which would have been the case if we re-lexed the file *each* time we +// emitted an HTML report. + +// CHECK: __COUNTER__2 +// CHECK: __COUNTER__3 +// CHECK: __COUNTER__2 +// CHECK: __COUNTER__3