diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 6847c1db39c8ac..482e9dd168cc3d 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -357,6 +357,13 @@ class ASTWriter : public ASTDeserializationListener, /// contexts. llvm::DenseMap AnonymousDeclarationNumbers; + /// The external top level module during the writing process. Used to + /// generate signature for the module file being written. + /// + /// Only meaningful for standard C++ named modules. See the comments in + /// createSignatureForNamedModule() for details. + llvm::DenseSet TouchedTopLevelModules; + /// An update to a Decl. class DeclUpdate { /// A DeclUpdateKind. diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 8a0116fa893247..42da50abdc687c 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1200,6 +1200,31 @@ ASTFileSignature ASTWriter::createSignatureForNamedModule() const { for (auto [ExportImported, _] : WritingModule->Exports) Hasher.update(ExportImported->Signature); + // We combine all the used modules to make sure the signature is precise. + // Consider the case like: + // + // // a.cppm + // export module a; + // export inline int a() { ... } + // + // // b.cppm + // export module b; + // import a; + // export inline int b() { return a(); } + // + // Since both `a()` and `b()` are inline, we need to make sure the BMI of + // `b.pcm` will change after the implementation of `a()` changes. We can't + // get that naturally since we won't record the body of `a()` during the + // writing process. We can't reuse ODRHash here since ODRHash won't calculate + // the called function recursively. So ODRHash will be problematic if `a()` + // calls other inline functions. + // + // Probably we can solve this by a new hash mechanism. But the safety and + // efficiency may a problem too. Here we just combine the hash value of the + // used modules conservatively. + for (Module *M : TouchedTopLevelModules) + Hasher.update(M->Signature); + return ASTFileSignature::create(Hasher.result()); } @@ -6112,8 +6137,12 @@ LocalDeclID ASTWriter::GetDeclRef(const Decl *D) { // If D comes from an AST file, its declaration ID is already known and // fixed. - if (D->isFromASTFile()) + if (D->isFromASTFile()) { + if (isWritingStdCXXNamedModules() && D->getOwningModule()) + TouchedTopLevelModules.insert(D->getOwningModule()->getTopLevelModule()); + return LocalDeclID(D->getGlobalID()); + } assert(!(reinterpret_cast(D) & 0x01) && "Invalid decl pointer"); LocalDeclID &ID = DeclIDs[D]; diff --git a/clang/test/Modules/function-transitive-change.cppm b/clang/test/Modules/function-transitive-change.cppm new file mode 100644 index 00000000000000..cfce669e3a7bc2 --- /dev/null +++ b/clang/test/Modules/function-transitive-change.cppm @@ -0,0 +1,94 @@ +// Test that, in C++20 modules reduced BMI, the implementation detail changes +// in non-inline function may not propagate while the inline function changes +// can get propagate. +// +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: cd %t +// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/a.v1.cppm -emit-reduced-module-interface -o %t/a.v1.pcm +// +// The BMI of A should differ since the different implementation. +// RUN: not diff %t/a.pcm %t/a.v1.pcm &> /dev/null +// +// The BMI of B should change since the dependent inline function changes +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-reduced-module-interface -fmodule-file=a=%t/a.pcm \ +// RUN: -o %t/b.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-reduced-module-interface -fmodule-file=a=%t/a.v1.pcm \ +// RUN: -o %t/b.v1.pcm +// RUN: not diff %t/b.v1.pcm %t/b.pcm &> /dev/null +// +// Test the case with unused partitions. +// RUN: %clang_cc1 -std=c++20 %t/M-A.cppm -emit-reduced-module-interface -o %t/M-A.pcm +// RUN: %clang_cc1 -std=c++20 %t/M-B.cppm -emit-reduced-module-interface -o %t/M-B.pcm +// RUN: %clang_cc1 -std=c++20 %t/M.cppm -emit-reduced-module-interface -o %t/M.pcm \ +// RUN: -fmodule-file=M:partA=%t/M-A.pcm \ +// RUN: -fmodule-file=M:partB=%t/M-B.pcm +// RUN: %clang_cc1 -std=c++20 %t/N.cppm -emit-reduced-module-interface -o %t/N.pcm \ +// RUN: -fmodule-file=M:partA=%t/M-A.pcm \ +// RUN: -fmodule-file=M:partB=%t/M-B.pcm \ +// RUN: -fmodule-file=M=%t/M.pcm +// +// Now we change `M-A.cppm` to `M-A.v1.cppm`. +// RUN: %clang_cc1 -std=c++20 %t/M-A.v1.cppm -emit-reduced-module-interface -o %t/M-A.v1.pcm +// RUN: %clang_cc1 -std=c++20 %t/M.cppm -emit-reduced-module-interface -o %t/M.v1.pcm \ +// RUN: -fmodule-file=M:partA=%t/M-A.v1.pcm \ +// RUN: -fmodule-file=M:partB=%t/M-B.pcm +// RUN: %clang_cc1 -std=c++20 %t/N.cppm -emit-reduced-module-interface -o %t/N.v1.pcm \ +// RUN: -fmodule-file=M:partA=%t/M-A.v1.pcm \ +// RUN: -fmodule-file=M:partB=%t/M-B.pcm \ +// RUN: -fmodule-file=M=%t/M.v1.pcm +// +// The BMI of N can keep unchanged since the N didn't use the changed partition unit 'M:A'. +// RUN: diff %t/N.v1.pcm %t/N.pcm &> /dev/null + +//--- a.cppm +export module a; +export inline int a() { + return 48; +} + +//--- a.v1.cppm +export module a; +export inline int a() { + return 50; +} + +//--- b.cppm +export module b; +import a; +export inline int b() { + return a(); +} + +//--- M-A.cppm +export module M:partA; +export inline int a() { + return 43; +} + +//--- M-A.v1.cppm +export module M:partA; +export inline int a() { + return 50; +} + +//--- M-B.cppm +export module M:partB; +export inline int b() { + return 44; +} + +//--- M.cppm +export module M; +export import :partA; +export import :partB; + +//--- N.cppm +export module N; +import M; + +export inline int n() { + return b(); +} diff --git a/clang/test/Modules/no-transitive-source-location-change.cppm b/clang/test/Modules/no-transitive-source-location-change.cppm index 303142a1af890b..c9d156a74ce822 100644 --- a/clang/test/Modules/no-transitive-source-location-change.cppm +++ b/clang/test/Modules/no-transitive-source-location-change.cppm @@ -1,30 +1,6 @@ // Testing that adding a new line in a module interface unit won't cause the BMI // of consuming module unit changes. // -// RUN: rm -rf %t -// RUN: split-file %s %t -// -// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm -// RUN: %clang_cc1 -std=c++20 %t/A.v1.cppm -emit-module-interface -o %t/A.v1.pcm -// -// The BMI may not be the same since the source location differs. -// RUN: not diff %t/A.pcm %t/A.v1.pcm &> /dev/null -// -// The BMI of B shouldn't change since all the locations remain the same. -// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-module-interface -fmodule-file=A=%t/A.pcm \ -// RUN: -o %t/B.pcm -// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-module-interface -fmodule-file=A=%t/A.v1.pcm \ -// RUN: -o %t/B.v1.pcm -// RUN: diff %t/B.v1.pcm %t/B.pcm &> /dev/null -// -// The BMI of C may change since the locations for instantiations changes. -// RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-module-interface -fmodule-file=A=%t/A.pcm \ -// RUN: -o %t/C.pcm -// RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-module-interface -fmodule-file=A=%t/A.v1.pcm \ -// RUN: -o %t/C.v1.pcm -// RUN: not diff %t/C.v1.pcm %t/C.pcm &> /dev/null -// -// Test again with reduced BMI. // RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm // RUN: %clang_cc1 -std=c++20 %t/A.v1.cppm -emit-reduced-module-interface -o %t/A.v1.pcm //