Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL] Move SYCL Module Splitting to library. Part 2 #13282

Merged
merged 8 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/PropertySetIO.h"

#include <memory>
#include <string>
Expand All @@ -36,6 +37,23 @@ enum IRSplitMode {
SPLIT_NONE // no splitting
};

/// Describes one image produced by module splitting: the file the split
/// module's IR was written to, the property sets attached to it, and its symbol
/// table.
struct SplittedImage {
  /// Path of the file that holds the IR of the split module.
  std::string ModuleFilePath;
  /// Property sets associated with this image.
  util::PropertySetRegistry Properties;
  /// Symbol table of the image, as produced by ModuleDesc::makeSymbolTable().
  std::string Symbols;

  SplittedImage() = default;
  // Copy operations take a reference-to-const so that const objects and
  // temporaries can be copied too; a non-const reference parameter would
  // reject both.
  SplittedImage(const SplittedImage &) = default;
  SplittedImage &operator=(const SplittedImage &) = default;
  SplittedImage(SplittedImage &&) = default;
  SplittedImage &operator=(SplittedImage &&) = default;

  /// Builds an image description from its parts. \p Properties and \p Symbols
  /// are taken by value and moved into place.
  SplittedImage(std::string_view File, util::PropertySetRegistry Properties,
                std::string Symbols)
      : ModuleFilePath(File), Properties(std::move(Properties)),
        Symbols(std::move(Symbols)) {}
};

// A vector that contains all entry point functions in a split module.
using EntryPointSet = SetVector<Function *>;

Expand Down Expand Up @@ -193,6 +211,8 @@ class ModuleDesc {

ModuleDesc clone() const;

/// Returns the names of the entry point functions of this module, each
/// followed by a newline.
std::string makeSymbolTable() const;

#ifndef NDEBUG
void verifyESIMDProperty() const;
void dump() const;
Expand Down Expand Up @@ -261,6 +281,16 @@ void dumpEntryPoints(const Module &M, bool OnlyKernelsAreEntryPoints = false,
const char *msg = "", int Tab = 0);
#endif // NDEBUG

struct ModuleSplitterSettings {
asudarsa marked this conversation as resolved.
Show resolved Hide resolved
IRSplitMode Mode;
bool OutputAssembly = false; // Bitcode or LLVM IR.
StringRef OutputPrefix;
};

/// Splits the given module \p M according to the given \p Settings.
///
/// Every split is saved to its own file. The file name is
/// \p Settings.OutputPrefix followed by "_<index>", then "_esimd" for ESIMD
/// splits, then ".ll" or ".bc" depending on \p Settings.OutputAssembly.
///
/// \returns one SplittedImage per split, or the first error encountered while
/// saving a split.
Expected<std::vector<SplittedImage>>
splitSYCLModule(std::unique_ptr<Module> M, ModuleSplitterSettings Settings);

} // namespace module_split

} // namespace llvm
Expand Down
75 changes: 75 additions & 0 deletions llvm/lib/SYCLLowerIR/ModuleSplitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,20 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeWriterPass.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IRPrinter/IRPrintingPasses.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/SYCLLowerIR/DeviceGlobals.h"
#include "llvm/SYCLLowerIR/LowerInvokeSimd.h"
#include "llvm/SYCLLowerIR/SYCLUtils.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
Expand Down Expand Up @@ -733,6 +737,14 @@ void EntryPointGroup::rebuild(const Module &M) {
Functions.insert(const_cast<Function *>(&F));
}

std::string ModuleDesc::makeSymbolTable() const {
std::string ST;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

llvm::SmallString would be a better fit to reduce amount of re-allocations

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The total size of a string is rarely small to benefit from small string optimizations. C++ mangled names are very long. What size would you suggest for SmallString for this case?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is not that it would fit into a pre-allocated area on the stack, it is that SmallString is a wrapper around SmallVector, which does not fully re-allocate on every += operation, because its capacity grows at a different pace similar to std::vector's push_back

for (const Function *F : EntryPoints.Functions)
ST += (Twine(F->getName()) + "\n").str();

return ST;
}

namespace {
// This is a helper class, which allows to group/categorize function based on
// provided rules. It is intended to be used in device code split
Expand Down Expand Up @@ -1112,5 +1124,68 @@ SmallVector<ModuleDesc, 2> splitByESIMD(ModuleDesc &&MD,
return Result;
}

/// Writes module \p M to the file at \p FilePath, as textual LLVM IR when
/// \p OutputAssembly is true and as bitcode otherwise.
///
/// \returns Error::success() on success, or an Error wrapping the
/// std::error_code reported when the file cannot be opened, written or closed.
Error saveModuleIRInFile(Module &M, StringRef FilePath, bool OutputAssembly) {
  int FD = -1;
  if (std::error_code EC = sys::fs::openFileForWrite(FilePath, FD))
    return errorCodeToError(EC);

  raw_fd_ostream OS(FD, /*shouldClose=*/true);
  ModulePassManager MPM;
  ModuleAnalysisManager MAM;
  PassBuilder PB;
  PB.registerModuleAnalyses(MAM);
  if (OutputAssembly)
    MPM.addPass(PrintModulePass(OS));
  else
    MPM.addPass(BitcodeWriterPass(OS));

  MPM.run(M, MAM);

  // Flush and close explicitly so that I/O failures are handed back to the
  // caller. An error left pending on a raw_fd_ostream is otherwise reported
  // as a fatal error from its destructor, which is not acceptable in library
  // code that already returns llvm::Error.
  OS.close();
  if (OS.has_error()) {
    std::error_code EC = OS.error();
    OS.clear_error();
    return errorCodeToError(EC);
  }
  return Error::success();
}

/// Saves the module held by \p MD to disk and describes the result.
///
/// The output path is \p Prefix, followed by "_esimd" when \p MD is an ESIMD
/// module, followed by ".ll" if \p OutputAssembly is set and ".bc" otherwise.
///
/// \returns a SplittedImage carrying that path and the symbol table of \p MD,
/// or the error reported while writing the IR.
Expected<SplittedImage> saveModuleDesc(ModuleDesc &MD, std::string Prefix,
                                       bool OutputAssembly) {
  // Turn the prefix into the final file name: <Prefix>[_esimd].<ll|bc>.
  if (MD.isESIMD())
    Prefix.append("_esimd");
  Prefix.append(OutputAssembly ? ".ll" : ".bc");

  if (Error SaveErr =
          saveModuleIRInFile(MD.getModule(), Prefix, OutputAssembly))
    return std::move(SaveErr);

  SplittedImage Image;
  Image.ModuleFilePath = Prefix;
  Image.Symbols = MD.makeSymbolTable();
  return std::move(Image);
}

/// Splits \p M as requested by \p Settings.Mode and saves every split to its
/// own file. The file name prefix of a split is \p Settings.OutputPrefix
/// followed by "_" and the zero-based index of the split; saveModuleDesc()
/// appends the remaining suffixes.
///
/// \returns one SplittedImage per split in the order the splitter yields them,
/// or the first error reported while saving a split.
Expected<std::vector<SplittedImage>>
splitSYCLModule(std::unique_ptr<Module> M, ModuleSplitterSettings Settings) {
  ModuleDesc InputMD = std::move(M); // makeModuleDesc() ?
  // FIXME: false arguments are temporary for now.
  auto Splitter = getDeviceCodeSplitter(std::move(InputMD), Settings.Mode,
                                        false, false);

  std::vector<SplittedImage> Images;
  for (size_t Index = 0; Splitter->hasMoreSplits(); ++Index) {
    ModuleDesc Part = Splitter->nextSplit();
    Part.fixupLinkageOfDirectInvokeSimdTargets();

    std::string FilePrefix =
        Settings.OutputPrefix.str() + "_" + std::to_string(Index);
    Expected<SplittedImage> ImageOrErr =
        saveModuleDesc(Part, FilePrefix, Settings.OutputAssembly);
    if (!ImageOrErr)
      return ImageOrErr.takeError();

    Images.push_back(std::move(*ImageOrErr));
  }

  return Images;
}

} // namespace module_split
} // namespace llvm
1 change: 1 addition & 0 deletions llvm/test/lit.cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ def get_asan_rtlib():
"sanstats",
"llvm-remarkutil",
"spirv-to-ir-wrapper",
"sycl-module-split",
]
)

Expand Down
121 changes: 121 additions & 0 deletions llvm/test/tools/sycl-module-split/auto-module-split-1.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
; RUN: sycl-module-split -split=auto -S < %s -o %t
maksimsab marked this conversation as resolved.
Show resolved Hide resolved
; By default, auto mode is equivalent to source mode.
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-TU0,CHECK
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-TU1,CHECK
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-TU0-TXT
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-TU1-TXT

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "spir64-unknown-linux"

$_Z3barIiET_S0_ = comdat any

; CHECK-TU1-NOT: @{{.*}}GV{{.*}}
; CHECK-TU0: @{{.*}}GV{{.*}} = internal addrspace(1) constant [1 x i32] [i32 42], align 4
@_ZL2GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4

; CHECK-TU1: define dso_local spir_kernel void @{{.*}}TU0_kernel0{{.*}}
; CHECK-TU1-TXT: {{.*}}TU0_kernel0{{.*}}
; CHECK-TU0-NOT: define dso_local spir_kernel void @{{.*}}TU0_kernel0{{.*}}
; CHECK-TU0-TXT-NOT: {{.*}}TU0_kernel0{{.*}}

; CHECK-TU1: call spir_func void @{{.*}}foo{{.*}}()

; Kernel carrying attribute group #0; its body only calls @_Z3foov.
define dso_local spir_kernel void @_ZTSZ4mainE11TU0_kernel0() #0 {
entry:
call spir_func void @_Z3foov()
ret void
}

; CHECK-TU1: define dso_local spir_func void @{{.*}}foo{{.*}}()
; CHECK-TU0-NOT: define dso_local spir_func void @{{.*}}foo{{.*}}()

; CHECK-TU1: call spir_func i32 @{{.*}}bar{{.*}}(i32 1)

; Helper called by TU0_kernel0: calls @_Z3barIiET_S0_ with 1, adds 2 to the
; result and stores the sum into a local stack slot.
define dso_local spir_func void @_Z3foov() {
entry:
%a = alloca i32, align 4
%call = call spir_func i32 @_Z3barIiET_S0_(i32 1)
%add = add nsw i32 2, %call
store i32 %add, ptr %a, align 4
ret void
}

; CHECK-TU1: define {{.*}} spir_func i32 @{{.*}}bar{{.*}}(i32 %arg)
; CHECK-TU0-NOT: define {{.*}} spir_func i32 @{{.*}}bar{{.*}}(i32 %arg)

; Function Attrs: nounwind
; linkonce_odr function in a comdat, called by @_Z3foov: spills %arg to a stack
; slot, reloads it and returns the reloaded value.
define linkonce_odr dso_local spir_func i32 @_Z3barIiET_S0_(i32 %arg) comdat {
entry:
%arg.addr = alloca i32, align 4
store i32 %arg, ptr %arg.addr, align 4
%0 = load i32, ptr %arg.addr, align 4
ret i32 %0
}

; CHECK-TU1: define dso_local spir_kernel void @{{.*}}TU0_kernel1{{.*}}()
; CHECK-TU1-TXT: {{.*}}TU0_kernel1{{.*}}
; CHECK-TU0-NOT: define dso_local spir_kernel void @{{.*}}TU0_kernel1{{.*}}()
; CHECK-TU0-TXT-NOT: {{.*}}TU0_kernel1{{.*}}

; CHECK-TU1: call spir_func void @{{.*}}foo1{{.*}}()

; Second kernel carrying attribute group #0; its body only calls @_Z4foo1v.
define dso_local spir_kernel void @_ZTSZ4mainE11TU0_kernel1() #0 {
entry:
call spir_func void @_Z4foo1v()
ret void
}

; CHECK-TU1: define dso_local spir_func void @{{.*}}foo1{{.*}}()
; CHECK-TU0-NOT: define dso_local spir_func void @{{.*}}foo1{{.*}}()

; Function Attrs: nounwind
; Helper called by TU0_kernel1: stores the constant 2 into a local stack slot.
define dso_local spir_func void @_Z4foo1v() {
entry:
%a = alloca i32, align 4
store i32 2, ptr %a, align 4
ret void
}

; CHECK-TU1-NOT: define dso_local spir_kernel void @{{.*}}TU1_kernel{{.*}}()
; CHECK-TU1-TXT-NOT: {{.*}}TU1_kernel{{.*}}
; CHECK-TU0: define dso_local spir_kernel void @{{.*}}TU1_kernel{{.*}}()
; CHECK-TU0-TXT: {{.*}}TU1_kernel{{.*}}

; CHECK-TU0: call spir_func void @{{.*}}foo2{{.*}}()

; Kernel carrying attribute group #1; its body only calls @_Z4foo2v.
define dso_local spir_kernel void @_ZTSZ4mainE10TU1_kernel() #1 {
entry:
call spir_func void @_Z4foo2v()
ret void
}

; CHECK-TU1-NOT: define dso_local spir_func void @{{.*}}foo2{{.*}}()
; CHECK-TU0: define dso_local spir_func void @{{.*}}foo2{{.*}}()

; Function Attrs: nounwind
; Helper called by TU1_kernel and the only user of the internal global @_ZL2GV
; in this file: loads its first element through an addrspace(1)->addrspace(4)
; cast, adds 4 and stores the sum into a local stack slot.
define dso_local spir_func void @_Z4foo2v() {
entry:
%a = alloca i32, align 4
; CHECK-TU0: %0 = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @{{.*}}GV{{.*}} to ptr addrspace(4)), align 4
%0 = load i32, ptr addrspace(4) getelementptr inbounds ([1 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(1) @_ZL2GV to ptr addrspace(4)), i64 0, i64 0), align 4
%add = add nsw i32 4, %0
store i32 %add, ptr %a, align 4
ret void
}

attributes #0 = { "sycl-module-id"="TU1.cpp" }
attributes #1 = { "sycl-module-id"="TU2.cpp" }

; Metadata is saved in both modules.
; CHECK: !opencl.spir.version = !{!0, !0}
; CHECK: !spirv.Source = !{!1, !1}

!opencl.spir.version = !{!0, !0}
!spirv.Source = !{!1, !1}

; CHECK: !0 = !{i32 1, i32 2}
; CHECK: !1 = !{i32 4, i32 100000}

!0 = !{i32 1, i32 2}
!1 = !{i32 4, i32 100000}
Loading
Loading