diff --git a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h index 374fa65bd02e3b..44c9ded317fa58 100644 --- a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h +++ b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h @@ -27,6 +27,19 @@ namespace ROCDL { /// 5. Returns an empty string. StringRef getROCMPath(); +/// Helper enum for specifying the AMD GCN device libraries required for +/// compilation. +enum class AMDGCNLibraries : uint32_t { + None = 0, + Ockl = 1, + Ocml = 2, + OpenCL = 4, + Hip = 8, + LastLib = Hip, + LLVM_MARK_AS_BITMASK_ENUM(LastLib), + All = (LastLib << 1) - 1 +}; + /// Base class for all ROCDL serializations from GPU modules into binary /// strings. By default this class serializes into LLVM bitcode. class SerializeGPUModuleBase : public LLVM::ModuleToObject { @@ -49,8 +62,8 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { /// Returns the bitcode files to be loaded. ArrayRef getFileList() const; - /// Appends standard ROCm device libraries like `ocml.bc`, `ockl.bc`, etc. - LogicalResult appendStandardLibs(); + /// Appends standard ROCm device libraries to `fileList`. + LogicalResult appendStandardLibs(AMDGCNLibraries libs); /// Loads the bitcode files in `fileList`. virtual std::optional>> @@ -63,15 +76,20 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { LogicalResult handleBitcodeFile(llvm::Module &module) override; protected: - /// Appends the paths of common ROCm device libraries to `libs`. - LogicalResult getCommonBitcodeLibs(llvm::SmallVector &libs, - SmallVector &libPath, - StringRef isaVersion); - /// Adds `oclc` control variables to the LLVM module. 
- void addControlVariables(llvm::Module &module, bool wave64, bool daz, - bool finiteOnly, bool unsafeMath, bool fastMath, - bool correctSqrt, StringRef abiVer); + void addControlVariables(llvm::Module &module, AMDGCNLibraries libs, + bool wave64, bool daz, bool finiteOnly, + bool unsafeMath, bool fastMath, bool correctSqrt, + StringRef abiVer); + + /// Compiles assembly to a binary. + virtual std::optional> + compileToBinary(const std::string &serializedISA); + + /// Default implementation of `ModuleToObject::moduleToObject`. + std::optional> + moduleToObjectImpl(const gpu::TargetOptions &targetOptions, + llvm::Module &llvmModule); /// Returns the assembled ISA. std::optional> assembleIsa(StringRef isa); @@ -84,6 +102,9 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { /// List of LLVM bitcode files to link to. SmallVector fileList; + + /// AMD GCN libraries to use when linking, the default is using none. + AMDGCNLibraries deviceLibs = AMDGCNLibraries::None; }; } // namespace ROCDL } // namespace mlir diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt index 61ab298ebfb986..08c8aea36fac9e 100644 --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -106,7 +106,7 @@ if(MLIR_ENABLE_ROCM_CONVERSIONS) "Building mlir with ROCm support requires the AMDGPU backend") endif() - set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs") + set(DEFAULT_ROCM_PATH "" CACHE PATH "Fallback path to search for ROCm installs") target_compile_definitions(obj.MLIRGPUTransforms PRIVATE __DEFAULT_ROCM_PATH__="${DEFAULT_ROCM_PATH}" diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt index 5a3fa160850b43..4393ff1775ef9a 100644 --- a/mlir/lib/Target/LLVM/CMakeLists.txt +++ b/mlir/lib/Target/LLVM/CMakeLists.txt @@ -123,17 +123,12 @@ add_mlir_dialect_library(MLIRROCDLTarget ) if(MLIR_ENABLE_ROCM_CONVERSIONS) - if (NOT ("AMDGPU" IN_LIST 
LLVM_TARGETS_TO_BUILD)) - message(SEND_ERROR - "Building mlir with ROCm support requires the AMDGPU backend") - endif() - if (DEFINED ROCM_PATH) set(DEFAULT_ROCM_PATH "${ROCM_PATH}" CACHE PATH "Fallback path to search for ROCm installs") elseif(DEFINED ENV{ROCM_PATH}) set(DEFAULT_ROCM_PATH "$ENV{ROCM_PATH}" CACHE PATH "Fallback path to search for ROCm installs") else() - set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs") + set(DEFAULT_ROCM_PATH "" CACHE PATH "Fallback path to search for ROCm installs") endif() message(VERBOSE "MLIR Default ROCM toolkit path: ${DEFAULT_ROCM_PATH}") diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp index cc13e5b7436ea7..fdf1c93b372105 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp @@ -17,9 +17,6 @@ #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/Support/FileUtilities.h" #include "mlir/Target/LLVM/ROCDL/Utils.h" -#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" -#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" -#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Export.h" #include "llvm/IR/Constants.h" @@ -112,8 +109,9 @@ SerializeGPUModuleBase::SerializeGPUModuleBase( if (auto file = dyn_cast(attr)) fileList.push_back(file.str()); - // Append standard ROCm device bitcode libraries to the files to be loaded. - (void)appendStandardLibs(); + // By default add all libraries if the toolkit path is not empty. 
+ if (!getToolkitPath().empty()) + deviceLibs = AMDGCNLibraries::All; } void SerializeGPUModuleBase::init() { @@ -138,29 +136,67 @@ ArrayRef SerializeGPUModuleBase::getFileList() const { return fileList; } -LogicalResult SerializeGPUModuleBase::appendStandardLibs() { +LogicalResult SerializeGPUModuleBase::appendStandardLibs(AMDGCNLibraries libs) { + if (libs == AMDGCNLibraries::None) + return success(); StringRef pathRef = getToolkitPath(); - if (!pathRef.empty()) { - SmallVector path; - path.insert(path.begin(), pathRef.begin(), pathRef.end()); - llvm::sys::path::append(path, "amdgcn", "bitcode"); - pathRef = StringRef(path.data(), path.size()); - if (!llvm::sys::fs::is_directory(pathRef)) { - getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef - << " does not exist or is not a directory."; - return failure(); - } - StringRef isaVersion = - llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip)); - isaVersion.consume_front("gfx"); - return getCommonBitcodeLibs(fileList, path, isaVersion); + // Fail if the toolkit is empty. + if (pathRef.empty()) + return failure(); + + // Get the path for the device libraries + SmallString<256> path; + path.insert(path.begin(), pathRef.begin(), pathRef.end()); + llvm::sys::path::append(path, "amdgcn", "bitcode"); + pathRef = StringRef(path.data(), path.size()); + + // Fail if the path is invalid. + if (!llvm::sys::fs::is_directory(pathRef)) { + getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef + << " does not exist or is not a directory."; + return failure(); } + + // Get the ISA version. + StringRef isaVersion = + llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip)); + isaVersion.consume_front("gfx"); + + // Helper function for adding a library. 
+ auto addLib = [&](const Twine &lib) -> bool { + auto baseSize = path.size(); + llvm::sys::path::append(path, lib); + StringRef pathRef(path.data(), path.size()); + if (!llvm::sys::fs::is_regular_file(pathRef)) { + getOperation().emitRemark() << "Bitcode library path: " << pathRef + << " does not exist or is not a file.\n"; + return true; + } + fileList.push_back(pathRef.str()); + path.truncate(baseSize); + return false; + }; + + // Add ROCm device libraries. Fail if any of the libraries is not found, i.e. + // if any of the `addLib` failed. + if ((any(libs & AMDGCNLibraries::Ocml) && addLib("ocml.bc")) || + (any(libs & AMDGCNLibraries::Ockl) && addLib("ockl.bc")) || + (any(libs & AMDGCNLibraries::Hip) && addLib("hip.bc")) || + (any(libs & AMDGCNLibraries::OpenCL) && addLib("opencl.bc")) || + (any(libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl)) && + addLib("oclc_isa_version_" + isaVersion + ".bc"))) + return failure(); return success(); } std::optional>> SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) { SmallVector> bcFiles; + // Return if there are no libs to load. + if (deviceLibs == AMDGCNLibraries::None && fileList.empty()) + return bcFiles; + if (failed(appendStandardLibs(deviceLibs))) + return std::nullopt; if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles, true))) return std::nullopt; @@ -174,80 +210,79 @@ LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) { // Stop spamming us with clang version numbers if (auto *ident = module.getNamedMetadata("llvm.ident")) module.eraseNamedMetadata(ident); + // Override the libModules datalayout and target triple with the compiler's + // data layout should there be a discrepancy.
+ setDataLayoutAndTriple(module); return success(); } void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) { - [[maybe_unused]] std::optional targetMachine = + std::optional targetMachine = getOrCreateTargetMachine(); assert(targetMachine && "expect a TargetMachine"); - addControlVariables(module, target.hasWave64(), target.hasDaz(), + // If all libraries are not set, traverse the module to determine which + // libraries are required. + if (deviceLibs != AMDGCNLibraries::All) { + for (llvm::Function &f : module.functions()) { + if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) { + StringRef funcName = f.getName(); + if ("printf" == funcName) + deviceLibs |= AMDGCNLibraries::OpenCL | AMDGCNLibraries::Ockl | + AMDGCNLibraries::Ocml; + if (funcName.starts_with("__ockl_")) + deviceLibs |= AMDGCNLibraries::Ockl; + if (funcName.starts_with("__ocml_")) + deviceLibs |= AMDGCNLibraries::Ocml; + } + } + } + addControlVariables(module, deviceLibs, target.hasWave64(), target.hasDaz(), target.hasFiniteOnly(), target.hasUnsafeMath(), target.hasFastMath(), target.hasCorrectSqrt(), target.getAbi()); } -// Get the paths of ROCm device libraries. -LogicalResult SerializeGPUModuleBase::getCommonBitcodeLibs( - llvm::SmallVector &libs, SmallVector &libPath, - StringRef isaVersion) { - auto addLib = [&](StringRef path) -> bool { - if (!llvm::sys::fs::is_regular_file(path)) { - getOperation().emitRemark() << "Bitcode library path: " << path - << " does not exist or is not a file.\n"; - return true; - } - libs.push_back(path.str()); - return false; - }; - auto getLibPath = [&libPath](Twine lib) { - auto baseSize = libPath.size(); - llvm::sys::path::append(libPath, lib + ".bc"); - std::string path(StringRef(libPath.data(), libPath.size()).str()); - libPath.truncate(baseSize); - return path; - }; - - // Add ROCm device libraries. Fail if any of the libraries is not found. 
- if (addLib(getLibPath("ocml")) || addLib(getLibPath("ockl")) || - addLib(getLibPath("hip")) || addLib(getLibPath("opencl")) || - addLib(getLibPath("oclc_isa_version_" + isaVersion))) - return failure(); - return success(); -} - void SerializeGPUModuleBase::addControlVariables( - llvm::Module &module, bool wave64, bool daz, bool finiteOnly, - bool unsafeMath, bool fastMath, bool correctSqrt, StringRef abiVer) { - llvm::Type *i8Ty = llvm::Type::getInt8Ty(module.getContext()); - auto addControlVariable = [i8Ty, &module](StringRef name, bool enable) { + llvm::Module &module, AMDGCNLibraries libs, bool wave64, bool daz, + bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt, + StringRef abiVer) { + // Return if no device libraries are required. + if (libs == AMDGCNLibraries::None) + return; + // Helper function for adding control variables. + auto addControlVariable = [&module](StringRef name, uint32_t value, + uint32_t bitwidth) { + if (module.getNamedGlobal(name)) { + return; + } + llvm::IntegerType *type = + llvm::IntegerType::getIntNTy(module.getContext(), bitwidth); llvm::GlobalVariable *controlVariable = new llvm::GlobalVariable( - module, i8Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, - llvm::ConstantInt::get(i8Ty, enable), name, nullptr, - llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4); + module, type, /*isConstant=*/true, + llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, + llvm::ConstantInt::get(type, value), name, /*before=*/nullptr, + /*threadLocalMode=*/llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, + /*addressSpace=*/4); controlVariable->setVisibility( llvm::GlobalValue::VisibilityTypes::ProtectedVisibility); - controlVariable->setAlignment(llvm::MaybeAlign(1)); + controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8)); controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); }; - addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath); -
addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath); - addControlVariable("__oclc_daz_opt", daz || fastMath); - addControlVariable("__oclc_correctly_rounded_sqrt32", - correctSqrt && !fastMath); - addControlVariable("__oclc_wavefrontsize64", wave64); - - llvm::Type *i32Ty = llvm::Type::getInt32Ty(module.getContext()); - int abi = 500; - abiVer.getAsInteger(0, abi); - llvm::GlobalVariable *abiVersion = new llvm::GlobalVariable( - module, i32Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, - llvm::ConstantInt::get(i32Ty, abi), "__oclc_ABI_version", nullptr, - llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4); - abiVersion->setVisibility( - llvm::GlobalValue::VisibilityTypes::ProtectedVisibility); - abiVersion->setAlignment(llvm::MaybeAlign(4)); - abiVersion->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); + // Add ocml related control variables. + if (any(libs & AMDGCNLibraries::Ocml)) { + addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath, 8); + addControlVariable("__oclc_daz_opt", daz || fastMath, 8); + addControlVariable("__oclc_correctly_rounded_sqrt32", + correctSqrt && !fastMath, 8); + addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath, 8); + } + // Add ocml or ockl related control variables. + if (any(libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl))) { + addControlVariable("__oclc_wavefrontsize64", wave64, 8); + int abi = 500; + abiVer.getAsInteger(0, abi); + addControlVariable("__oclc_ABI_version", abi, 32); + } } std::optional> @@ -312,43 +347,11 @@ SerializeGPUModuleBase::assembleIsa(StringRef isa) { parser->setTargetParser(*tap); parser->Run(false); - return result; } -#if MLIR_ENABLE_ROCM_CONVERSIONS -namespace { -class AMDGPUSerializer : public SerializeGPUModuleBase { -public: - AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, - const gpu::TargetOptions &targetOptions); - - gpu::GPUModuleOp getOperation(); - - // Compile to HSA. 
- std::optional> - compileToBinary(const std::string &serializedISA); - - std::optional> - moduleToObject(llvm::Module &llvmModule) override; - -private: - // Target options. - gpu::TargetOptions targetOptions; -}; -} // namespace - -AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, - const gpu::TargetOptions &targetOptions) - : SerializeGPUModuleBase(module, target, targetOptions), - targetOptions(targetOptions) {} - -gpu::GPUModuleOp AMDGPUSerializer::getOperation() { - return dyn_cast(&SerializeGPUModuleBase::getOperation()); -} - std::optional> -AMDGPUSerializer::compileToBinary(const std::string &serializedISA) { +SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) { // Assemble the ISA. std::optional> isaBinary = assembleIsa(serializedISA); @@ -407,13 +410,13 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) { return SmallVector(buffer.begin(), buffer.end()); } -std::optional> -AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { +std::optional> SerializeGPUModuleBase::moduleToObjectImpl( + const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule) { // Return LLVM IR if the compilation target is offload. #define DEBUG_TYPE "serialize-to-llvm" LLVM_DEBUG({ - llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr() - << "\n" + llvm::dbgs() << "LLVM IR for module: " + << cast(getOperation()).getNameAttr() << "\n" << llvmModule << "\n"; }); #undef DEBUG_TYPE @@ -437,7 +440,8 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { } #define DEBUG_TYPE "serialize-to-isa" LLVM_DEBUG({ - llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n" + llvm::dbgs() << "ISA for module: " + << cast(getOperation()).getNameAttr() << "\n" << *serializedISA << "\n"; }); #undef DEBUG_TYPE @@ -448,6 +452,38 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { // Compile to binary. 
return compileToBinary(*serializedISA); } + +#if MLIR_ENABLE_ROCM_CONVERSIONS +namespace { +class AMDGPUSerializer : public SerializeGPUModuleBase { +public: + AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, + const gpu::TargetOptions &targetOptions); + + gpu::GPUModuleOp getOperation(); + + std::optional> + moduleToObject(llvm::Module &llvmModule) override; + +private: + // Target options. + gpu::TargetOptions targetOptions; +}; +} // namespace + +AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, + const gpu::TargetOptions &targetOptions) + : SerializeGPUModuleBase(module, target, targetOptions), + targetOptions(targetOptions) {} + +gpu::GPUModuleOp AMDGPUSerializer::getOperation() { + return dyn_cast(&SerializeGPUModuleBase::getOperation()); +} + +std::optional> +AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { + return moduleToObjectImpl(targetOptions, llvmModule); +} #endif // MLIR_ENABLE_ROCM_CONVERSIONS std::optional> ROCDLTargetAttrImpl::serializeToObject( @@ -477,10 +513,15 @@ ROCDLTargetAttrImpl::createObject(Attribute attribute, const SmallVector &object, const gpu::TargetOptions &options) const { gpu::CompilationTarget format = options.getCompilationTarget(); + // If format is `fatbin` transform it to binary as `fatbin` is not yet + // supported. + if (format > gpu::CompilationTarget::Binary) + format = gpu::CompilationTarget::Binary; + + DictionaryAttr properties{}; Builder builder(attribute.getContext()); return builder.getAttr( - attribute, - format > gpu::CompilationTarget::Binary ? gpu::CompilationTarget::Binary - : format, - builder.getStringAttr(StringRef(object.data(), object.size())), nullptr); + attribute, format, + builder.getStringAttr(StringRef(object.data(), object.size())), + properties); }