diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 749971e354f66..9b47e212a3049 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -931,6 +931,12 @@ enum : unsigned { // Processor selection mask for EF_CUDA_SM* values prior to blackwell. EF_CUDA_SM = 0xff, + // Processor selection mask for EF_CUDA_SM* values following blackwell. + EF_CUDA_SM_MASK = 0xff00, + + // Bit offset of the EF_CUDA_SM_MASK field; shift right by this to get an EF_CUDA_SM* value following blackwell. + EF_CUDA_SM_OFFSET = 8, + // SM based processor values. EF_CUDA_SM20 = 0x14, EF_CUDA_SM21 = 0x15, @@ -950,9 +956,15 @@ enum : unsigned { EF_CUDA_SM80 = 0x50, EF_CUDA_SM86 = 0x56, EF_CUDA_SM87 = 0x57, + EF_CUDA_SM88 = 0x58, EF_CUDA_SM89 = 0x59, - // The sm_90a variant uses the same machine flag. EF_CUDA_SM90 = 0x5a, + EF_CUDA_SM100 = 0x64, + EF_CUDA_SM101 = 0x65, + EF_CUDA_SM103 = 0x67, + EF_CUDA_SM110 = 0x6e, + EF_CUDA_SM120 = 0x78, + EF_CUDA_SM121 = 0x79, // Unified texture binding is enabled. EF_CUDA_TEXMODE_UNIFIED = 0x100, @@ -968,17 +980,7 @@ enum : unsigned { // Virtual processor selection mask for EF_CUDA_VIRTUAL_SM* values. EF_CUDA_VIRTUAL_SM = 0xff0000, - // Processor selection mask for EF_CUDA_SM* values following blackwell. - EF_CUDA_SM_MASK = 0xff00, - - // SM based processor values. - EF_CUDA_SM100 = 0x6400, - EF_CUDA_SM101 = 0x6500, - EF_CUDA_SM103 = 0x6700, - EF_CUDA_SM120 = 0x7800, - EF_CUDA_SM121 = 0x7900, - - // Set when using an accelerator variant like sm_100a. + // Set when using an accelerator variant like sm_100a in the new ABI. 
EF_CUDA_ACCELERATORS = 0x8, }; diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index aff047c297cc2..7e7e36d87c864 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -622,7 +622,8 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const { assert(getEMachine() == ELF::EM_CUDA); unsigned SM = getEIdentABIVersion() == ELF::ELFABIVERSION_CUDA_V1 ? getPlatformFlags() & ELF::EF_CUDA_SM - : getPlatformFlags() & ELF::EF_CUDA_SM_MASK; + : (getPlatformFlags() & ELF::EF_CUDA_SM_MASK) >> + ELF::EF_CUDA_SM_OFFSET; switch (SM) { // Fermi architecture. @@ -674,6 +675,8 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const { return "sm_86"; case ELF::EF_CUDA_SM87: return "sm_87"; + case ELF::EF_CUDA_SM88: + return "sm_88"; // Ada architecture. case ELF::EF_CUDA_SM89: @@ -694,6 +697,9 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const { case ELF::EF_CUDA_SM103: return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_103a" : "sm_103"; + case ELF::EF_CUDA_SM110: + return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_110a" + : "sm_110"; // Rubin architecture. 
case ELF::EF_CUDA_SM120: diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 7d75f29623ea9..253e9ae163a5b 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1114,6 +1114,7 @@ const EnumEntry ElfOSABI[] = { {"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS}, {"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI}, {"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA}, + {"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA_V2}, {"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}}; const EnumEntry AMDGPUElfOSABI[] = { @@ -1679,19 +1680,60 @@ const EnumEntry ElfHeaderAMDGPUFlagsABIVersion4[] = { }; const EnumEntry ElfHeaderNVPTXFlags[] = { - ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"), - ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"), - ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"), - ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"), - ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"), - ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"), - ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"), - ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"), - ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"), - ENUM_ENT(EF_CUDA_SM89, "sm_89"), ENUM_ENT(EF_CUDA_SM90, "sm_90"), - ENUM_ENT(EF_CUDA_SM100, "sm_100"), ENUM_ENT(EF_CUDA_SM101, "sm_101"), - ENUM_ENT(EF_CUDA_SM103, "sm_103"), ENUM_ENT(EF_CUDA_SM120, "sm_120"), + ENUM_ENT(EF_CUDA_SM20, "sm_20"), + ENUM_ENT(EF_CUDA_SM21, "sm_21"), + ENUM_ENT(EF_CUDA_SM30, "sm_30"), + ENUM_ENT(EF_CUDA_SM32, "sm_32"), + ENUM_ENT(EF_CUDA_SM35, "sm_35"), + ENUM_ENT(EF_CUDA_SM37, "sm_37"), + ENUM_ENT(EF_CUDA_SM50, "sm_50"), + ENUM_ENT(EF_CUDA_SM52, "sm_52"), + ENUM_ENT(EF_CUDA_SM53, "sm_53"), + ENUM_ENT(EF_CUDA_SM60, "sm_60"), + ENUM_ENT(EF_CUDA_SM61, "sm_61"), + ENUM_ENT(EF_CUDA_SM62, "sm_62"), + ENUM_ENT(EF_CUDA_SM70, "sm_70"), + 
ENUM_ENT(EF_CUDA_SM72, "sm_72"), + ENUM_ENT(EF_CUDA_SM75, "sm_75"), + ENUM_ENT(EF_CUDA_SM80, "sm_80"), + ENUM_ENT(EF_CUDA_SM86, "sm_86"), + ENUM_ENT(EF_CUDA_SM87, "sm_87"), + ENUM_ENT(EF_CUDA_SM88, "sm_88"), + ENUM_ENT(EF_CUDA_SM89, "sm_89"), + ENUM_ENT(EF_CUDA_SM90, "sm_90"), + ENUM_ENT(EF_CUDA_SM100, "sm_100"), + ENUM_ENT(EF_CUDA_SM101, "sm_101"), + ENUM_ENT(EF_CUDA_SM103, "sm_103"), + ENUM_ENT(EF_CUDA_SM110, "sm_110"), + ENUM_ENT(EF_CUDA_SM120, "sm_120"), ENUM_ENT(EF_CUDA_SM121, "sm_121"), + ENUM_ENT(EF_CUDA_SM20 << EF_CUDA_SM_OFFSET, "sm_20"), + ENUM_ENT(EF_CUDA_SM21 << EF_CUDA_SM_OFFSET, "sm_21"), + ENUM_ENT(EF_CUDA_SM30 << EF_CUDA_SM_OFFSET, "sm_30"), + ENUM_ENT(EF_CUDA_SM32 << EF_CUDA_SM_OFFSET, "sm_32"), + ENUM_ENT(EF_CUDA_SM35 << EF_CUDA_SM_OFFSET, "sm_35"), + ENUM_ENT(EF_CUDA_SM37 << EF_CUDA_SM_OFFSET, "sm_37"), + ENUM_ENT(EF_CUDA_SM50 << EF_CUDA_SM_OFFSET, "sm_50"), + ENUM_ENT(EF_CUDA_SM52 << EF_CUDA_SM_OFFSET, "sm_52"), + ENUM_ENT(EF_CUDA_SM53 << EF_CUDA_SM_OFFSET, "sm_53"), + ENUM_ENT(EF_CUDA_SM60 << EF_CUDA_SM_OFFSET, "sm_60"), + ENUM_ENT(EF_CUDA_SM61 << EF_CUDA_SM_OFFSET, "sm_61"), + ENUM_ENT(EF_CUDA_SM62 << EF_CUDA_SM_OFFSET, "sm_62"), + ENUM_ENT(EF_CUDA_SM70 << EF_CUDA_SM_OFFSET, "sm_70"), + ENUM_ENT(EF_CUDA_SM72 << EF_CUDA_SM_OFFSET, "sm_72"), + ENUM_ENT(EF_CUDA_SM75 << EF_CUDA_SM_OFFSET, "sm_75"), + ENUM_ENT(EF_CUDA_SM80 << EF_CUDA_SM_OFFSET, "sm_80"), + ENUM_ENT(EF_CUDA_SM86 << EF_CUDA_SM_OFFSET, "sm_86"), + ENUM_ENT(EF_CUDA_SM87 << EF_CUDA_SM_OFFSET, "sm_87"), + ENUM_ENT(EF_CUDA_SM88 << EF_CUDA_SM_OFFSET, "sm_88"), + ENUM_ENT(EF_CUDA_SM89 << EF_CUDA_SM_OFFSET, "sm_89"), + ENUM_ENT(EF_CUDA_SM90 << EF_CUDA_SM_OFFSET, "sm_90"), + ENUM_ENT(EF_CUDA_SM100 << EF_CUDA_SM_OFFSET, "sm_100"), + ENUM_ENT(EF_CUDA_SM101 << EF_CUDA_SM_OFFSET, "sm_101"), + ENUM_ENT(EF_CUDA_SM103 << EF_CUDA_SM_OFFSET, "sm_103"), + ENUM_ENT(EF_CUDA_SM110 << EF_CUDA_SM_OFFSET, "sm_110"), + ENUM_ENT(EF_CUDA_SM120 << EF_CUDA_SM_OFFSET, "sm_120"), + ENUM_ENT(EF_CUDA_SM121 << 
EF_CUDA_SM_OFFSET, "sm_121"), }; const EnumEntry ElfHeaderRISCVFlags[] = { diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index 99195cd8d7c99..d973c2d4dd320 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -1581,7 +1581,7 @@ struct CUDAPluginTy final : public GenericPluginTy { unsigned SM = Header.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1 ? Header.e_flags & ELF::EF_CUDA_SM - : (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> 8; + : (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> ELF::EF_CUDA_SM_OFFSET; CUdevice Device; CUresult Res = cuDeviceGet(&Device, DeviceId);