diff --git a/deps/llvm.mk b/deps/llvm.mk index 41d3c24108451..194b9391763db 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -462,6 +462,7 @@ $(eval $(call LLVM_PATCH,llvm-win64-reloc-dwarf)) else ifeq ($(LLVM_VER_SHORT),3.9) # fix lowering for atomics on ppc $(eval $(call LLVM_PATCH,llvm-rL279933-ppc-atomicrmw-lowering)) # Remove for 4.0 +$(eval $(call LLVM_PATCH,llvm-PR22923)) # Remove for 4.0 endif # LLVM_VER ifeq ($(LLVM_VER),3.7.1) diff --git a/deps/patches/llvm-PR22923.patch b/deps/patches/llvm-PR22923.patch new file mode 100644 index 0000000000000..c48533bcc4d89 --- /dev/null +++ b/deps/patches/llvm-PR22923.patch @@ -0,0 +1,151 @@ +From e060ffb4b20e294ecb8429bd8a925f9f12b63b17 Mon Sep 17 00:00:00 2001 +From: Hal Finkel +Date: Mon, 29 Aug 2016 22:25:36 +0000 +Subject: [PATCH] [PowerPC] Fix i8/i16 atomics for little-Endian targets + without partword atomics + +For little-Endian PowerPC, we generally target only P8 and later by default. +However, generic (older) 64-bit configurations are still an option, and in that +case, partword atomics are not available (e.g. stbcx.). To lower i8/i16 atomics +without true i8/i16 atomic operations, we emulate using i32 atomics in +combination with a bunch of shifting and masking, etc. The amount by which to +shift in little-Endian mode is different from the amount in big-Endian mode (it +is inverted -- meaning we can leave off the xor when computing the amount). + +Fixes PR22923. + +git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@280022 91177308-0d34-0410-b5e6-96231b3b80d8 +--- + lib/Target/PowerPC/PPCISelLowering.cpp | 18 ++++++++++++------ + test/CodeGen/PowerPC/atomic-2.ll | 15 ++++++++++++++- + 2 files changed, 26 insertions(+), 7 deletions(-) + +diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp +index e89b6ca..f895b06 100644 +--- a/lib/Target/PowerPC/PPCISelLowering.cpp ++++ b/lib/Target/PowerPC/PPCISelLowering.cpp +@@ -8513,6 +8513,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, + // registers without caring whether they're 32 or 64, but here we're + // doing actual arithmetic on the addresses. + bool is64bit = Subtarget.isPPC64(); ++ bool isLittleEndian = Subtarget.isLittleEndian(); + unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); +@@ -8542,7 +8543,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, + : &PPC::GPRCRegClass; + unsigned PtrReg = RegInfo.createVirtualRegister(RC); + unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); +- unsigned ShiftReg = RegInfo.createVirtualRegister(RC); ++ unsigned ShiftReg = ++ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC); + unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); + unsigned MaskReg = RegInfo.createVirtualRegister(RC); + unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); +@@ -8587,8 +8589,9 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, + } + BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) + .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); +- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) +- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); ++ if (!isLittleEndian) ++ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) ++ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); + if (is64bit) + BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) + .addReg(Ptr1Reg).addImm(0).addImm(61); +@@ -9293,6 +9296,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + // since we're actually doing arithmetic on them. Other registers + // can be 32-bit. + bool is64bit = Subtarget.isPPC64(); ++ bool isLittleEndian = Subtarget.isLittleEndian(); + bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; + + unsigned dest = MI.getOperand(0).getReg(); +@@ -9319,7 +9323,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + : &PPC::GPRCRegClass; + unsigned PtrReg = RegInfo.createVirtualRegister(RC); + unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); +- unsigned ShiftReg = RegInfo.createVirtualRegister(RC); ++ unsigned ShiftReg = ++ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC); + unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); + unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); + unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); +@@ -9374,8 +9379,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + } + BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) + .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); +- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) +- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); ++ if (!isLittleEndian) ++ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) ++ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); + if (is64bit) + BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) + .addReg(Ptr1Reg).addImm(0).addImm(61); +diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll +index 1857d5d..bafabdb 100644 +--- a/test/CodeGen/PowerPC/atomic-2.ll ++++ b/test/CodeGen/PowerPC/atomic-2.ll +@@ -1,4 +1,5 @@ +-; RUN: llc < %s -march=ppc64 | FileCheck %s ++; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE ++; RUN: llc < %s -march=ppc64le | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE + ; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s + ; RUN: llc < %s -march=ppc64 -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-P8U + +@@ -12,6 +13,8 @@ define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind { + + define i8 @exchange_and_add8(i8* %mem, i8 %val) nounwind { + ; CHECK-LABEL: exchange_and_add8: ++; CHECK-BE: xori ++; CHECK-LE-NOT: xori + ; CHECK-P8U: lbarx + %tmp = atomicrmw add i8* %mem, i8 %val monotonic + ; CHECK-P8U: stbcx. +@@ -20,6 +23,8 @@ define i8 @exchange_and_add8(i8* %mem, i8 %val) nounwind { + + define i16 @exchange_and_add16(i16* %mem, i16 %val) nounwind { + ; CHECK-LABEL: exchange_and_add16: ++; CHECK-BE: xori ++; CHECK-LE-NOT: xori + ; CHECK-P8U: lharx + %tmp = atomicrmw add i16* %mem, i16 %val monotonic + ; CHECK-P8U: sthcx. +@@ -38,6 +43,8 @@ define i64 @exchange_and_cmp(i64* %mem) nounwind { + + define i8 @exchange_and_cmp8(i8* %mem) nounwind { + ; CHECK-LABEL: exchange_and_cmp8: ++; CHECK-BE: xori ++; CHECK-LE-NOT: xori + ; CHECK-P8U: lbarx + %tmppair = cmpxchg i8* %mem, i8 0, i8 1 monotonic monotonic + %tmp = extractvalue { i8, i1 } %tmppair, 0 +@@ -48,6 +55,8 @@ define i8 @exchange_and_cmp8(i8* %mem) nounwind { + + define i16 @exchange_and_cmp16(i16* %mem) nounwind { + ; CHECK-LABEL: exchange_and_cmp16: ++; CHECK-BE: xori ++; CHECK-LE-NOT: xori + ; CHECK-P8U: lharx + %tmppair = cmpxchg i16* %mem, i16 0, i16 1 monotonic monotonic + %tmp = extractvalue { i16, i1 } %tmppair, 0 +@@ -66,6 +75,8 @@ define i64 @exchange(i64* %mem, i64 %val) nounwind { + + define i8 @exchange8(i8* %mem, i8 %val) nounwind { + ; CHECK-LABEL: exchange8: ++; CHECK-BE: xori ++; CHECK-LE-NOT: xori + ; CHECK-P8U: lbarx + %tmp = atomicrmw xchg i8* %mem, i8 1 monotonic + ; CHECK-P8U: stbcx. +@@ -74,6 +85,8 @@ define i8 @exchange8(i8* %mem, i8 %val) nounwind { + + define i16 @exchange16(i16* %mem, i16 %val) nounwind { + ; CHECK-LABEL: exchange16: ++; CHECK-BE: xori ++; CHECK-LE-NOT: xori + ; CHECK-P8U: lharx + %tmp = atomicrmw xchg i16* %mem, i16 1 monotonic + ; CHECK-P8U: sthcx.