From 9926ef7b49cb89cc1c8225a016749bd5f1dbf926 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Sat, 17 Sep 2016 08:09:20 +0900 Subject: [PATCH 1/8] Add patch for lowering of atomicrmw on ppc only apply patch to 3.9 for now. --- deps/llvm.mk | 3 + ...llvm-rL279933-ppc-atomicrmw-lowering.patch | 732 ++++++++++++++++++ 2 files changed, 735 insertions(+) create mode 100644 deps/patches/llvm-rL279933-ppc-atomicrmw-lowering.patch diff --git a/deps/llvm.mk b/deps/llvm.mk index 6796cce9e2e45..e762e4d8ecdcd 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -456,6 +456,9 @@ $(eval $(call LLVM_PATCH,llvm-PR27046)) # Remove for 3.9 $(eval $(call LLVM_PATCH,llvm-3.8.0_ppc64_SUBFC8)) # Remove for 3.9 $(eval $(call LLVM_PATCH,llvm-D21271-instcombine-tbaa-3.8)) # Remove for 3.9 $(eval $(call LLVM_PATCH,llvm-win64-reloc-dwarf)) +else ifeq ($(LLVM_VER_SHORT),3.9) +# fix lowering for atomics on ppc +$(eval $(call LLVM_PATCH,llvm-rL279933-ppc-atomicrmw-lowering)) # Remove for 4.0 endif # LLVM_VER ifeq ($(LLVM_VER),3.7.1) diff --git a/deps/patches/llvm-rL279933-ppc-atomicrmw-lowering.patch b/deps/patches/llvm-rL279933-ppc-atomicrmw-lowering.patch new file mode 100644 index 0000000000000..5bfdd7559c2a1 --- /dev/null +++ b/deps/patches/llvm-rL279933-ppc-atomicrmw-lowering.patch @@ -0,0 +1,732 @@ +From afa0d1049b60d7cba63bf3d27872dda96db44ac9 Mon Sep 17 00:00:00 2001 +From: Hal Finkel +Date: Sun, 28 Aug 2016 16:17:58 +0000 +Subject: [PATCH] [PowerPC] Implement lowering for atomicrmw min/max/umin/umax + +Implement lowering for atomicrmw min/max/umin/umax. Fixes PR28818. 
+ +git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@279933 91177308-0d34-0410-b5e6-96231b3b80d8 +--- + lib/Target/PowerPC/PPCISelLowering.cpp | 101 +++++++- + lib/Target/PowerPC/PPCISelLowering.h | 8 +- + lib/Target/PowerPC/PPCInstr64Bit.td | 12 + + lib/Target/PowerPC/PPCInstrInfo.td | 36 +++ + test/CodeGen/PowerPC/atomic-minmax.ll | 435 +++++++++++++++++++++++++++++++++ + 5 files changed, 587 insertions(+), 5 deletions(-) + create mode 100644 test/CodeGen/PowerPC/atomic-minmax.ll + +diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp +index bbbc51a..e89b6ca 100644 +--- a/lib/Target/PowerPC/PPCISelLowering.cpp ++++ b/lib/Target/PowerPC/PPCISelLowering.cpp +@@ -8387,7 +8387,9 @@ Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, + MachineBasicBlock * + PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, + unsigned AtomicSize, +- unsigned BinOpcode) const { ++ unsigned BinOpcode, ++ unsigned CmpOpcode, ++ unsigned CmpPred) const { + // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + +@@ -8427,8 +8429,12 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, + DebugLoc dl = MI.getDebugLoc(); + + MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *loop2MBB = ++ CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr; + MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, loopMBB); ++ if (CmpOpcode) ++ F->insert(It, loop2MBB); + F->insert(It, exitMBB); + exitMBB->splice(exitMBB->begin(), BB, + std::next(MachineBasicBlock::iterator(MI)), BB->end()); +@@ -8450,11 +8456,31 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, + // st[wd]cx. r0, ptr + // bne- loopMBB + // fallthrough --> exitMBB ++ ++ // For max/min... 
++ // loopMBB: ++ // l[wd]arx dest, ptr ++ // cmpl?[wd] incr, dest ++ // bgt exitMBB ++ // loop2MBB: ++ // st[wd]cx. dest, ptr ++ // bne- loopMBB ++ // fallthrough --> exitMBB ++ + BB = loopMBB; + BuildMI(BB, dl, TII->get(LoadMnemonic), dest) + .addReg(ptrA).addReg(ptrB); + if (BinOpcode) + BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); ++ if (CmpOpcode) { ++ BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) ++ .addReg(incr).addReg(dest); ++ BuildMI(BB, dl, TII->get(PPC::BCC)) ++ .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB); ++ BB->addSuccessor(loop2MBB); ++ BB->addSuccessor(exitMBB); ++ BB = loop2MBB; ++ } + BuildMI(BB, dl, TII->get(StoreMnemonic)) + .addReg(TmpReg).addReg(ptrA).addReg(ptrB); + BuildMI(BB, dl, TII->get(PPC::BCC)) +@@ -8472,10 +8498,13 @@ MachineBasicBlock * + PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, + MachineBasicBlock *BB, + bool is8bit, // operation +- unsigned BinOpcode) const { ++ unsigned BinOpcode, ++ unsigned CmpOpcode, ++ unsigned CmpPred) const { + // If we support part-word atomic mnemonics, just use them + if (Subtarget.hasPartwordAtomics()) +- return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode); ++ return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, ++ CmpOpcode, CmpPred); + + // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); +@@ -8497,8 +8526,12 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, + DebugLoc dl = MI.getDebugLoc(); + + MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *loop2MBB = ++ CmpOpcode ? 
F->CreateMachineBasicBlock(LLVM_BB) : nullptr; + MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, loopMBB); ++ if (CmpOpcode) ++ F->insert(It, loop2MBB); + F->insert(It, exitMBB); + exitMBB->splice(exitMBB->begin(), BB, + std::next(MachineBasicBlock::iterator(MI)), BB->end()); +@@ -8583,6 +8616,32 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, + .addReg(TmpDestReg).addReg(MaskReg); + BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) + .addReg(TmpReg).addReg(MaskReg); ++ if (CmpOpcode) { ++ // For unsigned comparisons, we can directly compare the shifted values. ++ // For signed comparisons we shift and sign extend. ++ unsigned SReg = RegInfo.createVirtualRegister(RC); ++ BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg) ++ .addReg(TmpDestReg).addReg(MaskReg); ++ unsigned ValueReg = SReg; ++ unsigned CmpReg = Incr2Reg; ++ if (CmpOpcode == PPC::CMPW) { ++ ValueReg = RegInfo.createVirtualRegister(RC); ++ BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg) ++ .addReg(SReg).addReg(ShiftReg); ++ unsigned ValueSReg = RegInfo.createVirtualRegister(RC); ++ BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg) ++ .addReg(ValueReg); ++ ValueReg = ValueSReg; ++ CmpReg = incr; ++ } ++ BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) ++ .addReg(CmpReg).addReg(ValueReg); ++ BuildMI(BB, dl, TII->get(PPC::BCC)) ++ .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB); ++ BB->addSuccessor(loop2MBB); ++ BB->addSuccessor(exitMBB); ++ BB = loop2MBB; ++ } + BuildMI(BB, dl, TII->get(is64bit ? 
PPC::OR8 : PPC::OR), Tmp4Reg) + .addReg(Tmp3Reg).addReg(Tmp2Reg); + BuildMI(BB, dl, TII->get(PPC::STWCX)) +@@ -9089,6 +9148,42 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) + BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8); + ++ else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8) ++ BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE); ++ else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16) ++ BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE); ++ else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32) ++ BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE); ++ else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64) ++ BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE); ++ ++ else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8) ++ BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE); ++ else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16) ++ BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE); ++ else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32) ++ BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE); ++ else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64) ++ BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE); ++ ++ else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8) ++ BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE); ++ else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16) ++ BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE); ++ else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32) ++ BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE); ++ else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64) ++ BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE); ++ ++ else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8) ++ BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE); ++ else if (MI.getOpcode() == 
PPC::ATOMIC_LOAD_UMAX_I16) ++ BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE); ++ else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32) ++ BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE); ++ else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64) ++ BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE); ++ + else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, 0); + else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16) +diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h +index 13b8637..1175ccd 100644 +--- a/lib/Target/PowerPC/PPCISelLowering.h ++++ b/lib/Target/PowerPC/PPCISelLowering.h +@@ -589,11 +589,15 @@ namespace llvm { + MachineBasicBlock *EmitAtomicBinary(MachineInstr &MI, + MachineBasicBlock *MBB, + unsigned AtomicSize, +- unsigned BinOpcode) const; ++ unsigned BinOpcode, ++ unsigned CmpOpcode = 0, ++ unsigned CmpPred = 0) const; + MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr &MI, + MachineBasicBlock *MBB, + bool is8bit, +- unsigned Opcode) const; ++ unsigned Opcode, ++ unsigned CmpOpcode = 0, ++ unsigned CmpPred = 0) const; + + MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, + MachineBasicBlock *MBB) const; +diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td +index e7eb8a1..5e514c8 100644 +--- a/lib/Target/PowerPC/PPCInstr64Bit.td ++++ b/lib/Target/PowerPC/PPCInstr64Bit.td +@@ -224,6 +224,18 @@ let usesCustomInserter = 1 in { + def ATOMIC_LOAD_NAND_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64", + [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>; ++ def ATOMIC_LOAD_MIN_I64 : Pseudo< ++ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MIN_I64", ++ [(set i64:$dst, (atomic_load_min_64 xoaddr:$ptr, i64:$incr))]>; ++ def ATOMIC_LOAD_MAX_I64 : Pseudo< ++ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), 
"#ATOMIC_LOAD_MAX_I64", ++ [(set i64:$dst, (atomic_load_max_64 xoaddr:$ptr, i64:$incr))]>; ++ def ATOMIC_LOAD_UMIN_I64 : Pseudo< ++ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMIN_I64", ++ [(set i64:$dst, (atomic_load_umin_64 xoaddr:$ptr, i64:$incr))]>; ++ def ATOMIC_LOAD_UMAX_I64 : Pseudo< ++ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMAX_I64", ++ [(set i64:$dst, (atomic_load_umax_64 xoaddr:$ptr, i64:$incr))]>; + + def ATOMIC_CMP_SWAP_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64", +diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td +index 876e066..f0476fb 100644 +--- a/lib/Target/PowerPC/PPCInstrInfo.td ++++ b/lib/Target/PowerPC/PPCInstrInfo.td +@@ -1509,6 +1509,18 @@ let usesCustomInserter = 1 in { + def ATOMIC_LOAD_NAND_I8 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8", + [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>; ++ def ATOMIC_LOAD_MIN_I8 : Pseudo< ++ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I8", ++ [(set i32:$dst, (atomic_load_min_8 xoaddr:$ptr, i32:$incr))]>; ++ def ATOMIC_LOAD_MAX_I8 : Pseudo< ++ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I8", ++ [(set i32:$dst, (atomic_load_max_8 xoaddr:$ptr, i32:$incr))]>; ++ def ATOMIC_LOAD_UMIN_I8 : Pseudo< ++ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I8", ++ [(set i32:$dst, (atomic_load_umin_8 xoaddr:$ptr, i32:$incr))]>; ++ def ATOMIC_LOAD_UMAX_I8 : Pseudo< ++ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I8", ++ [(set i32:$dst, (atomic_load_umax_8 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_ADD_I16 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16", + [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>; +@@ -1527,6 +1539,18 @@ let usesCustomInserter = 1 in { + def ATOMIC_LOAD_NAND_I16 : Pseudo< + (outs 
gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16", + [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>; ++ def ATOMIC_LOAD_MIN_I16 : Pseudo< ++ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I16", ++ [(set i32:$dst, (atomic_load_min_16 xoaddr:$ptr, i32:$incr))]>; ++ def ATOMIC_LOAD_MAX_I16 : Pseudo< ++ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I16", ++ [(set i32:$dst, (atomic_load_max_16 xoaddr:$ptr, i32:$incr))]>; ++ def ATOMIC_LOAD_UMIN_I16 : Pseudo< ++ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I16", ++ [(set i32:$dst, (atomic_load_umin_16 xoaddr:$ptr, i32:$incr))]>; ++ def ATOMIC_LOAD_UMAX_I16 : Pseudo< ++ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I16", ++ [(set i32:$dst, (atomic_load_umax_16 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_ADD_I32 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32", + [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>; +@@ -1545,6 +1569,18 @@ let usesCustomInserter = 1 in { + def ATOMIC_LOAD_NAND_I32 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32", + [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>; ++ def ATOMIC_LOAD_MIN_I32 : Pseudo< ++ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I32", ++ [(set i32:$dst, (atomic_load_min_32 xoaddr:$ptr, i32:$incr))]>; ++ def ATOMIC_LOAD_MAX_I32 : Pseudo< ++ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I32", ++ [(set i32:$dst, (atomic_load_max_32 xoaddr:$ptr, i32:$incr))]>; ++ def ATOMIC_LOAD_UMIN_I32 : Pseudo< ++ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I32", ++ [(set i32:$dst, (atomic_load_umin_32 xoaddr:$ptr, i32:$incr))]>; ++ def ATOMIC_LOAD_UMAX_I32 : Pseudo< ++ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I32", ++ [(set i32:$dst, (atomic_load_umax_32 xoaddr:$ptr, i32:$incr))]>; + + def 
ATOMIC_CMP_SWAP_I8 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8", +diff --git a/test/CodeGen/PowerPC/atomic-minmax.ll b/test/CodeGen/PowerPC/atomic-minmax.ll +new file mode 100644 +index 0000000..5b9a153 +--- /dev/null ++++ b/test/CodeGen/PowerPC/atomic-minmax.ll +@@ -0,0 +1,435 @@ ++; RUN: llc < %s | FileCheck %s ++target datalayout = "E-m:e-i64:64-n32:64" ++target triple = "powerpc64-unknown-linux-gnu" ++ ++define void @a32min(i32* nocapture dereferenceable(4) %minimum, i32 %val) #0 { ++entry: ++ %0 = atomicrmw min i32* %minimum, i32 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @a32min ++; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: cmpw 4, [[OLDV]] ++; CHECK: bgelr 0 ++; CHECK: stwcx. 4, 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @a32max(i32* nocapture dereferenceable(4) %minimum, i32 %val) #0 { ++entry: ++ %0 = atomicrmw max i32* %minimum, i32 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @a32max ++; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: cmpw 4, [[OLDV]] ++; CHECK: blelr 0 ++; CHECK: stwcx. 4, 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @a32umin(i32* nocapture dereferenceable(4) %minimum, i32 %val) #0 { ++entry: ++ %0 = atomicrmw umin i32* %minimum, i32 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @a32umin ++; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: cmplw 4, [[OLDV]] ++; CHECK: bgelr 0 ++; CHECK: stwcx. 4, 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @a32umax(i32* nocapture dereferenceable(4) %minimum, i32 %val) #0 { ++entry: ++ %0 = atomicrmw umax i32* %minimum, i32 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @a32umax ++; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: cmplw 4, [[OLDV]] ++; CHECK: blelr 0 ++; CHECK: stwcx. 
4, 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @a16min(i16* nocapture dereferenceable(4) %minimum, i16 %val) #1 { ++entry: ++ %0 = atomicrmw min i16* %minimum, i16 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @a16min ++; CHECK: lharx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: cmpw 4, [[OLDV]] ++; CHECK: bgelr 0 ++; CHECK: sthcx. 4, 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @a16max(i16* nocapture dereferenceable(4) %minimum, i16 %val) #1 { ++entry: ++ %0 = atomicrmw max i16* %minimum, i16 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @a16max ++; CHECK: lharx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: cmpw 4, [[OLDV]] ++; CHECK: blelr 0 ++; CHECK: sthcx. 4, 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @a16umin(i16* nocapture dereferenceable(4) %minimum, i16 %val) #1 { ++entry: ++ %0 = atomicrmw umin i16* %minimum, i16 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @a16umin ++; CHECK: lharx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: cmplw 4, [[OLDV]] ++; CHECK: bgelr 0 ++; CHECK: sthcx. 4, 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @a16umax(i16* nocapture dereferenceable(4) %minimum, i16 %val) #1 { ++entry: ++ %0 = atomicrmw umax i16* %minimum, i16 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @a16umax ++; CHECK: lharx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: cmplw 4, [[OLDV]] ++; CHECK: blelr 0 ++; CHECK: sthcx. 4, 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @a8min(i8* nocapture dereferenceable(4) %minimum, i8 %val) #1 { ++entry: ++ %0 = atomicrmw min i8* %minimum, i8 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @a8min ++; CHECK: lbarx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: cmpw 4, [[OLDV]] ++; CHECK: bgelr 0 ++; CHECK: stbcx. 
4, 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @a8max(i8* nocapture dereferenceable(4) %minimum, i8 %val) #1 { ++entry: ++ %0 = atomicrmw max i8* %minimum, i8 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @a8max ++; CHECK: lbarx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: cmpw 4, [[OLDV]] ++; CHECK: blelr 0 ++; CHECK: stbcx. 4, 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @a8umin(i8* nocapture dereferenceable(4) %minimum, i8 %val) #1 { ++entry: ++ %0 = atomicrmw umin i8* %minimum, i8 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @a8umin ++; CHECK: lbarx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: cmplw 4, [[OLDV]] ++; CHECK: bgelr 0 ++; CHECK: stbcx. 4, 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @a8umax(i8* nocapture dereferenceable(4) %minimum, i8 %val) #1 { ++entry: ++ %0 = atomicrmw umax i8* %minimum, i8 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @a8umax ++; CHECK: lbarx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: cmplw 4, [[OLDV]] ++; CHECK: blelr 0 ++; CHECK: stbcx. 4, 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @a64min(i64* nocapture dereferenceable(4) %minimum, i64 %val) #0 { ++entry: ++ %0 = atomicrmw min i64* %minimum, i64 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @a64min ++; CHECK: ldarx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: cmpd 4, [[OLDV]] ++; CHECK: bgelr 0 ++; CHECK: stdcx. 4, 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @a64max(i64* nocapture dereferenceable(4) %minimum, i64 %val) #0 { ++entry: ++ %0 = atomicrmw max i64* %minimum, i64 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @a64max ++; CHECK: ldarx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: cmpd 4, [[OLDV]] ++; CHECK: blelr 0 ++; CHECK: stdcx. 
4, 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @a64umin(i64* nocapture dereferenceable(4) %minimum, i64 %val) #0 { ++entry: ++ %0 = atomicrmw umin i64* %minimum, i64 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @a64umin ++; CHECK: ldarx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: cmpld 4, [[OLDV]] ++; CHECK: bgelr 0 ++; CHECK: stdcx. 4, 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @a64umax(i64* nocapture dereferenceable(4) %minimum, i64 %val) #0 { ++entry: ++ %0 = atomicrmw umax i64* %minimum, i64 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @a64umax ++; CHECK: ldarx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: cmpld 4, [[OLDV]] ++; CHECK: blelr 0 ++; CHECK: stdcx. 4, 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @ae16min(i16* nocapture dereferenceable(4) %minimum, i16 %val) #0 { ++entry: ++ %0 = atomicrmw min i16* %minimum, i16 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @ae16min ++; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 27 ++; CHECK-DAG: li [[M1:[0-9]+]], 0 ++; CHECK-DAG: rldicr 3, 3, 0, 61 ++; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 16 ++; CHECK-DAG: ori [[M2:[0-9]+]], [[M1]], 65535 ++; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] ++; CHECK-DAG: slw [[M:[0-9]+]], [[M2]], [[SA]] ++; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] ++; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] ++; CHECK: srw [[SMOLDV:[0-9]+]], [[MOLDV]], [[SA]] ++; CHECK: extsh [[SESMOLDV:[0-9]+]], [[SMOLDV]] ++; CHECK: cmpw 0, 4, [[SESMOLDV]] ++; CHECK: bgelr 0 ++; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] ++; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] ++; CHECK: stwcx. 
[[NEWV]], 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @ae16max(i16* nocapture dereferenceable(4) %minimum, i16 %val) #0 { ++entry: ++ %0 = atomicrmw max i16* %minimum, i16 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @ae16max ++; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 27 ++; CHECK-DAG: li [[M1:[0-9]+]], 0 ++; CHECK-DAG: rldicr 3, 3, 0, 61 ++; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 16 ++; CHECK-DAG: ori [[M2:[0-9]+]], [[M1]], 65535 ++; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] ++; CHECK-DAG: slw [[M:[0-9]+]], [[M2]], [[SA]] ++; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] ++; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] ++; CHECK: srw [[SMOLDV:[0-9]+]], [[MOLDV]], [[SA]] ++; CHECK: extsh [[SESMOLDV:[0-9]+]], [[SMOLDV]] ++; CHECK: cmpw 0, 4, [[SESMOLDV]] ++; CHECK: blelr 0 ++; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] ++; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] ++; CHECK: stwcx. [[NEWV]], 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @ae16umin(i16* nocapture dereferenceable(4) %minimum, i16 %val) #0 { ++entry: ++ %0 = atomicrmw umin i16* %minimum, i16 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @ae16umin ++; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 27 ++; CHECK-DAG: li [[M1:[0-9]+]], 0 ++; CHECK-DAG: rldicr 3, 3, 0, 61 ++; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 16 ++; CHECK-DAG: ori [[M2:[0-9]+]], [[M1]], 65535 ++; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] ++; CHECK-DAG: slw [[M:[0-9]+]], [[M2]], [[SA]] ++; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] ++; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] ++; CHECK: cmplw 0, 4, [[MOLDV]] ++; CHECK: bgelr 0 ++; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] ++; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] ++; CHECK: stwcx. 
[[NEWV]], 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @ae16umax(i16* nocapture dereferenceable(4) %minimum, i16 %val) #0 { ++entry: ++ %0 = atomicrmw umax i16* %minimum, i16 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @ae16umax ++; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 27 ++; CHECK-DAG: li [[M1:[0-9]+]], 0 ++; CHECK-DAG: rldicr 3, 3, 0, 61 ++; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 16 ++; CHECK-DAG: ori [[M2:[0-9]+]], [[M1]], 65535 ++; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] ++; CHECK-DAG: slw [[M:[0-9]+]], [[M2]], [[SA]] ++; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] ++; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] ++; CHECK: cmplw 0, 4, [[MOLDV]] ++; CHECK: blelr 0 ++; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] ++; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] ++; CHECK: stwcx. [[NEWV]], 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @ae8min(i8* nocapture dereferenceable(4) %minimum, i8 %val) #0 { ++entry: ++ %0 = atomicrmw min i8* %minimum, i8 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @ae8min ++; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 28 ++; CHECK-DAG: li [[M1:[0-9]+]], 255 ++; CHECK-DAG: rldicr 3, 3, 0, 61 ++; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 24 ++; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] ++; CHECK-DAG: slw [[M:[0-9]+]], [[M1]], [[SA]] ++; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] ++; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] ++; CHECK: srw [[SMOLDV:[0-9]+]], [[MOLDV]], [[SA]] ++; CHECK: extsb [[SESMOLDV:[0-9]+]], [[SMOLDV]] ++; CHECK: cmpw 0, 4, [[SESMOLDV]] ++; CHECK: bgelr 0 ++; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] ++; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] ++; CHECK: stwcx. 
[[NEWV]], 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @ae8max(i8* nocapture dereferenceable(4) %minimum, i8 %val) #0 { ++entry: ++ %0 = atomicrmw max i8* %minimum, i8 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @ae8max ++; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 28 ++; CHECK-DAG: li [[M1:[0-9]+]], 255 ++; CHECK-DAG: rldicr 3, 3, 0, 61 ++; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 24 ++; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] ++; CHECK-DAG: slw [[M:[0-9]+]], [[M1]], [[SA]] ++; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] ++; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] ++; CHECK: srw [[SMOLDV:[0-9]+]], [[MOLDV]], [[SA]] ++; CHECK: extsb [[SESMOLDV:[0-9]+]], [[SMOLDV]] ++; CHECK: cmpw 0, 4, [[SESMOLDV]] ++; CHECK: blelr 0 ++; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] ++; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] ++; CHECK: stwcx. [[NEWV]], 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @ae8umin(i8* nocapture dereferenceable(4) %minimum, i8 %val) #0 { ++entry: ++ %0 = atomicrmw umin i8* %minimum, i8 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @ae8umin ++; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 28 ++; CHECK-DAG: li [[M1:[0-9]+]], 255 ++; CHECK-DAG: rldicr 3, 3, 0, 61 ++; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 24 ++; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] ++; CHECK-DAG: slw [[M:[0-9]+]], [[M1]], [[SA]] ++; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] ++; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] ++; CHECK: cmplw 0, 4, [[MOLDV]] ++; CHECK: bgelr 0 ++; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] ++; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] ++; CHECK: stwcx. 
[[NEWV]], 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++define void @ae8umax(i8* nocapture dereferenceable(4) %minimum, i8 %val) #0 { ++entry: ++ %0 = atomicrmw umax i8* %minimum, i8 %val monotonic ++ ret void ++ ++; CHECK-LABEL: @ae8umax ++; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 28 ++; CHECK-DAG: li [[M1:[0-9]+]], 255 ++; CHECK-DAG: rldicr 3, 3, 0, 61 ++; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 24 ++; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] ++; CHECK-DAG: slw [[M:[0-9]+]], [[M1]], [[SA]] ++; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] ++; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 ++; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] ++; CHECK: cmplw 0, 4, [[MOLDV]] ++; CHECK: blelr 0 ++; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] ++; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] ++; CHECK: stwcx. [[NEWV]], 0, 3 ++; CHECK: bne 0, ++; CHECK: blr ++} ++ ++attributes #0 = { nounwind "target-cpu"="ppc64" } ++attributes #1 = { nounwind "target-cpu"="pwr8" } ++ +-- +2.9.3 + From c0eedf06c6a5290fafd02b1c77ead98af5250e36 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Sat, 17 Sep 2016 08:47:50 +0900 Subject: [PATCH 2/8] bump LLVM version required for ppc --- Make.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Make.inc b/Make.inc index 3b0b2748d1a03..621da0898e33c 100644 --- a/Make.inc +++ b/Make.inc @@ -640,7 +640,7 @@ endif # If we are running on powerpc64le or ppc64le, set certain options automatically ifneq (,$(filter $(ARCH), powerpc64le ppc64le)) JCFLAGS += -fsigned-char -override LLVM_VER:=3.8.1 +override LLVM_VER:=3.9.0 override OPENBLAS_DYNAMIC_ARCH:=0 override OPENBLAS_TARGET_ARCH:=POWER8 # GCC doesn't do -march= on ppc64le From f8cd8675c1b73c345dc4aeb96625723695b1b4ad Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Sun, 18 Sep 2016 00:30:52 +0900 Subject: [PATCH 3/8] add checksums for llvm-3.9.0 --- deps/checksums/llvm-3.9.0.src.tar.xz/md5 | 1 + deps/checksums/llvm-3.9.0.src.tar.xz/sha512 | 1 + 2 files changed, 2 insertions(+) 
create mode 100644 deps/checksums/llvm-3.9.0.src.tar.xz/md5 create mode 100644 deps/checksums/llvm-3.9.0.src.tar.xz/sha512 diff --git a/deps/checksums/llvm-3.9.0.src.tar.xz/md5 b/deps/checksums/llvm-3.9.0.src.tar.xz/md5 new file mode 100644 index 0000000000000..69609abdf910a --- /dev/null +++ b/deps/checksums/llvm-3.9.0.src.tar.xz/md5 @@ -0,0 +1 @@ +f2093e98060532449eb7d2fcfd0bc6c6 diff --git a/deps/checksums/llvm-3.9.0.src.tar.xz/sha512 b/deps/checksums/llvm-3.9.0.src.tar.xz/sha512 new file mode 100644 index 0000000000000..9f7e064d2f697 --- /dev/null +++ b/deps/checksums/llvm-3.9.0.src.tar.xz/sha512 @@ -0,0 +1 @@ +f18ae32531218ab156c3e56c11826be0fd05f0799c82eaf2e6a043b38e1277560b4d921987513b88b11d97ea2619feaf277eec72181dd2ae4f4108c2836bf7b2 From af13585e9d4cc7ebcf2cbe466b8fe41e4fbac7ba Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Sun, 18 Sep 2016 00:32:47 +0900 Subject: [PATCH 4/8] add checksums for compiler-rt-3.9.0 --- deps/checksums/compiler-rt-3.9.0.src.tar.xz/md5 | 1 + deps/checksums/compiler-rt-3.9.0.src.tar.xz/sha512 | 1 + 2 files changed, 2 insertions(+) create mode 100644 deps/checksums/compiler-rt-3.9.0.src.tar.xz/md5 create mode 100644 deps/checksums/compiler-rt-3.9.0.src.tar.xz/sha512 diff --git a/deps/checksums/compiler-rt-3.9.0.src.tar.xz/md5 b/deps/checksums/compiler-rt-3.9.0.src.tar.xz/md5 new file mode 100644 index 0000000000000..b1c1059f6c4bf --- /dev/null +++ b/deps/checksums/compiler-rt-3.9.0.src.tar.xz/md5 @@ -0,0 +1 @@ +b7ea34c9d744da16ffc0217b6990d095 diff --git a/deps/checksums/compiler-rt-3.9.0.src.tar.xz/sha512 b/deps/checksums/compiler-rt-3.9.0.src.tar.xz/sha512 new file mode 100644 index 0000000000000..40f891907bcbe --- /dev/null +++ b/deps/checksums/compiler-rt-3.9.0.src.tar.xz/sha512 @@ -0,0 +1 @@ +8d15c0315d7a89d9336e0ae4b72775d0815a8e7d962056645e1d0c6a23759bb2d8133c9ad2164e181f017641e8a184a062fbc876a371b81d5b1c2626405f6616 From 2f4cb25cb6616390180721aa1cc92e243a17e8b8 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: 
Sun, 18 Sep 2016 00:36:46 +0900 Subject: [PATCH 5/8] add checksums for cfe-3.9.0 --- deps/checksums/cfe-3.9.0.src.tar.xz/md5 | 1 + deps/checksums/cfe-3.9.0.src.tar.xz/sha512 | 1 + 2 files changed, 2 insertions(+) create mode 100644 deps/checksums/cfe-3.9.0.src.tar.xz/md5 create mode 100644 deps/checksums/cfe-3.9.0.src.tar.xz/sha512 diff --git a/deps/checksums/cfe-3.9.0.src.tar.xz/md5 b/deps/checksums/cfe-3.9.0.src.tar.xz/md5 new file mode 100644 index 0000000000000..425260abd85e4 --- /dev/null +++ b/deps/checksums/cfe-3.9.0.src.tar.xz/md5 @@ -0,0 +1 @@ +29e1d86bee422ab5345f5e9fb808d2dc diff --git a/deps/checksums/cfe-3.9.0.src.tar.xz/sha512 b/deps/checksums/cfe-3.9.0.src.tar.xz/sha512 new file mode 100644 index 0000000000000..1e143511ea733 --- /dev/null +++ b/deps/checksums/cfe-3.9.0.src.tar.xz/sha512 @@ -0,0 +1 @@ +f405b5819fd19276b2de4da80a51171a3b1c5c00df810e1f284184c693e410775fe2933576f942b881664dd3b75e78f0f15b7e5a3eca7819ffbcfe528d9a578e From e01eb548edc595bb428273fd3e370cfc8672f398 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Sun, 18 Sep 2016 06:19:24 -0700 Subject: [PATCH 6/8] 128-bit atomics are not yet supported on power --- test/threads.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/threads.jl b/test/threads.jl index 4e21bc5da8740..f8deea7bc89e6 100644 --- a/test/threads.jl +++ b/test/threads.jl @@ -273,7 +273,9 @@ let atomic_types = [Int8, Int16, Int32, Int64, Int128, Float16, Float32, Float64] # Temporarily omit 128-bit types on 32bit x86 # 128-bit atomics do not exist on AArch32. - if Sys.ARCH === :i686 || startswith(string(Sys.ARCH), "arm") + # And we don't support them yet on power. 
+ if Sys.ARCH === :i686 || Sys.ARCH === :powerpc64le || + startswith(string(Sys.ARCH), "arm") filter!(T -> sizeof(T)<=8, atomic_types) end for T in atomic_types From bbca3b4156ae634956741cd973a2ac6a3e883974 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 19 Sep 2016 15:43:22 -0700 Subject: [PATCH 7/8] turn off partword-atomics on power --- src/codegen.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/codegen.cpp b/src/codegen.cpp index 8684437d263b9..49b9e036b319e 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -5939,6 +5939,10 @@ static inline SmallVector getTargetFeatures(std::string &cpu) // is disabled. HostFeatures["cx16"] = true; #endif +#if defined(_CPU_PPC64_) + // Temporarily disable partword (i16) atomics due to LLVM bug(30451). + HostFeatures["partword-atomics"] = false; +#endif // Figure out if we know the cpu_target cpu = (strcmp(jl_options.cpu_target,"native") ? jl_options.cpu_target : From c38d1f8b77bfc030c0fd6015639277143380847c Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 19 Sep 2016 16:13:23 -0700 Subject: [PATCH 8/8] add patch that works around an issue with partword-atomics --- deps/llvm.mk | 1 + deps/patches/llvm-PR22923.patch | 151 ++++++++++++++++++++++++++++++++ 2 files changed, 152 insertions(+) create mode 100644 deps/patches/llvm-PR22923.patch diff --git a/deps/llvm.mk b/deps/llvm.mk index e762e4d8ecdcd..496af119b460a 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -459,6 +459,7 @@ $(eval $(call LLVM_PATCH,llvm-win64-reloc-dwarf)) else ifeq ($(LLVM_VER_SHORT),3.9) # fix lowering for atomics on ppc $(eval $(call LLVM_PATCH,llvm-rL279933-ppc-atomicrmw-lowering)) # Remove for 4.0 +$(eval $(call LLVM_PATCH,llvm-PR22923)) # Remove for 4.0 endif # LLVM_VER ifeq ($(LLVM_VER),3.7.1) diff --git a/deps/patches/llvm-PR22923.patch b/deps/patches/llvm-PR22923.patch new file mode 100644 index 0000000000000..c48533bcc4d89 --- /dev/null +++ b/deps/patches/llvm-PR22923.patch @@ -0,0 +1,151 @@ +From
e060ffb4b20e294ecb8429bd8a925f9f12b63b17 Mon Sep 17 00:00:00 2001 +From: Hal Finkel +Date: Mon, 29 Aug 2016 22:25:36 +0000 +Subject: [PATCH] [PowerPC] Fix i8/i16 atomics for little-Endian targets + without partword atomics + +For little-Endian PowerPC, we generally target only P8 and later by default. +However, generic (older) 64-bit configurations are still an option, and in that +case, partword atomics are not available (e.g. stbcx.). To lower i8/i16 atomics +without true i8/i16 atomic operations, we emulate using i32 atomics in +combination with a bunch of shifting and masking, etc. The amount by which to +shift in little-Endian mode is different from the amount in big-Endian mode (it +is inverted -- meaning we can leave off the xor when computing the amount). + +Fixes PR22923. + +git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@280022 91177308-0d34-0410-b5e6-96231b3b80d8 +--- + lib/Target/PowerPC/PPCISelLowering.cpp | 18 ++++++++++++------ + test/CodeGen/PowerPC/atomic-2.ll | 15 ++++++++++++++- + 2 files changed, 26 insertions(+), 7 deletions(-) + +diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp +index e89b6ca..f895b06 100644 +--- a/lib/Target/PowerPC/PPCISelLowering.cpp ++++ b/lib/Target/PowerPC/PPCISelLowering.cpp +@@ -8513,6 +8513,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, + // registers without caring whether they're 32 or 64, but here we're + // doing actual arithmetic on the addresses. + bool is64bit = Subtarget.isPPC64(); ++ bool isLittleEndian = Subtarget.isLittleEndian(); + unsigned ZeroReg = is64bit ? 
PPC::ZERO8 : PPC::ZERO; + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); +@@ -8542,7 +8543,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, + : &PPC::GPRCRegClass; + unsigned PtrReg = RegInfo.createVirtualRegister(RC); + unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); +- unsigned ShiftReg = RegInfo.createVirtualRegister(RC); ++ unsigned ShiftReg = ++ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC); + unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); + unsigned MaskReg = RegInfo.createVirtualRegister(RC); + unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); +@@ -8587,8 +8589,9 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, + } + BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) + .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); +- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) +- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); ++ if (!isLittleEndian) ++ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) ++ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); + if (is64bit) + BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) + .addReg(Ptr1Reg).addImm(0).addImm(61); +@@ -9293,6 +9296,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + // since we're actually doing arithmetic on them. Other registers + // can be 32-bit. + bool is64bit = Subtarget.isPPC64(); ++ bool isLittleEndian = Subtarget.isLittleEndian(); + bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; + + unsigned dest = MI.getOperand(0).getReg(); +@@ -9319,7 +9323,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + : &PPC::GPRCRegClass; + unsigned PtrReg = RegInfo.createVirtualRegister(RC); + unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); +- unsigned ShiftReg = RegInfo.createVirtualRegister(RC); ++ unsigned ShiftReg = ++ isLittleEndian ? 
Shift1Reg : RegInfo.createVirtualRegister(RC); + unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); + unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); + unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); +@@ -9374,8 +9379,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + } + BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) + .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); +- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) +- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); ++ if (!isLittleEndian) ++ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) ++ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); + if (is64bit) + BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) + .addReg(Ptr1Reg).addImm(0).addImm(61); +diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll +index 1857d5d..bafabdb 100644 +--- a/test/CodeGen/PowerPC/atomic-2.ll ++++ b/test/CodeGen/PowerPC/atomic-2.ll +@@ -1,4 +1,5 @@ +-; RUN: llc < %s -march=ppc64 | FileCheck %s ++; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE ++; RUN: llc < %s -march=ppc64le | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE + ; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s + ; RUN: llc < %s -march=ppc64 -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-P8U + +@@ -12,6 +13,8 @@ define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind { + + define i8 @exchange_and_add8(i8* %mem, i8 %val) nounwind { + ; CHECK-LABEL: exchange_and_add8: ++; CHECK-BE: xori ++; CHECK-LE-NOT: xori + ; CHECK-P8U: lbarx + %tmp = atomicrmw add i8* %mem, i8 %val monotonic + ; CHECK-P8U: stbcx. 
+@@ -20,6 +23,8 @@ define i8 @exchange_and_add8(i8* %mem, i8 %val) nounwind { + + define i16 @exchange_and_add16(i16* %mem, i16 %val) nounwind { + ; CHECK-LABEL: exchange_and_add16: ++; CHECK-BE: xori ++; CHECK-LE-NOT: xori + ; CHECK-P8U: lharx + %tmp = atomicrmw add i16* %mem, i16 %val monotonic + ; CHECK-P8U: sthcx. +@@ -38,6 +43,8 @@ define i64 @exchange_and_cmp(i64* %mem) nounwind { + + define i8 @exchange_and_cmp8(i8* %mem) nounwind { + ; CHECK-LABEL: exchange_and_cmp8: ++; CHECK-BE: xori ++; CHECK-LE-NOT: xori + ; CHECK-P8U: lbarx + %tmppair = cmpxchg i8* %mem, i8 0, i8 1 monotonic monotonic + %tmp = extractvalue { i8, i1 } %tmppair, 0 +@@ -48,6 +55,8 @@ define i8 @exchange_and_cmp8(i8* %mem) nounwind { + + define i16 @exchange_and_cmp16(i16* %mem) nounwind { + ; CHECK-LABEL: exchange_and_cmp16: ++; CHECK-BE: xori ++; CHECK-LE-NOT: xori + ; CHECK-P8U: lharx + %tmppair = cmpxchg i16* %mem, i16 0, i16 1 monotonic monotonic + %tmp = extractvalue { i16, i1 } %tmppair, 0 +@@ -66,6 +75,8 @@ define i64 @exchange(i64* %mem, i64 %val) nounwind { + + define i8 @exchange8(i8* %mem, i8 %val) nounwind { + ; CHECK-LABEL: exchange8: ++; CHECK-BE: xori ++; CHECK-LE-NOT: xori + ; CHECK-P8U: lbarx + %tmp = atomicrmw xchg i8* %mem, i8 1 monotonic + ; CHECK-P8U: stbcx. +@@ -74,6 +85,8 @@ define i8 @exchange8(i8* %mem, i8 %val) nounwind { + + define i16 @exchange16(i16* %mem, i16 %val) nounwind { + ; CHECK-LABEL: exchange16: ++; CHECK-BE: xori ++; CHECK-LE-NOT: xori + ; CHECK-P8U: lharx + %tmp = atomicrmw xchg i16* %mem, i16 1 monotonic + ; CHECK-P8U: sthcx.