Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bump llvm version required for Power and backport bugfix. #18557

Merged
merged 8 commits into from
Sep 20, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Make.inc
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,7 @@ endif
# If we are running on powerpc64le or ppc64le, set certain options automatically
ifneq (,$(filter $(ARCH), powerpc64le ppc64le))
JCFLAGS += -fsigned-char
override LLVM_VER:=3.8.1
override LLVM_VER:=3.9.0
override OPENBLAS_DYNAMIC_ARCH:=0
override OPENBLAS_TARGET_ARCH:=POWER8
# GCC doesn't do -march= on ppc64le
Expand Down
1 change: 1 addition & 0 deletions deps/checksums/cfe-3.9.0.src.tar.xz/md5
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
29e1d86bee422ab5345f5e9fb808d2dc
1 change: 1 addition & 0 deletions deps/checksums/cfe-3.9.0.src.tar.xz/sha512
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
f405b5819fd19276b2de4da80a51171a3b1c5c00df810e1f284184c693e410775fe2933576f942b881664dd3b75e78f0f15b7e5a3eca7819ffbcfe528d9a578e
1 change: 1 addition & 0 deletions deps/checksums/compiler-rt-3.9.0.src.tar.xz/md5
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
b7ea34c9d744da16ffc0217b6990d095
1 change: 1 addition & 0 deletions deps/checksums/compiler-rt-3.9.0.src.tar.xz/sha512
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
8d15c0315d7a89d9336e0ae4b72775d0815a8e7d962056645e1d0c6a23759bb2d8133c9ad2164e181f017641e8a184a062fbc876a371b81d5b1c2626405f6616
1 change: 1 addition & 0 deletions deps/checksums/llvm-3.9.0.src.tar.xz/md5
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
f2093e98060532449eb7d2fcfd0bc6c6
1 change: 1 addition & 0 deletions deps/checksums/llvm-3.9.0.src.tar.xz/sha512
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
f18ae32531218ab156c3e56c11826be0fd05f0799c82eaf2e6a043b38e1277560b4d921987513b88b11d97ea2619feaf277eec72181dd2ae4f4108c2836bf7b2
4 changes: 4 additions & 0 deletions deps/llvm.mk
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,10 @@ $(eval $(call LLVM_PATCH,llvm-PR27046)) # Remove for 3.9
$(eval $(call LLVM_PATCH,llvm-3.8.0_ppc64_SUBFC8)) # Remove for 3.9
$(eval $(call LLVM_PATCH,llvm-D21271-instcombine-tbaa-3.8)) # Remove for 3.9
$(eval $(call LLVM_PATCH,llvm-win64-reloc-dwarf))
else ifeq ($(LLVM_VER_SHORT),3.9)
# fix lowering for atomics on ppc
$(eval $(call LLVM_PATCH,llvm-rL279933-ppc-atomicrmw-lowering)) # Remove for 4.0
$(eval $(call LLVM_PATCH,llvm-PR22923)) # Remove for 4.0
endif # LLVM_VER

ifeq ($(LLVM_VER),3.7.1)
Expand Down
151 changes: 151 additions & 0 deletions deps/patches/llvm-PR22923.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
From e060ffb4b20e294ecb8429bd8a925f9f12b63b17 Mon Sep 17 00:00:00 2001
From: Hal Finkel <hfinkel@anl.gov>
Date: Mon, 29 Aug 2016 22:25:36 +0000
Subject: [PATCH] [PowerPC] Fix i8/i16 atomics for little-Endian targets
without partword atomics

For little-Endian PowerPC, we generally target only P8 and later by default.
However, generic (older) 64-bit configurations are still an option, and in that
case, partword atomics are not available (e.g. stbcx.). To lower i8/i16 atomics
without true i8/i16 atomic operations, we emulate using i32 atomics in
combination with a bunch of shifting and masking, etc. The amount by which to
shift in little-Endian mode is different from the amount in big-Endian mode (it
is inverted -- meaning we can leave off the xor when computing the amount).

Fixes PR22923.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@280022 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/PowerPC/PPCISelLowering.cpp | 18 ++++++++++++------
test/CodeGen/PowerPC/atomic-2.ll | 15 ++++++++++++++-
2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index e89b6ca..f895b06 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -8513,6 +8513,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
// registers without caring whether they're 32 or 64, but here we're
// doing actual arithmetic on the addresses.
bool is64bit = Subtarget.isPPC64();
+ bool isLittleEndian = Subtarget.isLittleEndian();
unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -8542,7 +8543,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
: &PPC::GPRCRegClass;
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
- unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg =
+ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
unsigned MaskReg = RegInfo.createVirtualRegister(RC);
unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
@@ -8587,8 +8589,9 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
}
BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
.addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (!isLittleEndian)
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
if (is64bit)
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
.addReg(Ptr1Reg).addImm(0).addImm(61);
@@ -9293,6 +9296,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// since we're actually doing arithmetic on them. Other registers
// can be 32-bit.
bool is64bit = Subtarget.isPPC64();
+ bool isLittleEndian = Subtarget.isLittleEndian();
bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;

unsigned dest = MI.getOperand(0).getReg();
@@ -9319,7 +9323,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
: &PPC::GPRCRegClass;
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
- unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg =
+ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
@@ -9374,8 +9379,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
.addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (!isLittleEndian)
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
if (is64bit)
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
.addReg(Ptr1Reg).addImm(0).addImm(61);
diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll
index 1857d5d..bafabdb 100644
--- a/test/CodeGen/PowerPC/atomic-2.ll
+++ b/test/CodeGen/PowerPC/atomic-2.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=ppc64 | FileCheck %s
+; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE
+; RUN: llc < %s -march=ppc64le | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE
; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s
; RUN: llc < %s -march=ppc64 -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-P8U

@@ -12,6 +13,8 @@ define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {

define i8 @exchange_and_add8(i8* %mem, i8 %val) nounwind {
; CHECK-LABEL: exchange_and_add8:
+; CHECK-BE: xori
+; CHECK-LE-NOT: xori
; CHECK-P8U: lbarx
%tmp = atomicrmw add i8* %mem, i8 %val monotonic
; CHECK-P8U: stbcx.
@@ -20,6 +23,8 @@ define i8 @exchange_and_add8(i8* %mem, i8 %val) nounwind {

define i16 @exchange_and_add16(i16* %mem, i16 %val) nounwind {
; CHECK-LABEL: exchange_and_add16:
+; CHECK-BE: xori
+; CHECK-LE-NOT: xori
; CHECK-P8U: lharx
%tmp = atomicrmw add i16* %mem, i16 %val monotonic
; CHECK-P8U: sthcx.
@@ -38,6 +43,8 @@ define i64 @exchange_and_cmp(i64* %mem) nounwind {

define i8 @exchange_and_cmp8(i8* %mem) nounwind {
; CHECK-LABEL: exchange_and_cmp8:
+; CHECK-BE: xori
+; CHECK-LE-NOT: xori
; CHECK-P8U: lbarx
%tmppair = cmpxchg i8* %mem, i8 0, i8 1 monotonic monotonic
%tmp = extractvalue { i8, i1 } %tmppair, 0
@@ -48,6 +55,8 @@ define i8 @exchange_and_cmp8(i8* %mem) nounwind {

define i16 @exchange_and_cmp16(i16* %mem) nounwind {
; CHECK-LABEL: exchange_and_cmp16:
+; CHECK-BE: xori
+; CHECK-LE-NOT: xori
; CHECK-P8U: lharx
%tmppair = cmpxchg i16* %mem, i16 0, i16 1 monotonic monotonic
%tmp = extractvalue { i16, i1 } %tmppair, 0
@@ -66,6 +75,8 @@ define i64 @exchange(i64* %mem, i64 %val) nounwind {

define i8 @exchange8(i8* %mem, i8 %val) nounwind {
; CHECK-LABEL: exchange8:
+; CHECK-BE: xori
+; CHECK-LE-NOT: xori
; CHECK-P8U: lbarx
%tmp = atomicrmw xchg i8* %mem, i8 1 monotonic
; CHECK-P8U: stbcx.
@@ -74,6 +85,8 @@ define i8 @exchange8(i8* %mem, i8 %val) nounwind {

define i16 @exchange16(i16* %mem, i16 %val) nounwind {
; CHECK-LABEL: exchange16:
+; CHECK-BE: xori
+; CHECK-LE-NOT: xori
; CHECK-P8U: lharx
%tmp = atomicrmw xchg i16* %mem, i16 1 monotonic
; CHECK-P8U: sthcx.
Loading