JuliaLang · ViralBShah · Sep 20, 2016 · Sep 16, 2016 · Sep 16, 2016 · Sep 17, 2016
diff --git a/Make.inc b/Make.inc
@@ -640,7 +640,7 @@ endif
 # If we are running on powerpc64le or ppc64le, set certain options automatically
 ifneq (,$(filter $(ARCH), powerpc64le ppc64le))
 JCFLAGS += -fsigned-char
-override LLVM_VER:=3.8.1
+override LLVM_VER:=3.9.0
 override OPENBLAS_DYNAMIC_ARCH:=0
 override OPENBLAS_TARGET_ARCH:=POWER8
 # GCC doesn't do -march= on ppc64le

diff --git a/deps/checksums/cfe-3.9.0.src.tar.xz/md5 b/deps/checksums/cfe-3.9.0.src.tar.xz/md5
@@ -0,0 +1 @@
+29e1d86bee422ab5345f5e9fb808d2dc
diff --git a/deps/checksums/cfe-3.9.0.src.tar.xz/sha512 b/deps/checksums/cfe-3.9.0.src.tar.xz/sha512
@@ -0,0 +1 @@
+f405b5819fd19276b2de4da80a51171a3b1c5c00df810e1f284184c693e410775fe2933576f942b881664dd3b75e78f0f15b7e5a3eca7819ffbcfe528d9a578e
diff --git a/deps/checksums/compiler-rt-3.9.0.src.tar.xz/md5 b/deps/checksums/compiler-rt-3.9.0.src.tar.xz/md5
@@ -0,0 +1 @@
+b7ea34c9d744da16ffc0217b6990d095
diff --git a/deps/checksums/compiler-rt-3.9.0.src.tar.xz/sha512 b/deps/checksums/compiler-rt-3.9.0.src.tar.xz/sha512
@@ -0,0 +1 @@
+8d15c0315d7a89d9336e0ae4b72775d0815a8e7d962056645e1d0c6a23759bb2d8133c9ad2164e181f017641e8a184a062fbc876a371b81d5b1c2626405f6616
diff --git a/deps/checksums/llvm-3.9.0.src.tar.xz/md5 b/deps/checksums/llvm-3.9.0.src.tar.xz/md5
@@ -0,0 +1 @@
+f2093e98060532449eb7d2fcfd0bc6c6
diff --git a/deps/checksums/llvm-3.9.0.src.tar.xz/sha512 b/deps/checksums/llvm-3.9.0.src.tar.xz/sha512
@@ -0,0 +1 @@
+f18ae32531218ab156c3e56c11826be0fd05f0799c82eaf2e6a043b38e1277560b4d921987513b88b11d97ea2619feaf277eec72181dd2ae4f4108c2836bf7b2
diff --git a/deps/llvm.mk b/deps/llvm.mk
@@ -456,6 +456,10 @@ $(eval $(call LLVM_PATCH,llvm-PR27046)) # Remove for 3.9
 $(eval $(call LLVM_PATCH,llvm-3.8.0_ppc64_SUBFC8)) # Remove for 3.9
 $(eval $(call LLVM_PATCH,llvm-D21271-instcombine-tbaa-3.8)) # Remove for 3.9
 $(eval $(call LLVM_PATCH,llvm-win64-reloc-dwarf))
+else ifeq ($(LLVM_VER_SHORT),3.9)
+# fix lowering for atomics on ppc
+$(eval $(call LLVM_PATCH,llvm-rL279933-ppc-atomicrmw-lowering)) # Remove for 4.0
+$(eval $(call LLVM_PATCH,llvm-PR22923)) # Remove for 4.0
 endif # LLVM_VER
 
 ifeq ($(LLVM_VER),3.7.1)

diff --git a/deps/patches/llvm-PR22923.patch b/deps/patches/llvm-PR22923.patch
@@ -0,0 +1,151 @@
+From e060ffb4b20e294ecb8429bd8a925f9f12b63b17 Mon Sep 17 00:00:00 2001
+From: Hal Finkel <hfinkel@anl.gov>
+Date: Mon, 29 Aug 2016 22:25:36 +0000
+Subject: [PATCH] [PowerPC] Fix i8/i16 atomics for little-Endian targets
+ without partword atomics
+
+For little-Endian PowerPC, we generally target only P8 and later by default.
+However, generic (older) 64-bit configurations are still an option, and in that
+case, partword atomics are not available (e.g. stbcx.). To lower i8/i16 atomics
+without true i8/i16 atomic operations, we emulate using i32 atomics in
+combination with a bunch of shifting and masking, etc. The amount by which to
+shift in little-Endian mode is different from the amount in big-Endian mode (it
+is inverted -- meaning we can leave off the xor when computing the amount).
+
+Fixes PR22923.
+
+git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@280022 91177308-0d34-0410-b5e6-96231b3b80d8
+---
+ lib/Target/PowerPC/PPCISelLowering.cpp | 18 ++++++++++++------
+ test/CodeGen/PowerPC/atomic-2.ll       | 15 ++++++++++++++-
+ 2 files changed, 26 insertions(+), 7 deletions(-)
+
+diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
+index e89b6ca..f895b06 100644
+--- a/lib/Target/PowerPC/PPCISelLowering.cpp
++++ b/lib/Target/PowerPC/PPCISelLowering.cpp
+@@ -8513,6 +8513,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
+   // registers without caring whether they're 32 or 64, but here we're
+   // doing actual arithmetic on the addresses.
+   bool is64bit = Subtarget.isPPC64();
++  bool isLittleEndian = Subtarget.isLittleEndian();
+   unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
+
+   const BasicBlock *LLVM_BB = BB->getBasicBlock();
+@@ -8542,7 +8543,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
+                                           : &PPC::GPRCRegClass;
+   unsigned PtrReg = RegInfo.createVirtualRegister(RC);
+   unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+-  unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
++  unsigned ShiftReg =
++    isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
+   unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
+   unsigned MaskReg = RegInfo.createVirtualRegister(RC);
+   unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
+@@ -8587,8 +8589,9 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
+   }
+   BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
+       .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+-  BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+-      .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
++  if (!isLittleEndian)
++    BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
++        .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+   if (is64bit)
+     BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
+       .addReg(Ptr1Reg).addImm(0).addImm(61);
+@@ -9293,6 +9296,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
+     // since we're actually doing arithmetic on them.  Other registers
+     // can be 32-bit.
+     bool is64bit = Subtarget.isPPC64();
++    bool isLittleEndian = Subtarget.isLittleEndian();
+     bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
+
+     unsigned dest = MI.getOperand(0).getReg();
+@@ -9319,7 +9323,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
+                                             : &PPC::GPRCRegClass;
+     unsigned PtrReg = RegInfo.createVirtualRegister(RC);
+     unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+-    unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
++    unsigned ShiftReg =
++      isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
+     unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
+     unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
+     unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
+@@ -9374,8 +9379,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
+     }
+     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
+         .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+-    BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+-        .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
++    if (!isLittleEndian)
++      BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
++          .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+     if (is64bit)
+       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
+         .addReg(Ptr1Reg).addImm(0).addImm(61);
+diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll
+index 1857d5d..bafabdb 100644
+--- a/test/CodeGen/PowerPC/atomic-2.ll
++++ b/test/CodeGen/PowerPC/atomic-2.ll
+@@ -1,4 +1,5 @@
+-; RUN: llc < %s -march=ppc64 | FileCheck %s
++; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE
++; RUN: llc < %s -march=ppc64le | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE
+ ; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s
+ ; RUN: llc < %s -march=ppc64 -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-P8U
+
+@@ -12,6 +13,8 @@ define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
+
+ define i8 @exchange_and_add8(i8* %mem, i8 %val) nounwind {
+ ; CHECK-LABEL: exchange_and_add8:
++; CHECK-BE: xori
++; CHECK-LE-NOT: xori
+ ; CHECK-P8U: lbarx
+   %tmp = atomicrmw add i8* %mem, i8 %val monotonic
+ ; CHECK-P8U: stbcx.
+@@ -20,6 +23,8 @@ define i8 @exchange_and_add8(i8* %mem, i8 %val) nounwind {
+
+ define i16 @exchange_and_add16(i16* %mem, i16 %val) nounwind {
+ ; CHECK-LABEL: exchange_and_add16:
++; CHECK-BE: xori
++; CHECK-LE-NOT: xori
+ ; CHECK-P8U: lharx
+   %tmp = atomicrmw add i16* %mem, i16 %val monotonic
+ ; CHECK-P8U: sthcx.
+@@ -38,6 +43,8 @@ define i64 @exchange_and_cmp(i64* %mem) nounwind {
+
+ define i8 @exchange_and_cmp8(i8* %mem) nounwind {
+ ; CHECK-LABEL: exchange_and_cmp8:
++; CHECK-BE: xori
++; CHECK-LE-NOT: xori
+ ; CHECK-P8U: lbarx
+   %tmppair = cmpxchg i8* %mem, i8 0, i8 1 monotonic monotonic
+   %tmp = extractvalue { i8, i1 } %tmppair, 0
+@@ -48,6 +55,8 @@ define i8 @exchange_and_cmp8(i8* %mem) nounwind {
+
+ define i16 @exchange_and_cmp16(i16* %mem) nounwind {
+ ; CHECK-LABEL: exchange_and_cmp16:
++; CHECK-BE: xori
++; CHECK-LE-NOT: xori
+ ; CHECK-P8U: lharx
+   %tmppair = cmpxchg i16* %mem, i16 0, i16 1 monotonic monotonic
+   %tmp = extractvalue { i16, i1 } %tmppair, 0
+@@ -66,6 +75,8 @@ define i64 @exchange(i64* %mem, i64 %val) nounwind {
+
+ define i8 @exchange8(i8* %mem, i8 %val) nounwind {
+ ; CHECK-LABEL: exchange8:
++; CHECK-BE: xori
++; CHECK-LE-NOT: xori
+ ; CHECK-P8U: lbarx
+   %tmp = atomicrmw xchg i8* %mem, i8 1 monotonic
+ ; CHECK-P8U: stbcx.
+@@ -74,6 +85,8 @@ define i8 @exchange8(i8* %mem, i8 %val) nounwind {
+
+ define i16 @exchange16(i16* %mem, i16 %val) nounwind {
+ ; CHECK-LABEL: exchange16:
++; CHECK-BE: xori
++; CHECK-LE-NOT: xori
+ ; CHECK-P8U: lharx
+   %tmp = atomicrmw xchg i16* %mem, i16 1 monotonic
+ ; CHECK-P8U: sthcx.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		f405b5819fd19276b2de4da80a51171a3b1c5c00df810e1f284184c693e410775fe2933576f942b881664dd3b75e78f0f15b7e5a3eca7819ffbcfe528d9a578e
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		8d15c0315d7a89d9336e0ae4b72775d0815a8e7d962056645e1d0c6a23759bb2d8133c9ad2164e181f017641e8a184a062fbc876a371b81d5b1c2626405f6616
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		f18ae32531218ab156c3e56c11826be0fd05f0799c82eaf2e6a043b38e1277560b4d921987513b88b11d97ea2619feaf277eec72181dd2ae4f4108c2836bf7b2