diff --git a/deps/Makefile b/deps/Makefile index 68be2c6ca4aca..0f4163e33932c 100644 --- a/deps/Makefile +++ b/deps/Makefile @@ -658,6 +658,12 @@ $$(LLVM_SRC_DIR)/$1.patch-applied: $(LLVM_SRC_DIR)/configure | $$(SRCDIR)/$1.pat echo 1 > $$@ LLVM_PATCH_LIST += $$(LLVM_SRC_DIR)/$1.patch-applied endef +define LLVM_PATCH0 +$$(LLVM_SRC_DIR)/$1.patch-applied: $(LLVM_SRC_DIR)/configure | $$(SRCDIR)/$1.patch + cd $$(LLVM_SRC_DIR) && patch -p0 < $$(SRCDIR)/$1.patch + echo 1 > $$@ +LLVM_PATCH_LIST += $$(LLVM_SRC_DIR)/$1.patch-applied +endef ifeq ($(LLVM_VER),3.3) $(eval $(call LLVM_PATCH,llvm-3.3)) $(eval $(call LLVM_PATCH,instcombine-llvm-3.3)) @@ -671,7 +677,10 @@ else ifeq ($(LLVM_VER),3.7.1) $(eval $(call LLVM_PATCH,llvm-3.7.1)) $(eval $(call LLVM_PATCH,llvm-3.7.1_2)) $(eval $(call LLVM_PATCH,llvm-3.7.1_3)) +$(eval $(call LLVM_PATCH0,llvm-D14260)) $(LLVM_SRC_DIR)/llvm-3.7.1_2.patch-applied: $(LLVM_SRC_DIR)/llvm-3.7.1.patch-applied +$(LLVM_SRC_DIR)/llvm-3.7.1_3.patch-applied: $(LLVM_SRC_DIR)/llvm-3.7.1_2.patch-applied +$(LLVM_SRC_DIR)/llvm-D14260.patch-applied: $(LLVM_SRC_DIR)/llvm-3.7.1_3.patch-applied endif # LLVM_VER ifeq ($(LLVM_VER),3.7.1) diff --git a/deps/llvm-D14260.patch b/deps/llvm-D14260.patch new file mode 100644 index 0000000000000..ab4bc5b9329f1 --- /dev/null +++ b/deps/llvm-D14260.patch @@ -0,0 +1,136 @@ +Index: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +=================================================================== +--- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp ++++ lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +@@ -879,6 +879,61 @@ + return nullptr; + } + ++/// \brief Look for extractelement/insertvalue sequence that acts like a bitcast. ++/// ++/// \returns underlying value that was "cast", or nullptr otherwise. 
++///
++/// For example, if we have:
++///
++///     %E0 = extractelement <2 x double> %U, i32 0
++///     %V0 = insertvalue [2 x double] undef, double %E0, 0
++///     %E1 = extractelement <2 x double> %U, i32 1
++///     %V1 = insertvalue [2 x double] %V0, double %E1, 1
++///
++/// and the layout of a <2 x double> is isomorphic to a [2 x double],
++/// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
++/// Note that %U may contain non-undef values where %V1 has undef.
++static Value* likeBitCastFromVector(InstCombiner &IC, Value* V) {
++  Value *U = nullptr;
++  while (auto *IV = dyn_cast<InsertValueInst>(V)) { // peel inserts, last one first
++    auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
++    if (!E)
++      return nullptr;
++    auto *W = E->getVectorOperand();
++    if (!U)
++      U = W;
++    else if (U != W) // every element must be extracted from the same vector
++      return nullptr;
++    auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
++    if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
++      return nullptr;
++    V = IV->getAggregateOperand();
++  }
++  if (!isa<UndefValue>(V) ||!U) // chain must bottom out at undef and be non-empty
++    return nullptr;
++
++  VectorType *UT = cast<VectorType>(U->getType());
++  Type *VT = V->getType();
++  // Check that types UT and VT are bitwise isomorphic.
++  const DataLayout &DL = IC.getDataLayout();
++  if (DL.getTypeSizeInBits(UT) != DL.getTypeSizeInBits(VT)) {
++    return nullptr;
++  }
++  if (ArrayType *AT = dyn_cast<ArrayType>(VT)) {
++    if (AT->getNumElements() != UT->getNumElements())
++      return nullptr;
++  } else {
++    StructType *ST = cast<StructType>(VT);
++    if (ST->getNumElements() != UT->getNumElements())
++      return nullptr;
++    for (const Type *EltT : ST->elements()) {
++      if (EltT != UT->getElementType())
++        return nullptr;
++    }
++  }
++  return U;
++}
++
+ /// \brief Combine stores to match the type of value being stored.
+ /// + /// The core idea here is that the memory does not have any intrinsic type and +@@ -914,6 +969,11 @@ + return true; + } + ++ if (Value *U = likeBitCastFromVector(IC, V)) { ++ combineStoreToNewValue(IC, SI, U); ++ return true; ++ } ++ + // FIXME: We should also canonicalize loads of vectors when their elements are + // cast to other types. + return false; +Index: test/Transforms/InstCombine/insert-val-extract-elem.ll +=================================================================== +--- test/Transforms/InstCombine/insert-val-extract-elem.ll ++++ test/Transforms/InstCombine/insert-val-extract-elem.ll +@@ -0,0 +1,53 @@ ++; RUN: opt -S -instcombine %s | FileCheck %s ++ ++; CHECK-NOT: insertvalue ++; CHECK-NOT: extractelement ++; CHECK: store <2 x double> ++define void @julia_2xdouble([2 x double]* sret, <2 x double>*) { ++top: ++ %x = load <2 x double>, <2 x double>* %1 ++ %x0 = extractelement <2 x double> %x, i32 0 ++ %i0 = insertvalue [2 x double] undef, double %x0, 0 ++ %x1 = extractelement <2 x double> %x, i32 1 ++ %i1 = insertvalue [2 x double] %i0, double %x1, 1 ++ store [2 x double] %i1, [2 x double]* %0, align 4 ++ ret void ++} ++ ++; CHECK-NOT: insertvalue ++; CHECK-NOT: extractelement ++; CHECK: store <4 x float> ++define void @julia_4xfloat([4 x float]* sret, <4 x float>*) { ++top: ++ %x = load <4 x float>, <4 x float>* %1 ++ %x0 = extractelement <4 x float> %x, i32 0 ++ %i0 = insertvalue [4 x float] undef, float %x0, 0 ++ %x1 = extractelement <4 x float> %x, i32 1 ++ %i1 = insertvalue [4 x float] %i0, float %x1, 1 ++ %x2 = extractelement <4 x float> %x, i32 2 ++ %i2 = insertvalue [4 x float] %i1, float %x2, 2 ++ %x3 = extractelement <4 x float> %x, i32 3 ++ %i3 = insertvalue [4 x float] %i2, float %x3, 3 ++ store [4 x float] %i3, [4 x float]* %0, align 4 ++ ret void ++} ++ ++%pseudovec = type { float, float, float, float } ++ ++; CHECK-NOT: insertvalue ++; CHECK-NOT: extractelement ++; CHECK: store <4 x float> ++define void 
@julia_pseudovec(%pseudovec* sret, <4 x float>*) { ++top: ++ %x = load <4 x float>, <4 x float>* %1 ++ %x0 = extractelement <4 x float> %x, i32 0 ++ %i0 = insertvalue %pseudovec undef, float %x0, 0 ++ %x1 = extractelement <4 x float> %x, i32 1 ++ %i1 = insertvalue %pseudovec %i0, float %x1, 1 ++ %x2 = extractelement <4 x float> %x, i32 2 ++ %i2 = insertvalue %pseudovec %i1, float %x2, 2 ++ %x3 = extractelement <4 x float> %x, i32 3 ++ %i3 = insertvalue %pseudovec %i2, float %x3, 3 ++ store %pseudovec %i3, %pseudovec* %0, align 4 ++ ret void ++}