diff --git a/deps/Makefile b/deps/Makefile
index 68be2c6ca4aca..0f4163e33932c 100644
--- a/deps/Makefile
+++ b/deps/Makefile
@@ -658,6 +658,12 @@ $$(LLVM_SRC_DIR)/$1.patch-applied: $(LLVM_SRC_DIR)/configure | $$(SRCDIR)/$1.pat
 	echo 1 > $$@
 LLVM_PATCH_LIST += $$(LLVM_SRC_DIR)/$1.patch-applied
 endef
+define LLVM_PATCH0
+$$(LLVM_SRC_DIR)/$1.patch-applied: $(LLVM_SRC_DIR)/configure | $$(SRCDIR)/$1.patch
+	cd $$(LLVM_SRC_DIR) && patch -p0 < $$(SRCDIR)/$1.patch
+	echo 1 > $$@
+LLVM_PATCH_LIST += $$(LLVM_SRC_DIR)/$1.patch-applied
+endef
 ifeq ($(LLVM_VER),3.3)
 $(eval $(call LLVM_PATCH,llvm-3.3))
 $(eval $(call LLVM_PATCH,instcombine-llvm-3.3))
@@ -671,7 +677,10 @@ else ifeq ($(LLVM_VER),3.7.1)
 $(eval $(call LLVM_PATCH,llvm-3.7.1))
 $(eval $(call LLVM_PATCH,llvm-3.7.1_2))
 $(eval $(call LLVM_PATCH,llvm-3.7.1_3))
+$(eval $(call LLVM_PATCH0,llvm-D14260))
 $(LLVM_SRC_DIR)/llvm-3.7.1_2.patch-applied: $(LLVM_SRC_DIR)/llvm-3.7.1.patch-applied
+$(LLVM_SRC_DIR)/llvm-3.7.1_3.patch-applied: $(LLVM_SRC_DIR)/llvm-3.7.1_2.patch-applied
+$(LLVM_SRC_DIR)/llvm-D14260.patch-applied: $(LLVM_SRC_DIR)/llvm-3.7.1_3.patch-applied
 endif # LLVM_VER
 
 ifeq ($(LLVM_VER),3.7.1)
diff --git a/deps/llvm-D14260.patch b/deps/llvm-D14260.patch
new file mode 100644
index 0000000000000..ab4bc5b9329f1
--- /dev/null
+++ b/deps/llvm-D14260.patch
@@ -0,0 +1,136 @@
+Index: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+===================================================================
+--- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
++++ lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+@@ -879,6 +879,61 @@
+   return nullptr;
+ }
+ 
++/// \brief Look for extractelement/insertvalue sequence that acts like a bitcast.
++///
++/// \returns underlying value that was "cast", or nullptr otherwise.
++///
++/// For example, if we have:
++///
++///     %E0 = extractelement <2 x double> %U, i32 0
++///     %V0 = insertvalue [2 x double] undef, double %E0, 0
++///     %E1 = extractelement <2 x double> %U, i32 1
++///     %V1 = insertvalue [2 x double] %V0, double %E1, 1
++///
++/// and the layout of a <2 x double> is isomorphic to a [2 x double],
++/// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
++/// Note that %U may contain non-undef values where %V1 has undef.
++static Value* likeBitCastFromVector(InstCombiner &IC, Value* V) {
++  Value *U = nullptr;
++  while (auto *IV = dyn_cast<InsertValueInst>(V)) {
++    auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
++    if (!E)
++      return nullptr;
++    auto *W = E->getVectorOperand();
++    if (!U)
++      U = W;
++    else if (U != W)
++      return nullptr;
++    auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
++    if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
++      return nullptr;
++    V = IV->getAggregateOperand();
++  }
++  if (!isa<UndefValue>(V) ||!U)
++    return nullptr;
++
++  VectorType *UT = cast<VectorType>(U->getType());
++  Type *VT = V->getType();
++  // Check that types UT and VT are bitwise isomorphic.
++  const DataLayout &DL = IC.getDataLayout();
++  if (DL.getTypeSizeInBits(UT) != DL.getTypeSizeInBits(VT)) {
++    return nullptr;
++  }
++  if (ArrayType *AT = dyn_cast<ArrayType>(VT)) {
++    if (AT->getNumElements() != UT->getNumElements())
++      return nullptr;
++  } else {
++    StructType *ST = cast<StructType>(VT);
++    if (ST->getNumElements() != UT->getNumElements())
++      return nullptr;
++    for (const Type *EltT : ST->elements()) {
++      if (EltT != UT->getElementType())
++        return nullptr;
++    }
++  }
++  return U;
++}
++
+ /// \brief Combine stores to match the type of value being stored.
+ ///
+ /// The core idea here is that the memory does not have any intrinsic type and
+@@ -914,6 +969,11 @@
+     return true;
+   }
+ 
++  if (Value *U = likeBitCastFromVector(IC, V)) {
++    combineStoreToNewValue(IC, SI, U);
++    return true;
++  }
++
+   // FIXME: We should also canonicalize loads of vectors when their elements are
+   // cast to other types.
+   return false;
+Index: test/Transforms/InstCombine/insert-val-extract-elem.ll
+===================================================================
+--- test/Transforms/InstCombine/insert-val-extract-elem.ll
++++ test/Transforms/InstCombine/insert-val-extract-elem.ll
+@@ -0,0 +1,53 @@
++; RUN: opt -S -instcombine %s | FileCheck %s
++
++; CHECK-NOT: insertvalue
++; CHECK-NOT: extractelement
++; CHECK: store <2 x double>
++define void @julia_2xdouble([2 x double]* sret, <2 x double>*) {
++top:
++  %x = load <2 x double>, <2 x double>* %1
++  %x0 = extractelement <2 x double> %x, i32 0
++  %i0 = insertvalue [2 x double] undef, double %x0, 0
++  %x1 = extractelement <2 x double> %x, i32 1
++  %i1 = insertvalue [2 x double] %i0, double %x1, 1
++  store [2 x double] %i1, [2 x double]* %0, align 4
++  ret void
++}
++
++; CHECK-NOT: insertvalue
++; CHECK-NOT: extractelement
++; CHECK: store <4 x float>
++define void @julia_4xfloat([4 x float]* sret, <4 x float>*) {
++top:
++  %x = load <4 x float>, <4 x float>* %1
++  %x0 = extractelement <4 x float> %x, i32 0
++  %i0 = insertvalue [4 x float] undef, float %x0, 0
++  %x1 = extractelement <4 x float> %x, i32 1
++  %i1 = insertvalue [4 x float] %i0, float %x1, 1
++  %x2 = extractelement <4 x float> %x, i32 2
++  %i2 = insertvalue [4 x float] %i1, float %x2, 2
++  %x3 = extractelement <4 x float> %x, i32 3
++  %i3 = insertvalue [4 x float] %i2, float %x3, 3
++  store [4 x float] %i3, [4 x float]* %0, align 4
++  ret void
++}
++
++%pseudovec = type { float, float, float, float }
++
++; CHECK-NOT: insertvalue
++; CHECK-NOT: extractelement
++; CHECK: store <4 x float>
++define void @julia_pseudovec(%pseudovec* sret, <4 x float>*) {
++top:
++  %x = load <4 x float>, <4 x float>* %1
++  %x0 = extractelement <4 x float> %x, i32 0
++  %i0 = insertvalue %pseudovec undef, float %x0, 0
++  %x1 = extractelement <4 x float> %x, i32 1
++  %i1 = insertvalue %pseudovec %i0, float %x1, 1
++  %x2 = extractelement <4 x float> %x, i32 2
++  %i2 = insertvalue %pseudovec %i1, float %x2, 2
++  %x3 = extractelement <4 x float> %x, i32 3
++  %i3 = insertvalue %pseudovec %i2, float %x3, 3
++  store %pseudovec %i3, %pseudovec* %0, align 4
++  ret void
++}