[msan] Support vst{2,3,4}_lane instructions #101215
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-compiler-rt-sanitizer

Author: Thurston Dang (thurstond)

Changes: This generalizes MSan's Arm NEON vst support to include the lane-specific variants. It also updates the test from #100645.

Patch is 34.38 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/101215.diff. 2 files affected (diff follows the short sketch below):
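For context, a minimal C sketch of the kind of source that lowers to one of the newly handled intrinsics. This is only a sketch, assuming an AArch64 target with arm_neon.h; the function name is illustrative, and vst2q_lane_s32 is one of the sources of the llvm.aarch64.neon.st2lane calls visible in the test diff below.

#include <arm_neon.h>

// Stores lane 1 of each of the two input vectors to out (2 x i32 total);
// on AArch64 this lowers to llvm.aarch64.neon.st2lane.v4i32.p0.
void store_lane1(int32x4_t a, int32x4_t b, int32_t *out) {
  int32x4x2_t pair = {{a, b}};
  vst2q_lane_s32(out, pair, 1);
}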
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 45b3edf2b8f23..fefca9e7c49c7 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3873,38 +3873,48 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
- /// Handle Arm NEON vector store intrinsics (vst{2,3,4} and vst1x_{2,3,4}).
+ /// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
+ /// and vst{2,3,4}lane).
///
/// Arm NEON vector store intrinsics have the output address (pointer) as the
- /// last argument, with the initial arguments being the inputs. They return
- /// void.
+ /// last argument, with the initial arguments being the inputs (and lane
+ /// number for vst{2,3,4}lane). They return void.
///
/// - st4 interleaves the output e.g., st4 (inA, inB, inC, inD, outP) writes
/// abcdabcdabcdabcd... into *outP
/// - st1_x4 is non-interleaved e.g., st1_x4 (inA, inB, inC, inD, outP)
/// writes aaaa...bbbb...cccc...dddd... into *outP
+ /// - st4lane has arguments of (inA, inB, inC, inD, lane, outP)
/// These instructions can all be instrumented with essentially the same
/// MSan logic, simply by applying the corresponding intrinsic to the shadow.
- void handleNEONVectorStoreIntrinsic(IntrinsicInst &I) {
+ void handleNEONVectorStoreIntrinsic(IntrinsicInst &I, bool useLane) {
IRBuilder<> IRB(&I);
// Don't use getNumOperands() because it includes the callee
int numArgOperands = I.arg_size();
- assert(numArgOperands >= 1);
- // The last arg operand is the output
+ // The last arg operand is the output (pointer)
+ assert(numArgOperands >= 1);
Value *Addr = I.getArgOperand(numArgOperands - 1);
assert(Addr->getType()->isPointerTy());
+ unsigned int skipTrailingOperands = 1;
if (ClCheckAccessAddress)
insertShadowCheck(Addr, &I);
- SmallVector<Value *, 8> Shadows;
- // Every arg operand, other than the last one, is an input vector
- for (int i = 0; i < numArgOperands - 1; i++) {
+ // Second-last operand is the lane number (for vst{2,3,4}lane)
+ if (useLane) {
+      skipTrailingOperands++;
+ assert(numArgOperands >= (int)skipTrailingOperands);
+ assert(isa<IntegerType>(I.getArgOperand(numArgOperands - skipTrailingOperands)->getType()));
+ }
+
+ SmallVector<Value *, 8> ShadowArgs;
+ // All the initial operands are the inputs
+ for (unsigned int i = 0; i < numArgOperands - skipTrailingOperands; i++) {
assert(isa<FixedVectorType>(I.getArgOperand(i)->getType()));
Value *Shadow = getShadow(&I, i);
- Shadows.append(1, Shadow);
+ ShadowArgs.append(1, Shadow);
}
// MSan's GetShadowTy assumes the LHS is the type we want the shadow for
@@ -3921,20 +3931,20 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
FixedVectorType *OutputVectorTy = FixedVectorType::get(
cast<FixedVectorType>(I.getArgOperand(0)->getType())->getElementType(),
cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements() *
- (numArgOperands - 1));
+ (numArgOperands - skipTrailingOperands));
Type *OutputShadowTy = getShadowTy(OutputVectorTy);
+ if (useLane)
+ ShadowArgs.append(1, I.getArgOperand(numArgOperands - skipTrailingOperands));
+
Value *OutputShadowPtr, *OutputOriginPtr;
// AArch64 NEON does not need alignment (unless OS requires it)
std::tie(OutputShadowPtr, OutputOriginPtr) = getShadowOriginPtr(
Addr, IRB, OutputShadowTy, Align(1), /*isStore*/ true);
- Shadows.append(1, OutputShadowPtr);
+ ShadowArgs.append(1, OutputShadowPtr);
- // CreateIntrinsic will select the correct (integer) type for the
- // intrinsic; the original instruction I may have either integer- or
- // float-type inputs.
CallInst *CI =
- IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), Shadows);
+ IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), ShadowArgs);
setShadow(&I, CI);
if (MS.TrackOrigins) {
@@ -3942,8 +3952,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// more accurately track the origins (e.g., if both inputs are
// uninitialized for vst2, we currently blame the second input, even
// though part of the output depends only on the first input).
+ //
+ // This is particularly imprecise for vst{2,3,4}lane, since only one
+ // lane of each input is actually copied to the output.
OriginCombiner OC(this, IRB);
- for (int i = 0; i < numArgOperands - 1; i++)
+ for (unsigned int i = 0; i < numArgOperands - skipTrailingOperands; i++)
OC.Add(I.getArgOperand(i));
const DataLayout &DL = F.getDataLayout();
@@ -4299,7 +4312,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::aarch64_neon_st2:
case Intrinsic::aarch64_neon_st3:
case Intrinsic::aarch64_neon_st4: {
- handleNEONVectorStoreIntrinsic(I);
+ handleNEONVectorStoreIntrinsic(I, false);
+ break;
+ }
+
+ case Intrinsic::aarch64_neon_st2lane:
+ case Intrinsic::aarch64_neon_st3lane:
+ case Intrinsic::aarch64_neon_st4lane: {
+ handleNEONVectorStoreIntrinsic(I, true);
break;
}
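To make the interleaving comment in the handler above concrete, here is a small portable C sketch (scalar emulation only; helper names are illustrative and not part of the patch) of what st4, st1_x4, and st4lane write for 4-element inputs:

enum { N = 4 };

// st4: interleaved output -> a0 b0 c0 d0 a1 b1 c1 d1 ...
static void emulate_st4(const int *a, const int *b, const int *c,
                        const int *d, int *out) {
  for (int i = 0; i < N; i++) {
    out[4 * i + 0] = a[i];
    out[4 * i + 1] = b[i];
    out[4 * i + 2] = c[i];
    out[4 * i + 3] = d[i];
  }
}

// st1_x4: concatenated output -> a0..a3 b0..b3 c0..c3 d0..d3
static void emulate_st1_x4(const int *a, const int *b, const int *c,
                           const int *d, int *out) {
  for (int i = 0; i < N; i++) {
    out[0 * N + i] = a[i];
    out[1 * N + i] = b[i];
    out[2 * N + i] = c[i];
    out[3 * N + i] = d[i];
  }
}

// st4lane: one lane from each input -> a[lane] b[lane] c[lane] d[lane]
static void emulate_st4lane(const int *a, const int *b, const int *c,
                            const int *d, int lane, int *out) {
  out[0] = a[lane];
  out[1] = b[lane];
  out[2] = c[lane];
  out[3] = d[lane];
}

The st4lane case also shows why the origin tracking noted in the patch is imprecise for the lane variants: only one element of each input reaches memory, yet the whole input's origin is combined.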
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst_lane.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst_lane.ll
index b7a2721e80e2b..9ed364df3e677 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst_lane.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst_lane.ll
@@ -1601,22 +1601,20 @@ define void @st1lane0_ro_1d_double(<1 x double> %A, ptr %D, i64 %offset) sanitiz
define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st2lane_16b(
; CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i64 1, ptr [[TMP6]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
-; CHECK: 6:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK: 8:
; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> [[A]], <16 x i8> [[B]], i64 1, ptr [[D]])
; CHECK-NEXT: ret void
;
@@ -1627,22 +1625,20 @@ define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, ptr %D) sanitize_memory {
define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st2lane_8h(
; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], i64 1, ptr [[TMP6]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
-; CHECK: 6:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK: 8:
; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> [[A]], <8 x i16> [[B]], i64 1, ptr [[D]])
; CHECK-NEXT: ret void
;
@@ -1653,22 +1649,20 @@ define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, ptr %D) sanitize_memory {
define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st2lane_4s(
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i64 1, ptr [[TMP6]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
-; CHECK: 6:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK: 8:
; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> [[A]], <4 x i32> [[B]], i64 1, ptr [[D]])
; CHECK-NEXT: ret void
;
@@ -1679,22 +1673,20 @@ define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, ptr %D) sanitize_memory {
define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st2lane_2d(
; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], i64 1, ptr [[TMP6]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
-; CHECK: 6:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK: 8:
; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> [[A]], <2 x i64> [[B]], i64 1, ptr [[D]])
; CHECK-NEXT: ret void
;
@@ -1710,22 +1702,17 @@ declare void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64>, <2 x i64>, i64, ptr)
define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st3lane_16b(
; CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 1, ptr [[TMP7]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1740,22 +1727,17 @@ define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %D) sanit
define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st3lane_8h(
; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], i64 1, ptr [[TMP7]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1770,22 +1752,17 @@ define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %D) saniti
define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st3lane_4s(
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[D]] to i...
[truncated]
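As an aside on the CHECK lines above: the constant 193514046488576 is 0xB00000000000, the XOR mask of the AArch64 MSan shadow mapping, which is why the instrumented code computes the shadow pointer with a ptrtoint/xor/inttoptr sequence. A one-line C sketch of that address computation (helper name illustrative):

#include <stdint.h>

// Maps an application address to its shadow address, matching the
// ptrtoint/xor/inttoptr sequence in the CHECK lines:
//   shadow = addr ^ 0xB00000000000 (= 193514046488576)
static inline uint64_t msan_shadow_addr(uint64_t app_addr) {
  return app_addr ^ 0xB00000000000ULL;
}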
✅ With the latest revision this PR passed the C/C++ code formatter.