[msan] Support vst{2,3,4}_lane instructions #101215
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-compiler-rt-sanitizer

Author: Thurston Dang (thurstond)

Changes: This generalizes MSan's Arm NEON vst support to include the lane-specific variants. It also updates the test from #100645.

Patch is 34.38 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/101215.diff. 2 files affected (diff follows the short sketch below):
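For context, a minimal C sketch of the kind of source that lowers to one of the newly handled intrinsics. This is only a sketch, assuming an AArch64 target with arm_neon.h; the function name is illustrative, and vst2q_lane_s32 is one of the sources of the llvm.aarch64.neon.st2lane calls visible in the test diff below.

#include <arm_neon.h>

// Stores lane 1 of each of the two input vectors to out (2 x i32 total);
// on AArch64 this lowers to llvm.aarch64.neon.st2lane.v4i32.p0.
void store_lane1(int32x4_t a, int32x4_t b, int32_t *out) {
  int32x4x2_t pair = {{a, b}};
  vst2q_lane_s32(out, pair, 1);
}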
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 45b3edf2b8f23..fefca9e7c49c7 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3873,38 +3873,48 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
- /// Handle Arm NEON vector store intrinsics (vst{2,3,4} and vst1x_{2,3,4}).
+ /// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
+ /// and vst{2,3,4}lane).
///
/// Arm NEON vector store intrinsics have the output address (pointer) as the
- /// last argument, with the initial arguments being the inputs. They return
- /// void.
+ /// last argument, with the initial arguments being the inputs (and lane
+ /// number for vst{2,3,4}lane). They return void.
///
/// - st4 interleaves the output e.g., st4 (inA, inB, inC, inD, outP) writes
/// abcdabcdabcdabcd... into *outP
/// - st1_x4 is non-interleaved e.g., st1_x4 (inA, inB, inC, inD, outP)
/// writes aaaa...bbbb...cccc...dddd... into *outP
+ /// - st4lane has arguments of (inA, inB, inC, inD, lane, outP)
/// These instructions can all be instrumented with essentially the same
/// MSan logic, simply by applying the corresponding intrinsic to the shadow.
- void handleNEONVectorStoreIntrinsic(IntrinsicInst &I) {
+ void handleNEONVectorStoreIntrinsic(IntrinsicInst &I, bool useLane) {
IRBuilder<> IRB(&I);
// Don't use getNumOperands() because it includes the callee
int numArgOperands = I.arg_size();
- assert(numArgOperands >= 1);
- // The last arg operand is the output
+ // The last arg operand is the output (pointer)
+ assert(numArgOperands >= 1);
Value *Addr = I.getArgOperand(numArgOperands - 1);
assert(Addr->getType()->isPointerTy());
+ unsigned int skipTrailingOperands = 1;
if (ClCheckAccessAddress)
insertShadowCheck(Addr, &I);
- SmallVector<Value *, 8> Shadows;
- // Every arg operand, other than the last one, is an input vector
- for (int i = 0; i < numArgOperands - 1; i++) {
+ // Second-last operand is the lane number (for vst{2,3,4}lane)
+ if (useLane) {
+      skipTrailingOperands++;
+ assert(numArgOperands >= (int)skipTrailingOperands);
+ assert(isa<IntegerType>(I.getArgOperand(numArgOperands - skipTrailingOperands)->getType()));
+ }
+
+ SmallVector<Value *, 8> ShadowArgs;
+ // All the initial operands are the inputs
+ for (unsigned int i = 0; i < numArgOperands - skipTrailingOperands; i++) {
assert(isa<FixedVectorType>(I.getArgOperand(i)->getType()));
Value *Shadow = getShadow(&I, i);
- Shadows.append(1, Shadow);
+ ShadowArgs.append(1, Shadow);
}
// MSan's GetShadowTy assumes the LHS is the type we want the shadow for
@@ -3921,20 +3931,20 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
FixedVectorType *OutputVectorTy = FixedVectorType::get(
cast<FixedVectorType>(I.getArgOperand(0)->getType())->getElementType(),
cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements() *
- (numArgOperands - 1));
+ (numArgOperands - skipTrailingOperands));
Type *OutputShadowTy = getShadowTy(OutputVectorTy);
+ if (useLane)
+ ShadowArgs.append(1, I.getArgOperand(numArgOperands - skipTrailingOperands));
+
Value *OutputShadowPtr, *OutputOriginPtr;
// AArch64 NEON does not need alignment (unless OS requires it)
std::tie(OutputShadowPtr, OutputOriginPtr) = getShadowOriginPtr(
Addr, IRB, OutputShadowTy, Align(1), /*isStore*/ true);
- Shadows.append(1, OutputShadowPtr);
+ ShadowArgs.append(1, OutputShadowPtr);
- // CreateIntrinsic will select the correct (integer) type for the
- // intrinsic; the original instruction I may have either integer- or
- // float-type inputs.
CallInst *CI =
- IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), Shadows);
+ IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), ShadowArgs);
setShadow(&I, CI);
if (MS.TrackOrigins) {
@@ -3942,8 +3952,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// more accurately track the origins (e.g., if both inputs are
// uninitialized for vst2, we currently blame the second input, even
// though part of the output depends only on the first input).
+ //
+ // This is particularly imprecise for vst{2,3,4}lane, since only one
+ // lane of each input is actually copied to the output.
OriginCombiner OC(this, IRB);
- for (int i = 0; i < numArgOperands - 1; i++)
+ for (unsigned int i = 0; i < numArgOperands - skipTrailingOperands; i++)
OC.Add(I.getArgOperand(i));
const DataLayout &DL = F.getDataLayout();
@@ -4299,7 +4312,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::aarch64_neon_st2:
case Intrinsic::aarch64_neon_st3:
case Intrinsic::aarch64_neon_st4: {
- handleNEONVectorStoreIntrinsic(I);
+ handleNEONVectorStoreIntrinsic(I, false);
+ break;
+ }
+
+ case Intrinsic::aarch64_neon_st2lane:
+ case Intrinsic::aarch64_neon_st3lane:
+ case Intrinsic::aarch64_neon_st4lane: {
+ handleNEONVectorStoreIntrinsic(I, true);
break;
}
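To make the interleaving comment in the handler above concrete, here is a small portable C sketch (scalar emulation only; helper names are illustrative and not part of the patch) of what st4, st1_x4, and st4lane write for 4-element inputs:

enum { N = 4 };

// st4: interleaved output -> a0 b0 c0 d0 a1 b1 c1 d1 ...
static void emulate_st4(const int *a, const int *b, const int *c,
                        const int *d, int *out) {
  for (int i = 0; i < N; i++) {
    out[4 * i + 0] = a[i];
    out[4 * i + 1] = b[i];
    out[4 * i + 2] = c[i];
    out[4 * i + 3] = d[i];
  }
}

// st1_x4: concatenated output -> a0..a3 b0..b3 c0..c3 d0..d3
static void emulate_st1_x4(const int *a, const int *b, const int *c,
                           const int *d, int *out) {
  for (int i = 0; i < N; i++) {
    out[0 * N + i] = a[i];
    out[1 * N + i] = b[i];
    out[2 * N + i] = c[i];
    out[3 * N + i] = d[i];
  }
}

// st4lane: one lane from each input -> a[lane] b[lane] c[lane] d[lane]
static void emulate_st4lane(const int *a, const int *b, const int *c,
                            const int *d, int lane, int *out) {
  out[0] = a[lane];
  out[1] = b[lane];
  out[2] = c[lane];
  out[3] = d[lane];
}

The st4lane case also shows why the origin tracking noted in the patch is imprecise for the lane variants: only one element of each input reaches memory, yet the whole input's origin is combined.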
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst_lane.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst_lane.ll
index b7a2721e80e2b..9ed364df3e677 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst_lane.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst_lane.ll
@@ -1601,22 +1601,20 @@ define void @st1lane0_ro_1d_double(<1 x double> %A, ptr %D, i64 %offset) sanitiz
define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st2lane_16b(
; CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i64 1, ptr [[TMP6]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
-; CHECK: 6:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK: 8:
; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> [[A]], <16 x i8> [[B]], i64 1, ptr [[D]])
; CHECK-NEXT: ret void
;
@@ -1627,22 +1625,20 @@ define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, ptr %D) sanitize_memory {
define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st2lane_8h(
; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], i64 1, ptr [[TMP6]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
-; CHECK: 6:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK: 8:
; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> [[A]], <8 x i16> [[B]], i64 1, ptr [[D]])
; CHECK-NEXT: ret void
;
@@ -1653,22 +1649,20 @@ define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, ptr %D) sanitize_memory {
define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st2lane_4s(
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i64 1, ptr [[TMP6]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
-; CHECK: 6:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK: 8:
; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> [[A]], <4 x i32> [[B]], i64 1, ptr [[D]])
; CHECK-NEXT: ret void
;
@@ -1679,22 +1673,20 @@ define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, ptr %D) sanitize_memory {
define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st2lane_2d(
; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], i64 1, ptr [[TMP6]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
-; CHECK: 6:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK: 8:
; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> [[A]], <2 x i64> [[B]], i64 1, ptr [[D]])
; CHECK-NEXT: ret void
;
@@ -1710,22 +1702,17 @@ declare void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64>, <2 x i64>, i64, ptr)
define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st3lane_16b(
; CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 1, ptr [[TMP7]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1740,22 +1727,17 @@ define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %D) sanit
define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st3lane_8h(
; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], i64 1, ptr [[TMP7]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1770,22 +1752,17 @@ define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %D) saniti
define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st3lane_4s(
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[D]] to i...
[truncated]
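As an aside on the CHECK lines above: the constant 193514046488576 is 0xB00000000000, the XOR mask of the AArch64 MSan shadow mapping, which is why the instrumented code computes the shadow pointer with a ptrtoint/xor/inttoptr sequence. A one-line C sketch of that address computation (helper name illustrative):

#include <stdint.h>

// Maps an application address to its shadow address, matching the
// ptrtoint/xor/inttoptr sequence in the CHECK lines:
//   shadow = addr ^ 0xB00000000000 (= 193514046488576)
static inline uint64_t msan_shadow_addr(uint64_t app_addr) {
  return app_addr ^ 0xB00000000000ULL;
}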
✅ With the latest revision this PR passed the C/C++ code formatter.