Skip to content

Commit

Permalink
ARM64-SVE: Fix conditional select for Zeroing predicates #102904 (#10…
Browse files Browse the repository at this point in the history
  • Loading branch information
a74nh committed Aug 13, 2024
1 parent ae49f9e commit 329bff4
Show file tree
Hide file tree
Showing 7 changed files with 113 additions and 137 deletions.
18 changes: 13 additions & 5 deletions src/coreclr/jit/hwintrinsic.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,16 +236,18 @@ enum HWIntrinsicFlag : unsigned int
// then the intrinsic should be switched to a scalar only version.
HW_Flag_HasScalarInputVariant = 0x2000000,

// The intrinsic uses a mask in arg1 to select elements present in the result, and must use a low vector register.
HW_Flag_LowVectorOperation = 0x4000000,

// The intrinsic uses a mask in arg1 to select elements present in the result, which zeros inactive elements
// (instead of merging).
HW_Flag_ZeroingMaskedOperation = 0x8000000,

#endif // TARGET_XARCH

// The intrinsic is a FusedMultiplyAdd intrinsic
HW_Flag_FmaIntrinsic = 0x40000000,

#if defined(TARGET_ARM64)
// The intrinsic uses a mask in arg1 to select elements present in the result, and must use a low vector register.
HW_Flag_LowVectorOperation = 0x4000000,
#endif

HW_Flag_CanBenefitFromConstantProp = 0x80000000,
};

Expand Down Expand Up @@ -984,6 +986,12 @@ struct HWIntrinsicInfo
return (flags & HW_Flag_HasScalarInputVariant) != 0;
}

static bool IsZeroingMaskedOperation(NamedIntrinsic id)
{
const HWIntrinsicFlag flags = lookupFlags(id);
return (flags & HW_Flag_ZeroingMaskedOperation) != 0;
}

static NamedIntrinsic GetScalarInputVariant(NamedIntrinsic id)
{
assert(HasScalarInputVariant(id));
Expand Down
174 changes: 87 additions & 87 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h

Large diffs are not rendered by default.

10 changes: 9 additions & 1 deletion src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4016,9 +4016,17 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* cndSelNode)
// `trueValue`
GenTreeHWIntrinsic* nestedCndSel = op2->AsHWIntrinsic();
GenTree* nestedOp1 = nestedCndSel->Op(1);
GenTree* nestedOp2 = nestedCndSel->Op(2);
assert(varTypeIsMask(nestedOp1));
assert(nestedOp2->OperIsHWIntrinsic());

if (nestedOp1->IsMaskAllBitsSet())
NamedIntrinsic nestedOp2Id = nestedOp2->AsHWIntrinsic()->GetHWIntrinsicId();

// If the nested op uses Pg/Z, then inactive lanes will result in zeros, so can only transform if
// op3 is all zeros.

if (nestedOp1->IsMaskAllBitsSet() &&
(!HWIntrinsicInfo::IsZeroingMaskedOperation(nestedOp2Id) || op3->IsVectorZero()))
{
GenTree* nestedOp2 = nestedCndSel->Op(2);
GenTree* nestedOp3 = nestedCndSel->Op(3);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,7 @@ namespace JIT.HardwareIntrinsics.Arm
test.RunStructFldScenario();

// Validates using inside ConditionalSelect with value falseValue
// Currently, using this operation in ConditionalSelect() gives incorrect result
// when falseReg == targetReg because this instruction uses Pg/Z to update the targetReg
// instead of Pg/M to merge it. As such, the value of falseReg is lost. Ideally, such
// instructions should be marked similar to RMW (a different flag name) to make sure that
// we do not assign falseReg/targetReg same. Then, we would do something like this:
//
// ldnf1sh target, pg/z, [x0]
// sel mask, target, target, falseReg
//
// This needs more careful thinking, so disabling it for now.
// test.ConditionalSelect_FalseOp();
test.ConditionalSelect_FalseOp();

// Validates using inside ConditionalSelect with zero falseValue
test.ConditionalSelect_ZeroOp();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,7 @@ namespace JIT.HardwareIntrinsics.Arm
test.RunStructFldScenario();

// Validates using inside ConditionalSelect with value falseValue
// Currently, using this operation in ConditionalSelect() gives incorrect result
// when falseReg == targetReg because this instruction uses Pg/Z to update the targetReg
// instead of Pg/M to merge it. As such, the value of falseReg is lost. Ideally, such
// instructions should be marked similar to RMW (a different flag name) to make sure that
// we do not assign falseReg/targetReg same. Then, we would do something like this:
//
// ldnf1sh target, pg/z, [x0]
// sel mask, target, target, falseReg
//
// This needs more careful thinking, so disabling it for now.
// test.ConditionalSelect_FalseOp();
test.ConditionalSelect_FalseOp();

// Validates using inside ConditionalSelect with zero falseValue
test.ConditionalSelect_ZeroOp();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,7 @@ namespace JIT.HardwareIntrinsics.Arm
test.RunStructFldScenario();

// Validates using inside ConditionalSelect with value falseValue
// Currently, using this operation in ConditionalSelect() gives incorrect result
// when falseReg == targetReg because this instruction uses Pg/Z to update the targetReg
// instead of Pg/M to merge it. As such, the value of falseReg is lost. Ideally, such
// instructions should be marked similar to RMW (a different flag name) to make sure that
// we do not assign falseReg/targetReg same. Then, we would do something like this:
//
// ldnf1sh target, pg/z, [x0]
// sel mask, target, target, falseReg
//
// This needs more careful thinking, so disabling it for now.
// test.ConditionalSelect_FalseOp();
test.ConditionalSelect_FalseOp();

// Validates using inside ConditionalSelect with zero falseValue
test.ConditionalSelect_ZeroOp();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,7 @@ namespace JIT.HardwareIntrinsics.Arm
test.RunStructFldScenario();

// Validates using inside ConditionalSelect with value falseValue
// Currently, using this operation in ConditionalSelect() gives incorrect result
// when falseReg == targetReg because this instruction uses Pg/Z to update the targetReg
// instead of Pg/M to merge it. As such, the value of falseReg is lost. Ideally, such
// instructions should be marked similar to RMW (a different flag name) to make sure that
// we do not assign falseReg/targetReg same. Then, we would do something like this:
//
// ldnf1sh target, pg/z, [x0]
// sel mask, target, target, falseReg
//
// This needs more careful thinking, so disabling it for now.
// test.ConditionalSelect_FalseOp();
test.ConditionalSelect_FalseOp();

// Validates using inside ConditionalSelect with zero falseValue
test.ConditionalSelect_ZeroOp();
Expand Down

0 comments on commit 329bff4

Please sign in to comment.