Skip to content

Commit

Permalink
Fix a bug around Vector4.Distance and Sse41.Insert lowering (#81725)
Browse files Browse the repository at this point in the history
* Adding a regression test for #81585

* Fixing a bug around Sse41.Insert lowering

* Ensure that Distance/DistanceSquared are correctly imported

* Account for another case around Sse41.Insert chain folding
  • Loading branch information
tannergooding committed Feb 7, 2023
1 parent 41772ba commit 32ea339
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 46 deletions.
8 changes: 7 additions & 1 deletion src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1314,10 +1314,14 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
// the zmask from op1. We expect that op2 has already been
// lowered and therefore the containment checks have happened

// Since this insert is applied after `op1Intrinsic`, we need to account for
// the possibility of `op1Intrinsic` zeroing the same element
// we're setting here, so that zmask bit is cleared before merging.

assert(op1Intrinsic->Op(2)->isContained());

ssize_t op1Ival = op1Idx->AsIntConCommon()->IconValue();
ival |= (op1Ival & 0x0F);
ival |= ((op1Ival & 0x0F) & ~(1 << count_d));
op3->AsIntConCommon()->SetIconValue(ival);

// Then we'll just carry the original non-zero input and
Expand All @@ -1335,6 +1339,8 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
// account, we can basically do the same thing here by merging this
// zmask into the ival from op1.

// Since this is a later op, direct merging is safe

ssize_t op1Ival = op1Idx->AsIntConCommon()->IconValue();
ival = op1Ival | zmask;
op3->AsIntConCommon()->SetIconValue(ival);
Expand Down
66 changes: 33 additions & 33 deletions src/coreclr/jit/simdashwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -946,39 +946,6 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic,
/* isSimdAsHWIntrinsic */ true);
}

case NI_Vector2_Distance:
case NI_Vector3_Distance:
case NI_Vector4_Distance:
{
op1 = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize,
/* isSimdAsHWIntrinsic */ true);

GenTree* clonedOp1;
op1 = impCloneExpr(op1, &clonedOp1, NO_CLASS_HANDLE, CHECK_SPILL_ALL,
nullptr DEBUGARG("Clone diff for vector distance"));

op1 = gtNewSimdDotProdNode(retType, op1, clonedOp1, simdBaseJitType, simdSize,
/* isSimdAsHWIntrinsic */ true);

return new (this, GT_INTRINSIC)
GenTreeIntrinsic(simdBaseType, op1, NI_System_Math_Sqrt, NO_METHOD_HANDLE);
}

case NI_Vector2_DistanceSquared:
case NI_Vector3_DistanceSquared:
case NI_Vector4_DistanceSquared:
{
op1 = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize,
/* isSimdAsHWIntrinsic */ true);

GenTree* clonedOp1;
op1 = impCloneExpr(op1, &clonedOp1, NO_CLASS_HANDLE, CHECK_SPILL_ALL,
nullptr DEBUGARG("Clone diff for vector distance squared"));

return gtNewSimdDotProdNode(retType, op1, clonedOp1, simdBaseJitType, simdSize,
/* isSimdAsHWIntrinsic */ true);
}

case NI_VectorT128_Floor:
#if defined(TARGET_XARCH)
case NI_VectorT256_Floor:
Expand Down Expand Up @@ -1382,6 +1349,39 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic,
break;
}

case NI_Vector2_Distance:
case NI_Vector3_Distance:
case NI_Vector4_Distance:
{
op1 = gtNewSimdBinOpNode(GT_SUB, simdType, op1, op2, simdBaseJitType, simdSize,
/* isSimdAsHWIntrinsic */ true);

GenTree* clonedOp1;
op1 = impCloneExpr(op1, &clonedOp1, NO_CLASS_HANDLE, CHECK_SPILL_ALL,
nullptr DEBUGARG("Clone diff for vector distance"));

op1 = gtNewSimdDotProdNode(retType, op1, clonedOp1, simdBaseJitType, simdSize,
/* isSimdAsHWIntrinsic */ true);

return new (this, GT_INTRINSIC)
GenTreeIntrinsic(retType, op1, NI_System_Math_Sqrt, NO_METHOD_HANDLE);
}

case NI_Vector2_DistanceSquared:
case NI_Vector3_DistanceSquared:
case NI_Vector4_DistanceSquared:
{
op1 = gtNewSimdBinOpNode(GT_SUB, simdType, op1, op2, simdBaseJitType, simdSize,
/* isSimdAsHWIntrinsic */ true);

GenTree* clonedOp1;
op1 = impCloneExpr(op1, &clonedOp1, NO_CLASS_HANDLE, CHECK_SPILL_ALL,
nullptr DEBUGARG("Clone diff for vector distance squared"));

return gtNewSimdDotProdNode(retType, op1, clonedOp1, simdBaseJitType, simdSize,
/* isSimdAsHWIntrinsic */ true);
}

case NI_Quaternion_Divide:
case NI_Vector2_Divide:
case NI_Vector2_op_Division:
Expand Down
Loading

0 comments on commit 32ea339

Please sign in to comment.