From 45582878d7b137a10aad03a754ceec6715b2ed1e Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 17 Jul 2021 20:42:02 +0300 Subject: [PATCH 1/7] Optimize VectorX.Create via TestZ --- src/coreclr/jit/importer.cpp | 29 ++++++++++- src/coreclr/jit/morph.cpp | 48 +++++++++++++++++++ .../System/Runtime/Intrinsics/Vector128_1.cs | 9 ++++ .../System/Runtime/Intrinsics/Vector256_1.cs | 12 +---- 4 files changed, 86 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 5d6c0d7354f52..ea41eff2d0570 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -20219,8 +20219,33 @@ GenTree* Compiler::impInlineFetchArg(unsigned lclNum, InlArgInfo* inlArgInfo, In // TODO-1stClassStructs: We currently do not reuse an existing lclVar // if it is a struct, because it requires some additional handling. - if (!varTypeIsStruct(lclTyp) && !argInfo.argHasSideEff && !argInfo.argHasGlobRef && - !argInfo.argHasCallerLocalRef) + bool substitute = false; + switch (argNode->OperGet()) + { +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + { + // Enable for all parameterless (=invariant) hw intrinsics such as + // Vector128<>.Empty and Vector256<>.AllBitSets. We might consider + // doing that for Vector.Create(cns) as well. + if ((argNode->gtGetOp1() == nullptr) && (argNode->gtGetOp2() == nullptr)) + { + substitute = true; + } + break; + } +#endif + + // TODO: Enable substitution for CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE (typeof(T)) + // but in order to benefit from that, we need to move various "typeof + IsValueType" + // optimizations from importer to morph. + + default: + break; + } + + if (substitute || (!varTypeIsStruct(lclTyp) && !argInfo.argHasSideEff && !argInfo.argHasGlobRef && + !argInfo.argHasCallerLocalRef)) { /* Get a *LARGE* LCL_VAR node */ op1 = gtNewLclLNode(tmpNum, genActualType(lclTyp) DEBUGARG(lclNum)); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 057c318e81dcf..1b581100f2ebd 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -11598,6 +11598,54 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) case GT_PUTARG_TYPE: return fgMorphTree(tree->AsUnOp()->gtGetOp1()); +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + { + GenTreeHWIntrinsic* hw = tree->AsHWIntrinsic(); + switch (hw->gtHWIntrinsicId) + { + case NI_SSE2_Xor: + { + // Optimize Sse2.Xor(x, Vector128.Zero) to x + GenTree* op1 = hw->gtGetOp1(); + GenTree* op2 = hw->gtGetOp2(); + if (op1->OperIsHWIntrinsic() && (op1->AsHWIntrinsic()->gtHWIntrinsicId == NI_Vector128_get_Zero)) + { + INDEBUG(op2->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return op2; + } + if (op2->OperIsHWIntrinsic() && (op2->AsHWIntrinsic()->gtHWIntrinsicId == NI_Vector128_get_Zero)) + { + INDEBUG(op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return op1; + } + break; + } + + case NI_AVX_Xor: + { + // Optimize Avx.Xor(x, Vector128.Zero) to x + GenTree* op1 = hw->gtGetOp1(); + GenTree* op2 = hw->gtGetOp2(); + if (op1->OperIsHWIntrinsic() && (op1->AsHWIntrinsic()->gtHWIntrinsicId == NI_Vector256_get_Zero)) + { + INDEBUG(op2->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return op2; + } + if (op2->OperIsHWIntrinsic() && (op2->AsHWIntrinsic()->gtHWIntrinsicId == NI_Vector256_get_Zero)) + { + INDEBUG(op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return op1; + } + break; + } + + default: + break; + } + } +#endif + default: break; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs index 65a4bb0fd2077..ae353fba5af95 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs @@ -116,6 +116,15 @@ public bool Equals(Vector128 other) return Sse.MoveMask(result) == 0b1111; // We have one bit per element } + if (Sse41.IsSupported && (typeof(T) != typeof(double))) + { + Debug.Assert((typeof(T) != typeof(float)) + + // xor + testz is slightly better for integer types + Vector128 xored = Sse2.Xor(this.AsByte(), other.AsByte()); + return Sse41.TestZ(xored, xored); + } + if (Sse2.IsSupported) { if (typeof(T) == typeof(double)) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs index e336034617785..401539f7f7903 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs @@ -125,17 +125,9 @@ public bool Equals(Vector256 other) Vector256 result = Avx.Compare(this.AsDouble(), other.AsDouble(), FloatComparisonMode.OrderedEqualNonSignaling); return Avx.MoveMask(result) == 0b1111; // We have one bit per element } - } - - if (Avx2.IsSupported) - { - // Unlike float/double, there are no special values to consider - // for integral types and we can just do a comparison that all - // bytes are exactly the same. - Debug.Assert((typeof(T) != typeof(float)) && (typeof(T) != typeof(double))); - Vector256 result = Avx2.CompareEqual(this.AsByte(), other.AsByte()); - return Avx2.MoveMask(result) == unchecked((int)(0b1111_1111_1111_1111_1111_1111_1111_1111)); // We have one bit per element + Vector256 xored = Avx.Xor(this.AsByte(), other.AsByte()); + return Avx.TestZ(xored, xored); } return SoftwareFallback(in this, other); From c1294244423f1d3fc5ec0e595f127fb15dbe4d38 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 17 Jul 2021 21:17:26 +0300 Subject: [PATCH 2/7] Clean up --- src/coreclr/jit/morph.cpp | 104 ++++++++++-------- .../System/Runtime/Intrinsics/Vector128_1.cs | 2 +- .../System/Runtime/Intrinsics/Vector256_1.cs | 11 +- 3 files changed, 67 insertions(+), 50 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 1b581100f2ebd..1b7958f65680d 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -11598,54 +11598,6 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) case GT_PUTARG_TYPE: return fgMorphTree(tree->AsUnOp()->gtGetOp1()); -#ifdef FEATURE_HW_INTRINSICS - case GT_HWINTRINSIC: - { - GenTreeHWIntrinsic* hw = tree->AsHWIntrinsic(); - switch (hw->gtHWIntrinsicId) - { - case NI_SSE2_Xor: - { - // Optimize Sse2.Xor(x, Vector128.Zero) to x - GenTree* op1 = hw->gtGetOp1(); - GenTree* op2 = hw->gtGetOp2(); - if (op1->OperIsHWIntrinsic() && (op1->AsHWIntrinsic()->gtHWIntrinsicId == NI_Vector128_get_Zero)) - { - INDEBUG(op2->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - return op2; - } - if (op2->OperIsHWIntrinsic() && (op2->AsHWIntrinsic()->gtHWIntrinsicId == NI_Vector128_get_Zero)) - { - INDEBUG(op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - return op1; - } - break; - } - - case NI_AVX_Xor: - { - // Optimize Avx.Xor(x, Vector128.Zero) to x - GenTree* op1 = hw->gtGetOp1(); - GenTree* op2 = hw->gtGetOp2(); - if (op1->OperIsHWIntrinsic() && (op1->AsHWIntrinsic()->gtHWIntrinsicId == NI_Vector256_get_Zero)) - { - INDEBUG(op2->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - return op2; - } - if (op2->OperIsHWIntrinsic() && (op2->AsHWIntrinsic()->gtHWIntrinsicId == NI_Vector256_get_Zero)) - { - INDEBUG(op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - return op1; - } - break; - } - - default: - break; - } - } -#endif - default: break; } @@ -14338,6 +14290,62 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) } break; +#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) + case GT_HWINTRINSIC: + { + GenTreeHWIntrinsic* hw = tree->AsHWIntrinsic(); + switch (hw->gtHWIntrinsicId) + { + case NI_SSE2_Xor: + { + // Optimize Sse2.Xor(x, Vector128.Zero) to x + GenTree* op1 = hw->gtGetOp1(); + GenTree* op2 = hw->gtGetOp2(); + if (op1->OperIsHWIntrinsic() && (op1->AsHWIntrinsic()->gtHWIntrinsicId == NI_Vector128_get_Zero)) + { + DEBUG_DESTROY_NODE(tree); + DEBUG_DESTROY_NODE(op1); + INDEBUG(op2->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return op2; + } + if (op2->OperIsHWIntrinsic() && (op2->AsHWIntrinsic()->gtHWIntrinsicId == NI_Vector128_get_Zero)) + { + DEBUG_DESTROY_NODE(tree); + DEBUG_DESTROY_NODE(op2); + INDEBUG(op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return op1; + } + break; + } + + case NI_AVX2_Xor: + { + // Optimize Avx.Xor(x, Vector128.Zero) to x + GenTree* op1 = hw->gtGetOp1(); + GenTree* op2 = hw->gtGetOp2(); + if (op1->OperIsHWIntrinsic() && (op1->AsHWIntrinsic()->gtHWIntrinsicId == NI_Vector256_get_Zero)) + { + DEBUG_DESTROY_NODE(tree); + DEBUG_DESTROY_NODE(op1); + INDEBUG(op2->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return op2; + } + if (op2->OperIsHWIntrinsic() && (op2->AsHWIntrinsic()->gtHWIntrinsicId == NI_Vector256_get_Zero)) + { + DEBUG_DESTROY_NODE(tree); + DEBUG_DESTROY_NODE(op2); + INDEBUG(op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return op1; + } + break; + } + + default: + break; + } + } +#endif // defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) + default: break; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs index ae353fba5af95..00713f20ebf71 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs @@ -118,7 +118,7 @@ public bool Equals(Vector128 other) if (Sse41.IsSupported && (typeof(T) != typeof(double))) { - Debug.Assert((typeof(T) != typeof(float)) + Debug.Assert(typeof(T) != typeof(float)); // xor + testz is slightly better for integer types Vector128 xored = Sse2.Xor(this.AsByte(), other.AsByte()); diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs index 401539f7f7903..74b56a03b4db0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs @@ -125,8 +125,17 @@ public bool Equals(Vector256 other) Vector256 result = Avx.Compare(this.AsDouble(), other.AsDouble(), FloatComparisonMode.OrderedEqualNonSignaling); return Avx.MoveMask(result) == 0b1111; // We have one bit per element } + } + + if (Avx2.IsSupported) + { + // Unlike float/double, there are no special values to consider + // for integral types and we can just do a comparison that all + // bytes are exactly the same. + + Debug.Assert((typeof(T) != typeof(float)) && (typeof(T) != typeof(double))); - Vector256 xored = Avx.Xor(this.AsByte(), other.AsByte()); + Vector256 xored = Avx2.Xor(this.AsByte(), other.AsByte()); return Avx.TestZ(xored, xored); } From 16cc2341a4bccc772d219cf424db93510a9f3373 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 17 Jul 2021 21:27:30 +0300 Subject: [PATCH 3/7] Clean up --- src/coreclr/jit/morph.cpp | 40 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 1b7958f65680d..c85f53f0c5a87 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -14296,41 +14296,33 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) GenTreeHWIntrinsic* hw = tree->AsHWIntrinsic(); switch (hw->gtHWIntrinsicId) { + case NI_SSE_Xor: case NI_SSE2_Xor: - { - // Optimize Sse2.Xor(x, Vector128.Zero) to x - GenTree* op1 = hw->gtGetOp1(); - GenTree* op2 = hw->gtGetOp2(); - if (op1->OperIsHWIntrinsic() && (op1->AsHWIntrinsic()->gtHWIntrinsicId == NI_Vector128_get_Zero)) - { - DEBUG_DESTROY_NODE(tree); - DEBUG_DESTROY_NODE(op1); - INDEBUG(op2->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - return op2; - } - if (op2->OperIsHWIntrinsic() && (op2->AsHWIntrinsic()->gtHWIntrinsicId == NI_Vector128_get_Zero)) - { - DEBUG_DESTROY_NODE(tree); - DEBUG_DESTROY_NODE(op2); - INDEBUG(op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - return op1; - } - break; - } - + case NI_AVX_Xor: case NI_AVX2_Xor: { - // Optimize Avx.Xor(x, Vector128.Zero) to x + // Optimize Xor(x, Vector_.Zero) to just x GenTree* op1 = hw->gtGetOp1(); GenTree* op2 = hw->gtGetOp2(); - if (op1->OperIsHWIntrinsic() && (op1->AsHWIntrinsic()->gtHWIntrinsicId == NI_Vector256_get_Zero)) + + // Is node - Vector_.Zero ? + auto isHwZero = [](GenTree* node) -> bool { + if (node->OperIs(GT_HWINTRINSIC)) + { + NamedIntrinsic ni = node->AsHWIntrinsic()->gtHWIntrinsicId; + return (ni == NI_Vector128_get_Zero) || (ni == NI_Vector256_get_Zero); + } + return false; + }; + + if (isHwZero(op1)) { DEBUG_DESTROY_NODE(tree); DEBUG_DESTROY_NODE(op1); INDEBUG(op2->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); return op2; } - if (op2->OperIsHWIntrinsic() && (op2->AsHWIntrinsic()->gtHWIntrinsicId == NI_Vector256_get_Zero)) + if (isHwZero(op2)) { DEBUG_DESTROY_NODE(tree); DEBUG_DESTROY_NODE(op2); From b846016e15b1bc6e9cda4aaa4b639ca4032828d3 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 17 Jul 2021 21:52:45 +0300 Subject: [PATCH 4/7] Add missing break --- src/coreclr/jit/morph.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index c85f53f0c5a87..c8b5ea22b362c 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -14335,6 +14335,7 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) default: break; } + break; } #endif // defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) From cd3285015c51f8c34798ac11a8c05dcc6995d162 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 20 Jul 2021 15:23:49 +0300 Subject: [PATCH 5/7] Address feedback --- src/coreclr/jit/morph.cpp | 40 +++++++------------ .../System/Runtime/Intrinsics/Vector128_1.cs | 15 +++---- 2 files changed, 21 insertions(+), 34 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index c8b5ea22b362c..11ca5e76b8621 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -14296,38 +14296,28 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) GenTreeHWIntrinsic* hw = tree->AsHWIntrinsic(); switch (hw->gtHWIntrinsicId) { - case NI_SSE_Xor: case NI_SSE2_Xor: - case NI_AVX_Xor: case NI_AVX2_Xor: { - // Optimize Xor(x, Vector_.Zero) to just x + // Transform XOR(X, 0) to X for vectors GenTree* op1 = hw->gtGetOp1(); GenTree* op2 = hw->gtGetOp2(); - - // Is node - Vector_.Zero ? - auto isHwZero = [](GenTree* node) -> bool { - if (node->OperIs(GT_HWINTRINSIC)) + if (!gtIsActiveCSE_Candidate(tree)) + { + if (op1->IsIntegralConstVector(0) && !gtIsActiveCSE_Candidate(op1)) { - NamedIntrinsic ni = node->AsHWIntrinsic()->gtHWIntrinsicId; - return (ni == NI_Vector128_get_Zero) || (ni == NI_Vector256_get_Zero); + DEBUG_DESTROY_NODE(tree); + DEBUG_DESTROY_NODE(op1); + INDEBUG(op2->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return op2; + } + if (op2->IsIntegralConstVector(0) && !gtIsActiveCSE_Candidate(op2)) + { + DEBUG_DESTROY_NODE(tree); + DEBUG_DESTROY_NODE(op2); + INDEBUG(op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return op1; } - return false; - }; - - if (isHwZero(op1)) - { - DEBUG_DESTROY_NODE(tree); - DEBUG_DESTROY_NODE(op1); - INDEBUG(op2->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - return op2; - } - if (isHwZero(op2)) - { - DEBUG_DESTROY_NODE(tree); - DEBUG_DESTROY_NODE(op2); - INDEBUG(op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - return op1; } break; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs index 00713f20ebf71..e1ebac344cab0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs @@ -116,15 +116,6 @@ public bool Equals(Vector128 other) return Sse.MoveMask(result) == 0b1111; // We have one bit per element } - if (Sse41.IsSupported && (typeof(T) != typeof(double))) - { - Debug.Assert(typeof(T) != typeof(float)); - - // xor + testz is slightly better for integer types - Vector128 xored = Sse2.Xor(this.AsByte(), other.AsByte()); - return Sse41.TestZ(xored, xored); - } - if (Sse2.IsSupported) { if (typeof(T) == typeof(double)) @@ -132,6 +123,12 @@ public bool Equals(Vector128 other) Vector128 result = Sse2.CompareEqual(this.AsDouble(), other.AsDouble()); return Sse2.MoveMask(result) == 0b11; // We have one bit per element } + else if (Sse41.IsSupported) + { + // xor + testz is slightly better for integer types + Vector128 xored = Sse2.Xor(this.AsByte(), other.AsByte()); + return Sse41.TestZ(xored, xored); + } else { // Unlike float/double, there are no special values to consider From b8926663b5c80b04cfdb9a3ccb0189051e271168 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Tue, 7 Sep 2021 11:25:14 +0300 Subject: [PATCH 6/7] Update src/coreclr/jit/importer.cpp Co-authored-by: Tanner Gooding --- src/coreclr/jit/importer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index ea41eff2d0570..93da85a441729 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -20226,7 +20226,7 @@ GenTree* Compiler::impInlineFetchArg(unsigned lclNum, InlArgInfo* inlArgInfo, In case GT_HWINTRINSIC: { // Enable for all parameterless (=invariant) hw intrinsics such as - // Vector128<>.Empty and Vector256<>.AllBitSets. We might consider + // Vector128<>.Zero and Vector256<>.AllBitSets. We might consider // doing that for Vector.Create(cns) as well. if ((argNode->gtGetOp1() == nullptr) && (argNode->gtGetOp2() == nullptr)) { From 33e304a1f11a337cbd8735d23e1173356ce63de0 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 7 Sep 2021 11:47:59 +0300 Subject: [PATCH 7/7] Address feedback --- src/coreclr/jit/morph.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 7f4737fcfd18b..da5ccbbefd3f1 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -14229,7 +14229,9 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) GenTreeHWIntrinsic* hw = tree->AsHWIntrinsic(); switch (hw->gtHWIntrinsicId) { + case NI_SSE_Xor: case NI_SSE2_Xor: + case NI_AVX_Xor: case NI_AVX2_Xor: { // Transform XOR(X, 0) to X for vectors