Skip to content

Commit

Permalink
Lowering a subset of Vector512 methods for AVX-512. (#82953)
Browse files Browse the repository at this point in the history
* Load(), LoadUnsafe(), LoadAligned(), LoadAlignedNonTemporal()

* Store(), StoreUnsafe(), StoreAligned(), StoreAlignedNonTemporal()

* Fixing 'HasSideEffect()' check used for RedundantMov
  • Loading branch information
DeepakRajendrakumaran authored Mar 8, 2023
1 parent 633dcab commit 272fb4e
Show file tree
Hide file tree
Showing 7 changed files with 111 additions and 14 deletions.
51 changes: 49 additions & 2 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5843,7 +5843,13 @@ bool emitter::IsMovInstruction(instruction ins)
case INS_movaps:
case INS_movd:
case INS_movdqa:
case INS_movdqa32:
case INS_movdqa64:
case INS_movdqu:
case INS_movdqu8:
case INS_movdqu16:
case INS_movdqu32:
case INS_movdqu64:
case INS_movsdsse2:
case INS_movss:
case INS_movsx:
Expand Down Expand Up @@ -5927,8 +5933,23 @@ bool emitter::HasSideEffect(instruction ins, emitAttr size)
case INS_movupd:
case INS_movups:
{
// non EA_32BYTE moves clear the upper bits under VEX encoding
hasSideEffect = UseVEXEncoding() && (size != EA_32BYTE);
// TODO-XArch-AVX512 : Handle merge/masks scenarios once k-mask support is added for these.
// Under VEX encoding, non-EA_32BYTE moves clear the upper bits; under EVEX encoding, non-EA_64BYTE moves do.
if (UseVEXEncoding())
{
if (UseEvexEncoding())
{
hasSideEffect = (size != EA_64BYTE);
}
else
{
hasSideEffect = (size != EA_32BYTE);
}
}
else
{
hasSideEffect = false;
}
break;
}

Expand Down Expand Up @@ -5963,6 +5984,20 @@ bool emitter::HasSideEffect(instruction ins, emitAttr size)
break;
}

case INS_movdqa32:
case INS_movdqa64:
case INS_movdqu8:
case INS_movdqu16:
case INS_movdqu32:
case INS_movdqu64:
{
// These EVEX instructions merge/mask based on the k-register
// TODO-XArch-AVX512 : Handle merge/masks scenarios once k-mask support is added for these.
assert(UseEvexEncoding());
hasSideEffect = (size != EA_64BYTE);
break;
}

case INS_movsxd:
{
// Sign-extends the source
Expand Down Expand Up @@ -6152,7 +6187,13 @@ void emitter::emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regN
case INS_movapd:
case INS_movaps:
case INS_movdqa:
case INS_movdqa32:
case INS_movdqa64:
case INS_movdqu:
case INS_movdqu8:
case INS_movdqu16:
case INS_movdqu32:
case INS_movdqu64:
case INS_movsdsse2:
case INS_movss:
case INS_movupd:
Expand Down Expand Up @@ -17350,7 +17391,13 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
break;

case INS_movdqa:
case INS_movdqa32:
case INS_movdqa64:
case INS_movdqu:
case INS_movdqu8:
case INS_movdqu16:
case INS_movdqu32:
case INS_movdqu64:
case INS_movaps:
case INS_movups:
case INS_movapd:
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/emitxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ bool IsWEvexOpcodeExtension(const instrDesc* id)
case INS_vfnmsub231sd:
case INS_unpcklpd:
case INS_vpermilpdvar:
case INS_movdqa64:
case INS_movdqu16:
case INS_movdqu64:
case INS_vinsertf64x4:
Expand Down Expand Up @@ -402,6 +403,7 @@ bool IsWEvexOpcodeExtension(const instrDesc* id)
case INS_vpdpbusds:
case INS_vpdpwssds:
case INS_vpermilpsvar:
case INS_movdqa32:
case INS_movdqu8:
case INS_movdqu32:
case INS_vinsertf32x8:
Expand Down
32 changes: 28 additions & 4 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18931,6 +18931,7 @@ bool GenTree::isContainableHWIntrinsic() const
case NI_SSE2_LoadAlignedVector128:
case NI_SSE2_LoadScalarVector128:
case NI_AVX_LoadAlignedVector256:
case NI_AVX512F_LoadAlignedVector512:
{
// These loads are contained as part of a HWIntrinsic operation
return true;
Expand Down Expand Up @@ -21556,7 +21557,12 @@ GenTree* Compiler::gtNewSimdLoadAlignedNode(

NamedIntrinsic intrinsic = NI_Illegal;

if (simdSize == 32)
if (simdSize == 64)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
intrinsic = NI_AVX512F_LoadAlignedVector512;
}
else if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
intrinsic = NI_AVX_LoadAlignedVector256;
Expand Down Expand Up @@ -21617,7 +21623,15 @@ GenTree* Compiler::gtNewSimdLoadNonTemporalNode(
// We don't guarantee a non-temporal load will actually occur, so fallback
// to regular aligned loads if the required ISA isn't supported.

if (simdSize == 32)
if (simdSize == 64)
{
if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
{
intrinsic = NI_AVX512F_LoadAlignedVector512NonTemporal;
isNonTemporal = true;
}
}
else if (simdSize == 32)
{
if (compOpportunisticallyDependsOn(InstructionSet_AVX2))
{
Expand Down Expand Up @@ -22878,7 +22892,12 @@ GenTree* Compiler::gtNewSimdStoreAlignedNode(

NamedIntrinsic intrinsic = NI_Illegal;

if (simdSize == 32)
if (simdSize == 64)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
intrinsic = NI_AVX512F_StoreAligned;
}
else if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
intrinsic = NI_AVX_StoreAligned;
Expand Down Expand Up @@ -22935,7 +22954,12 @@ GenTree* Compiler::gtNewSimdStoreNonTemporalNode(

NamedIntrinsic intrinsic = NI_Illegal;

if (simdSize == 32)
if (simdSize == 64)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
intrinsic = NI_AVX512F_StoreAlignedNonTemporal;
}
else if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
intrinsic = NI_AVX_StoreAlignedNonTemporal;
Expand Down
13 changes: 13 additions & 0 deletions src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,14 @@ HARDWARE_INTRINSIC(Vector256, Xor,
// Vector512 Intrinsics
HARDWARE_INTRINSIC(Vector512, Create, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, get_Zero, 64, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Vector512, Load, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, LoadAligned, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, LoadAlignedNonTemporal, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, LoadUnsafe, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, Store, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, StoreAligned, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, StoreAlignedNonTemporal, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, StoreUnsafe, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
Expand Down Expand Up @@ -729,6 +737,11 @@ HARDWARE_INTRINSIC(AVX2, Xor,
// AVX512F Intrinsics
HARDWARE_INTRINSIC(AVX512F, BroadcastScalarToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_SIMDScalar, HW_Flag_MaybeMemoryLoad)
HARDWARE_INTRINSIC(AVX512F, InsertVector256, 64, 3, {INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinsertf64x4, INS_vinsertf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX512F, LoadAlignedVector512, 64, 1, {INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa64, INS_movdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX512F, LoadAlignedVector512NonTemporal, 64, 1, {INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX512F, StoreAligned, 64, 2, {INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa64, INS_movdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(AVX512F, StoreAlignedNonTemporal, 64, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)


// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1665,8 +1665,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
case NI_AVX_LoadVector256:
case NI_Vector128_Load:
case NI_Vector256_Load:
case NI_Vector512_Load:
case NI_Vector128_LoadUnsafe:
case NI_Vector256_LoadUnsafe:
case NI_Vector512_LoadUnsafe:
{
if (sig->numArgs == 2)
{
Expand Down Expand Up @@ -1698,6 +1700,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

case NI_Vector128_LoadAligned:
case NI_Vector256_LoadAligned:
case NI_Vector512_LoadAligned:
{
assert(sig->numArgs == 1);

Expand All @@ -1716,6 +1719,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

case NI_Vector128_LoadAlignedNonTemporal:
case NI_Vector256_LoadAlignedNonTemporal:
case NI_Vector512_LoadAlignedNonTemporal:
{
assert(sig->numArgs == 1);

Expand Down Expand Up @@ -2086,8 +2090,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

case NI_Vector128_Store:
case NI_Vector256_Store:
case NI_Vector512_Store:
case NI_Vector128_StoreUnsafe:
case NI_Vector256_StoreUnsafe:
case NI_Vector512_StoreUnsafe:
{
assert(retType == TYP_VOID);
var_types simdType = getSIMDTypeForSize(simdSize);
Expand Down Expand Up @@ -2130,6 +2136,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

case NI_Vector128_StoreAligned:
case NI_Vector256_StoreAligned:
case NI_Vector512_StoreAligned:
{
assert(sig->numArgs == 2);
assert(retType == TYP_VOID);
Expand All @@ -2155,6 +2162,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

case NI_Vector128_StoreAlignedNonTemporal:
case NI_Vector256_StoreAlignedNonTemporal:
case NI_Vector512_StoreAlignedNonTemporal:
{
assert(sig->numArgs == 2);
assert(retType == TYP_VOID);
Expand Down
Loading

0 comments on commit 272fb4e

Please sign in to comment.