Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tweak the register order for xarch to better account for callee saved #88151

Merged
merged 14 commits into from
Jun 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 51 additions & 6 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,16 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
simd8_t val8 = *(simd8_t*)val;
if (val8.IsAllBitsSet())
{
emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, EA_16BYTE, targetReg, targetReg, targetReg);
if (emitter::isHighSimdReg(targetReg))
{
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg,
static_cast<int8_t>(0xFF));
}
else
{
emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, EA_16BYTE, targetReg, targetReg, targetReg);
}
}
else if (val8.IsZero())
{
Expand All @@ -456,7 +465,16 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
simd12_t val12 = *(simd12_t*)val;
if (val12.IsAllBitsSet())
{
emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, EA_16BYTE, targetReg, targetReg, targetReg);
if (emitter::isHighSimdReg(targetReg))
{
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg,
static_cast<int8_t>(0xFF));
}
else
{
emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, EA_16BYTE, targetReg, targetReg, targetReg);
}
}
else if (val12.IsZero())
{
Expand All @@ -476,7 +494,16 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
simd16_t val16 = *(simd16_t*)val;
if (val16.IsAllBitsSet())
{
emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, targetReg, targetReg, targetReg);
if (emitter::isHighSimdReg(targetReg))
{
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg,
static_cast<int8_t>(0xFF));
}
else
{
emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, targetReg, targetReg, targetReg);
}
}
else if (val16.IsZero())
{
Expand All @@ -494,7 +521,16 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
simd32_t val32 = *(simd32_t*)val;
if (val32.IsAllBitsSet() && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2))
{
emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, targetReg, targetReg, targetReg);
if (emitter::isHighSimdReg(targetReg))
{
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg,
static_cast<int8_t>(0xFF));
}
else
{
emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, targetReg, targetReg, targetReg);
}
}
else if (val32.IsZero())
{
Expand Down Expand Up @@ -592,8 +628,17 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
}
else if (tree->IsFloatAllBitsSet())
{
// A faster/smaller way to generate AllBitsSet
emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, EA_16BYTE, targetReg, targetReg, targetReg);
if (emitter::isHighSimdReg(targetReg))
{
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, EA_16BYTE, targetReg, targetReg, targetReg,
static_cast<int8_t>(0xFF));
}
else
{
// A faster/smaller way to generate AllBitsSet
emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, EA_16BYTE, targetReg, targetReg, targetReg);
}
}
else
{
Expand Down
31 changes: 6 additions & 25 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1570,38 +1570,19 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const
bool emitter::HasHighSIMDReg(const instrDesc* id) const
{
// Reports whether any register recorded on `id` is a high SIMD register
// (REG_XMM16..REG_XMM31). Always false when TARGET_AMD64 is not defined.
#if defined(TARGET_AMD64)
// idReg1/idReg2 are examined unconditionally.
if (IsHighSIMDReg(id->idReg1()) || IsHighSIMDReg(id->idReg2()))
if (isHighSimdReg(id->idReg1()) || isHighSimdReg(id->idReg2()))
return true;

// NOTE(review): presumably a small descriptor cannot carry a third or fourth
// register, which is why the check stops here -- confirm against instrDesc.
if (id->idIsSmallDsc())
return false;

// idReg3/idReg4 are only read when the descriptor reports having them.
if ((id->idHasReg3() && IsHighSIMDReg(id->idReg3())) || (id->idHasReg4() && IsHighSIMDReg(id->idReg4())))
if ((id->idHasReg3() && isHighSimdReg(id->idReg3())) || (id->idHasReg4() && isHighSimdReg(id->idReg4())))
return true;
#endif
// X86 JIT operates in 32-bit mode and hence extended reg are not available.
return false;
}

//------------------------------------------------------------------------
// IsHighSIMDReg: Tells whether a register is one of the EVEX-encoded high
//                SIMD registers (xmm16-xmm31).
//
// Arguments:
//    reg -- the register to classify
//
// Return Value:
//    true when reg lies in the REG_XMM16..REG_XMM31 range; always false
//    when TARGET_AMD64 is not defined.
bool emitter::IsHighSIMDReg(regNumber reg) const
{
#if !defined(TARGET_AMD64)
    // X86 JIT operates in 32-bit mode, so the extended registers are not available.
    return false;
#else
    const bool atOrAboveFirst = (REG_XMM16 <= reg);
    const bool atOrBelowLast  = (reg <= REG_XMM31);
    return atOrAboveFirst && atOrBelowLast;
#endif
}

//------------------------------------------------------------------------
// HasMaskReg: Checks if an instruction uses a KMask registers (k0-k7)
//
Expand Down Expand Up @@ -3160,7 +3141,7 @@ inline unsigned emitter::insEncodeReg012(const instrDesc* id, regNumber reg, emi

if (IsExtendedReg(reg))
{
if (IsHighSIMDReg(reg))
if (isHighSimdReg(reg))
{
*code = AddRexXPrefix(id, *code); // EVEX.X
}
Expand Down Expand Up @@ -3203,7 +3184,7 @@ inline unsigned emitter::insEncodeReg345(const instrDesc* id, regNumber reg, emi

if (IsExtendedReg(reg))
{
if (IsHighSIMDReg(reg))
if (isHighSimdReg(reg))
{
*code = AddEvexRPrimePrefix(*code); // EVEX.R'
}
Expand Down Expand Up @@ -3262,7 +3243,7 @@ inline emitter::code_t emitter::insEncodeReg3456(const instrDesc* id, regNumber
// Rather see these paths cleaned up.
regBits = HighAwareRegEncoding(reg);

if (IsHighSIMDReg(reg))
if (isHighSimdReg(reg))
{
// Have to set the EVEX V' bit
code = AddEvexVPrimePrefix(code);
Expand Down Expand Up @@ -3308,7 +3289,7 @@ inline unsigned emitter::insEncodeRegSIB(const instrDesc* id, regNumber reg, cod

if (IsExtendedReg(reg))
{
if (IsHighSIMDReg(reg))
if (isHighSimdReg(reg))
{
*code = AddEvexVPrimePrefix(*code); // EVEX.X
}
Expand Down
11 changes: 10 additions & 1 deletion src/coreclr/jit/emitxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,16 @@ inline static bool isMaskReg(regNumber reg)
return (reg >= REG_MASK_FIRST && reg <= REG_MASK_LAST);
}

//------------------------------------------------------------------------
// isHighSimdReg: Checks whether a register falls in the high SIMD register
//                range REG_XMM16..REG_XMM31.
//
// Arguments:
//    reg -- register to check
//
// Return Value:
//    true if reg is within REG_XMM16..REG_XMM31 on TARGET_AMD64; false
//    otherwise, including on every non-AMD64 build.
inline static bool isHighSimdReg(regNumber reg)
{
    // X86 JIT operates in 32-bit mode and hence extended regs are not
    // available, so the answer stays false unless TARGET_AMD64 is defined.
    bool isHigh = false;
#ifdef TARGET_AMD64
    isHigh = (REG_XMM16 <= reg) && (reg <= REG_XMM31);
#endif
    return isHigh;
}

/************************************************************************/
/* Routines that compute the size of / encode instructions */
/************************************************************************/
Expand Down Expand Up @@ -890,7 +900,6 @@ inline bool HasEmbeddedBroadcast(const instrDesc* id) const
}

inline bool HasHighSIMDReg(const instrDesc* id) const;
inline bool IsHighSIMDReg(regNumber) const;

inline bool HasMaskReg(const instrDesc* id) const;

Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1861,6 +1861,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node)
lastOp = op3;

// generate all-one mask vector
assert(!emitter::isHighSimdReg(targetReg));
emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, maskReg, maskReg, maskReg);
}

Expand Down
47 changes: 31 additions & 16 deletions src/coreclr/jit/lsrabuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1913,12 +1913,13 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc

// Register table expanded from REG_VAR_ORDER, with its element count.
// NOTE(review): presumably the non-floating-point register order -- confirm against the target header.
static const regNumber lsraRegOrder[] = {REG_VAR_ORDER};
const unsigned lsraRegOrderSize = ArrLen(lsraRegOrder);
// TODO-XARCH-AVX512 we might want to move this to be configured with the rbm variables too

// Floating-point register table expanded from REG_VAR_ORDER_FLT. buildPhysRegRecords
// stores each entry's index in this table as that register's regOrder.
static const regNumber lsraRegOrderFlt[] = {REG_VAR_ORDER_FLT};
const unsigned lsraRegOrderFltSize = ArrLen(lsraRegOrderFlt);

#if defined(TARGET_AMD64)
// Upper float table: buildPhysRegRecords numbers these entries after the ones in
// lsraRegOrderFlt (regOrder = index + lsraRegOrderFltSize) when EVEX encoding can be used.
static const regNumber lsraRegOrderFltUpper[] = {REG_VAR_ORDER_FLT_UPPER};
const unsigned lsraRegOrderUpperFltSize = ArrLen(lsraRegOrderFltUpper);
// EVEX float table: buildPhysRegRecords uses it in place of lsraRegOrderFlt when
// compiler->canUseEvexEncoding() is true.
static const regNumber lsraRegOrderFltEvex[] = {REG_VAR_ORDER_FLT_EVEX};
const unsigned lsraRegOrderFltEvexSize = ArrLen(lsraRegOrderFltEvex);
#endif // TARGET_AMD64

//------------------------------------------------------------------------
Expand All @@ -1945,23 +1946,37 @@ void LinearScan::buildPhysRegRecords()
// initializing the floating registers.
// For that `compFloatingPointUsed` should be set accurately
// before invoking allocator.
for (unsigned int i = 0; i < lsraRegOrderFltSize; i++)
{
regNumber reg = lsraRegOrderFlt[i];
RegRecord* curr = &physRegs[reg];
curr->regOrder = (unsigned char)i;
}

const regNumber* regOrderFlt;
unsigned regOrderFltSize;

#if defined(TARGET_AMD64)
// x64 has additional registers available when EVEX is supported
// and that causes a different ordering to be used since they are
// callee trash and should appear at the end of the existing callee
// trash set

if (compiler->canUseEvexEncoding())
{
for (unsigned int i = 0; i < lsraRegOrderUpperFltSize; i++)
{
regNumber reg = lsraRegOrderFltUpper[i];
RegRecord* curr = &physRegs[reg];
curr->regOrder = (unsigned char)(i + lsraRegOrderFltSize);
}
regOrderFlt = &lsraRegOrderFltEvex[0];
regOrderFltSize = lsraRegOrderFltEvexSize;
}
else
{
regOrderFlt = &lsraRegOrderFlt[0];
regOrderFltSize = lsraRegOrderFltSize;
}
#else
regOrderFlt = &lsraRegOrderFlt[0];
regOrderFltSize = lsraRegOrderFltSize;
#endif

for (unsigned int i = 0; i < regOrderFltSize; i++)
{
regNumber reg = regOrderFlt[i];
RegRecord* curr = &physRegs[reg];
curr->regOrder = (unsigned char)i;
}
#endif // TARGET_AMD64
}

//------------------------------------------------------------------------
Expand Down
6 changes: 4 additions & 2 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,12 +154,14 @@ int LinearScan::BuildNode(GenTree* tree)
case GT_CNS_VEC:
{
srcCount = 0;

assert(dstCount == 1);
assert(!tree->IsReuseRegVal());
RefPosition* def = BuildDef(tree, BuildEvexIncompatibleMask(tree));
tannergooding marked this conversation as resolved.
Show resolved Hide resolved

RefPosition* def = BuildDef(tree);
def->getInterval()->isConstant = true;
break;
}
break;

#if !defined(TARGET_64BIT)

Expand Down
Loading