From 04690205c07a07a242155e88fff0acb730f3dc73 Mon Sep 17 00:00:00 2001 From: SingleAccretion <62474226+SingleAccretion@users.noreply.github.com> Date: Wed, 24 Aug 2022 12:42:22 +0300 Subject: [PATCH] Contain memory operands under casts (#72719) * Add GenTreeCast::IsZeroExtending * Cast descriptor support * XARCH support * ARM/ARM64 support TODO: consider using a dedicated IND_EXT oper for ARM/ARM64 instead of containment. This would allow us to cleany handle all indirections. It would not mean we'd give up on the casts containment, as we'd still need to handle the "reg optional" case. IND_EXT will be much like an ordinary IND, but have a "source" and "target" types. The "target" type would always be int/long, while "source" could be of any integral type. This design would be a bit more natural, and nicely separable from casts. However, the main problem with the current state of things, apart from the fact codegen of indirections is tied strongly to "GenTreeIndir", is the fact that changing type of the load can invalidate LEA containment. One would think this is solvable with some tricks, like re-running containment analysis on an indirection after processing the cast, but ARM64 codegen doesn't support uncontained LEAs in some cases. A possible solution to that problem is uncontaining the whole address tree. That would be messy, but ought to work. An additional complication is that these trees can contain a lot of contained operands as part of ADDEX and BFIZ, so what would have to be done first is the making of these into proper EXOPs. In any case, this is all future work. --- src/coreclr/jit/codegen.h | 7 +++ src/coreclr/jit/codegenarmarch.cpp | 74 ++++++++++++++++++++++++++---- src/coreclr/jit/codegenlinear.cpp | 49 +++++++++++++++++++- src/coreclr/jit/codegenxarch.cpp | 34 ++++++++++---- src/coreclr/jit/gentree.h | 16 +++++++ src/coreclr/jit/jitconfigvalues.h | 4 -- src/coreclr/jit/lower.cpp | 9 +++- src/coreclr/jit/lower.h | 15 ++++-- src/coreclr/jit/lowerarmarch.cpp | 59 +++++++++++++++++++++++- src/coreclr/jit/lowerxarch.cpp | 46 ++++++++++++++----- 10 files changed, 270 insertions(+), 43 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 16c0ee588fabc6..3502673faacac1 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -993,6 +993,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX ZERO_EXTEND_INT, SIGN_EXTEND_INT, #endif + LOAD_ZERO_EXTEND_SMALL_INT, + LOAD_SIGN_EXTEND_SMALL_INT, +#ifdef TARGET_64BIT + LOAD_ZERO_EXTEND_INT, + LOAD_SIGN_EXTEND_INT, +#endif + LOAD_SOURCE }; private: diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 536797eaba9593..0702ee2f737fba 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3898,25 +3898,24 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d // cast - The GT_CAST node // // Assumptions: -// The cast node is not a contained node and must have an assigned register. // Neither the source nor target type can be a floating point type. // -// TODO-ARM64-CQ: Allow castOp to be a contained node without an assigned register. -// void CodeGen::genIntToIntCast(GenTreeCast* cast) { - genConsumeRegs(cast->gtGetOp1()); + genConsumeRegs(cast->CastOp()); - const regNumber srcReg = cast->gtGetOp1()->GetRegNum(); + GenTree* const src = cast->CastOp(); + const regNumber srcReg = src->isUsedFromReg() ? 
src->GetRegNum() : REG_NA; const regNumber dstReg = cast->GetRegNum(); + emitter* const emit = GetEmitter(); - assert(genIsValidIntReg(srcReg)); assert(genIsValidIntReg(dstReg)); GenIntCastDesc desc(cast); if (desc.CheckKind() != GenIntCastDesc::CHECK_NONE) { + assert(genIsValidIntReg(srcReg)); genIntCastOverflowCheck(cast, desc, srcReg); } @@ -3944,15 +3943,70 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) ins = INS_sxtw; insSize = 8; break; -#endif - default: - assert(desc.ExtendKind() == GenIntCastDesc::COPY); +#endif // TARGET_64BIT + case GenIntCastDesc::COPY: ins = INS_mov; insSize = desc.ExtendSrcSize(); break; + case GenIntCastDesc::LOAD_ZERO_EXTEND_SMALL_INT: + ins = (desc.ExtendSrcSize() == 1) ? INS_ldrb : INS_ldrh; + insSize = TARGET_POINTER_SIZE; + break; + case GenIntCastDesc::LOAD_SIGN_EXTEND_SMALL_INT: + ins = (desc.ExtendSrcSize() == 1) ? INS_ldrsb : INS_ldrsh; + insSize = TARGET_POINTER_SIZE; + break; +#ifdef TARGET_64BIT + case GenIntCastDesc::LOAD_ZERO_EXTEND_INT: + ins = INS_ldr; + insSize = 4; + break; + case GenIntCastDesc::LOAD_SIGN_EXTEND_INT: + ins = INS_ldrsw; + insSize = 8; + break; +#endif // TARGET_64BIT + case GenIntCastDesc::LOAD_SOURCE: + ins = ins_Load(src->TypeGet()); + insSize = genTypeSize(genActualType(src)); + break; + + default: + unreached(); } - GetEmitter()->emitIns_Mov(ins, EA_ATTR(insSize), dstReg, srcReg, /* canSkip */ false); + if (srcReg != REG_NA) + { + emit->emitIns_Mov(ins, EA_ATTR(insSize), dstReg, srcReg, /* canSkip */ false); + } + else + { + // The "used from memory" case. On ArmArch casts are the only nodes which can have + // contained memory operands, so we have to handle all possible sources "manually". + assert(src->isUsedFromMemory()); + + if (src->isUsedFromSpillTemp()) + { + assert(src->IsRegOptional()); + + TempDsc* tmpDsc = getSpillTempDsc(src); + unsigned tmpNum = tmpDsc->tdTempNum(); + regSet.tmpRlsTemp(tmpDsc); + + emit->emitIns_R_S(ins, EA_ATTR(insSize), dstReg, tmpNum, 0); + } + else if (src->OperIsLocal()) + { + emit->emitIns_R_S(ins, EA_ATTR(insSize), dstReg, src->AsLclVarCommon()->GetLclNum(), + src->AsLclVarCommon()->GetLclOffs()); + } + else + { + assert(src->OperIs(GT_IND) && !src->AsIndir()->IsVolatile() && !src->AsIndir()->IsUnaligned()); + emit->emitIns_R_R_I(ins, EA_ATTR(insSize), dstReg, src->AsIndir()->Base()->GetRegNum(), + static_cast(src->AsIndir()->Offset())); + } + } } genProduceReg(cast); diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index e3ac21ba504a27..d67c23dc85c4b7 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2448,7 +2448,8 @@ void CodeGen::genCodeForCast(GenTreeOp* tree) CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast) { - const var_types srcType = genActualType(cast->gtGetOp1()->TypeGet()); + GenTree* const src = cast->CastOp(); + const var_types srcType = genActualType(src); const bool srcUnsigned = cast->IsUnsigned(); const unsigned srcSize = genTypeSize(srcType); const var_types castType = cast->gtCastType; @@ -2457,7 +2458,9 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast) const var_types dstType = genActualType(cast->TypeGet()); const unsigned dstSize = genTypeSize(dstType); const bool overflow = cast->gtOverflow(); + const bool castIsLoad = !src->isUsedFromReg(); + assert(castIsLoad == src->isUsedFromMemory()); assert((srcSize == 4) || (srcSize == genTypeSize(TYP_I_IMPL))); assert((dstSize == 4) || (dstSize == genTypeSize(TYP_I_IMPL))); @@ -2473,7 +2476,7 @@ 
CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast) // values of the castType without risk of integer overflow. const int castNumBits = (castSize * 8) - (castUnsigned ? 0 : 1); m_checkSmallIntMax = (1 << castNumBits) - 1; - m_checkSmallIntMin = (castUnsigned | srcUnsigned) ? 0 : (-m_checkSmallIntMax - 1); + m_checkSmallIntMin = (castUnsigned || srcUnsigned) ? 0 : (-m_checkSmallIntMax - 1); m_extendKind = COPY; m_extendSrcSize = dstSize; @@ -2568,6 +2571,48 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast) m_extendKind = COPY; m_extendSrcSize = srcSize; } + + if (castIsLoad) + { + const var_types srcLoadType = src->TypeGet(); + + switch (m_extendKind) + { + case ZERO_EXTEND_SMALL_INT: // small type/int/long -> ubyte/ushort + assert(varTypeIsUnsigned(srcLoadType) || (genTypeSize(srcLoadType) >= genTypeSize(castType))); + m_extendKind = LOAD_ZERO_EXTEND_SMALL_INT; + m_extendSrcSize = min(genTypeSize(srcLoadType), genTypeSize(castType)); + break; + + case SIGN_EXTEND_SMALL_INT: // small type/int/long -> byte/short + assert(varTypeIsSigned(srcLoadType) || (genTypeSize(srcLoadType) >= genTypeSize(castType))); + m_extendKind = LOAD_SIGN_EXTEND_SMALL_INT; + m_extendSrcSize = min(genTypeSize(srcLoadType), genTypeSize(castType)); + break; + +#ifdef TARGET_64BIT + case ZERO_EXTEND_INT: // ubyte/ushort/int -> long. + assert(varTypeIsUnsigned(srcLoadType) || (srcLoadType == TYP_INT)); + m_extendKind = varTypeIsSmall(srcLoadType) ? LOAD_ZERO_EXTEND_SMALL_INT : LOAD_ZERO_EXTEND_INT; + m_extendSrcSize = genTypeSize(srcLoadType); + break; + + case SIGN_EXTEND_INT: // byte/short/int -> long. + assert(varTypeIsSigned(srcLoadType) || (srcLoadType == TYP_INT)); + m_extendKind = varTypeIsSmall(srcLoadType) ? LOAD_SIGN_EXTEND_SMALL_INT : LOAD_SIGN_EXTEND_INT; + m_extendSrcSize = genTypeSize(srcLoadType); + break; +#endif // TARGET_64BIT + + case COPY: // long -> long, small type/int/long -> int. + m_extendKind = LOAD_SOURCE; + m_extendSrcSize = 0; + break; + + default: + unreached(); + } + } } #if !defined(TARGET_64BIT) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 271b24c22bebb8..d5bbe76dc1e528 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -6752,27 +6752,25 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d // cast - The GT_CAST node // // Assumptions: -// The cast node is not a contained node and must have an assigned register. // Neither the source nor target type can be a floating point type. // On x86 casts to (U)BYTE require that the source be in a byte register. // -// TODO-XArch-CQ: Allow castOp to be a contained node without an assigned register. -// void CodeGen::genIntToIntCast(GenTreeCast* cast) { - genConsumeRegs(cast->gtGetOp1()); + genConsumeRegs(cast->CastOp()); - const regNumber srcReg = cast->gtGetOp1()->GetRegNum(); + GenTree* const src = cast->CastOp(); + const regNumber srcReg = src->isUsedFromReg() ? 
src->GetRegNum() : REG_NA; const regNumber dstReg = cast->GetRegNum(); emitter* emit = GetEmitter(); - assert(genIsValidIntReg(srcReg)); assert(genIsValidIntReg(dstReg)); GenIntCastDesc desc(cast); if (desc.CheckKind() != GenIntCastDesc::CHECK_NONE) { + assert(genIsValidIntReg(srcReg)); genIntCastOverflowCheck(cast, desc, srcReg); } @@ -6783,33 +6781,51 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) switch (desc.ExtendKind()) { case GenIntCastDesc::ZERO_EXTEND_SMALL_INT: + case GenIntCastDesc::LOAD_ZERO_EXTEND_SMALL_INT: ins = INS_movzx; insSize = desc.ExtendSrcSize(); break; case GenIntCastDesc::SIGN_EXTEND_SMALL_INT: + case GenIntCastDesc::LOAD_SIGN_EXTEND_SMALL_INT: ins = INS_movsx; insSize = desc.ExtendSrcSize(); break; #ifdef TARGET_64BIT case GenIntCastDesc::ZERO_EXTEND_INT: + case GenIntCastDesc::LOAD_ZERO_EXTEND_INT: ins = INS_mov; insSize = 4; canSkip = compiler->opts.OptimizationEnabled() && emit->AreUpper32BitsZero(srcReg); break; case GenIntCastDesc::SIGN_EXTEND_INT: + case GenIntCastDesc::LOAD_SIGN_EXTEND_INT: ins = INS_movsxd; insSize = 4; break; #endif - default: - assert(desc.ExtendKind() == GenIntCastDesc::COPY); + case GenIntCastDesc::COPY: ins = INS_mov; insSize = desc.ExtendSrcSize(); canSkip = true; break; + case GenIntCastDesc::LOAD_SOURCE: + ins = ins_Load(src->TypeGet()); + insSize = genTypeSize(src); + break; + + default: + unreached(); } - emit->emitIns_Mov(ins, EA_ATTR(insSize), dstReg, srcReg, canSkip); + if (srcReg != REG_NA) + { + emit->emitIns_Mov(ins, EA_ATTR(insSize), dstReg, srcReg, canSkip); + } + else + { + assert(src->isUsedFromMemory()); + inst_RV_TT(ins, EA_ATTR(insSize), dstReg, src); + } genProduceReg(cast); } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 81c0c181bfa3ed..25aaf4541c7aaf 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -3858,6 +3858,22 @@ struct GenTreeCast : public GenTreeOp { } #endif + + bool IsZeroExtending() + { + assert(varTypeIsIntegral(CastOp()) && varTypeIsIntegral(CastToType())); + + if (varTypeIsSmall(CastToType())) + { + return varTypeIsUnsigned(CastToType()); + } + if (TypeIs(TYP_LONG) && genActualTypeIsInt(CastOp())) + { + return IsUnsigned(); + } + + return false; + } }; // GT_BOX nodes are place markers for boxed values. The "real" tree diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index bc53afad80c7a3..de8a743cbfff72 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -288,11 +288,7 @@ CONFIG_INTEGER(JitNoRangeChks, W("JitNoRngChks"), 0) // If 1, don't generate ran // AltJitAssertOnNYI should be 0 on targets where JIT is under development or bring up stage, so as to facilitate // fallback to main JIT on hitting a NYI. 
-#if defined(TARGET_ARM64) || defined(TARGET_X86) -CONFIG_INTEGER(AltJitAssertOnNYI, W("AltJitAssertOnNYI"), 0) // Controls the AltJit behavior of NYI stuff -#else // !defined(TARGET_ARM64) && !defined(TARGET_X86) CONFIG_INTEGER(AltJitAssertOnNYI, W("AltJitAssertOnNYI"), 1) // Controls the AltJit behavior of NYI stuff -#endif // defined(TARGET_ARM64) || defined(TARGET_X86) CONFIG_INTEGER(EnableEHWriteThru, W("EnableEHWriteThru"), 1) // Enable the register allocator to support EH-write thru: // partial enregistration of vars exposed on EH boundaries diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index e7feb1f819a14e..78c9d85b59ed04 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2828,6 +2828,11 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) op2->ClearContained(); } } + else + { + castOp->ClearContained(); + } + cmp->AsOp()->gtOp1 = castOp; BlockRange().Remove(cast); @@ -5352,6 +5357,7 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* par #ifdef TARGET_ARM64 if ((index != nullptr) && index->OperIs(GT_CAST) && (scale == 1) && (offset == 0) && varTypeIsByte(targetType)) { + index->AsCast()->CastOp()->ClearContained(); // Uncontain any memory operands. MakeSrcContained(addrMode, index); } @@ -6111,6 +6117,7 @@ void Lowering::LowerShift(GenTreeOp* shift) // The parent was replaced, clear contain and regOpt flag. shift->gtOp2->ClearContained(); } + ContainCheckShiftRotate(shift); #ifdef TARGET_ARM64 @@ -6129,13 +6136,13 @@ void Lowering::LowerShift(GenTreeOp* shift) unsigned dstBits = genTypeSize(cast) * BITS_PER_BYTE; unsigned srcBits = varTypeIsSmall(cast->CastToType()) ? genTypeSize(cast->CastToType()) * BITS_PER_BYTE : genTypeSize(cast->CastOp()) * BITS_PER_BYTE; - assert(!cast->CastOp()->isContained()); // It has to be an upcast and CNS must be in [1..srcBits) range if ((srcBits < dstBits) && (cns->IconValue() > 0) && (cns->IconValue() < srcBits)) { JITDUMP("Recognized ubfix/sbfix pattern in LSH(CAST, CNS). Changing op to GT_BFIZ"); shift->ChangeOper(GT_BFIZ); + cast->CastOp()->ClearContained(); // Uncontain any memory operands. MakeSrcContained(shift, cast); } } diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index b0d880c030227a..f37aa6d2be5e03 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -388,10 +388,11 @@ class Lowering final : public Phase return op; } - GenTreeCast* cast = op->AsCast(); + GenTreeCast* cast = op->AsCast(); + GenTree* castOp = cast->CastOp(); // FP <-> INT casts should be kept - if (varTypeIsFloating(cast->CastFromType()) ^ varTypeIsFloating(expectedType)) + if (varTypeIsFloating(castOp) ^ varTypeIsFloating(expectedType)) { return op; } @@ -402,17 +403,23 @@ class Lowering final : public Phase return op; } + // Keep casts with operands usable from memory. 
+ if (castOp->isContained() || castOp->IsRegOptional()) + { + return op; + } + if (genTypeSize(cast->CastToType()) >= genTypeSize(expectedType)) { #ifndef TARGET_64BIT // Don't expose TYP_LONG on 32bit - if (varTypeIsLong(cast->CastFromType())) + if (castOp->TypeIs(TYP_LONG)) { return op; } #endif BlockRange().Remove(op); - return cast->CastOp(); + return castOp; } return op; diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 0ff88151503201..9cab1d427ad1f2 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -307,11 +307,13 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul) mul->SetUnsigned(); } + op1->CastOp()->ClearContained(); // Uncontain any memory operands. mul->gtOp1 = op1->CastOp(); BlockRange().Remove(op1); if (op2->OperIs(GT_CAST)) { + op2->AsCast()->CastOp()->ClearContained(); // Uncontain any memory operands. mul->gtOp2 = op2->AsCast()->CastOp(); BlockRange().Remove(op2); } @@ -1881,6 +1883,7 @@ void Lowering::ContainCheckBinary(GenTreeOp* node) if (cast->gtGetOp1()->TypeIs(TYP_INT) && cast->TypeIs(TYP_LONG) && !cast->gtOverflow()) { node->ChangeOper(GT_ADDEX); + cast->AsCast()->CastOp()->ClearContained(); // Uncontain any memory operands. MakeSrcContained(node, cast); } } @@ -2024,11 +2027,63 @@ void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const // void Lowering::ContainCheckCast(GenTreeCast* node) { -#ifdef TARGET_ARM GenTree* castOp = node->CastOp(); var_types castToType = node->CastToType(); - var_types srcType = castOp->TypeGet(); + if (comp->opts.OptimizationEnabled() && !node->gtOverflow() && varTypeIsIntegral(castOp) && + varTypeIsIntegral(castToType)) + { + // Most integral casts can be re-expressed as loads, except those that would be changing the sign. + if (!varTypeIsSmall(castOp) || (varTypeIsUnsigned(castOp) == node->IsZeroExtending())) + { + bool srcIsContainable = false; + + // Make sure to only contain indirections codegen can handle. + if (castOp->OperIs(GT_IND)) + { + GenTreeIndir* indir = castOp->AsIndir(); + + if (!indir->IsVolatile() && !indir->IsUnaligned()) + { + GenTree* addr = indir->Addr(); + + if (!addr->isContained()) + { + srcIsContainable = true; + } + else if (addr->OperIs(GT_LEA) && !addr->AsAddrMode()->HasIndex()) + { + var_types loadType = varTypeIsSmall(castToType) ? castToType : castOp->TypeGet(); + + if (emitter::emitIns_valid_imm_for_ldst_offset(addr->AsAddrMode()->Offset(), + emitTypeSize(loadType))) + { + srcIsContainable = true; + } + } + } + } + else + { + assert(castOp->OperIsLocalRead() || !IsContainableMemoryOp(castOp)); + srcIsContainable = true; + } + + if (srcIsContainable && IsSafeToContainMem(node, castOp)) + { + if (IsContainableMemoryOp(castOp)) + { + MakeSrcContained(node, castOp); + } + else + { + castOp->SetRegOptional(); + } + } + } + } + +#ifdef TARGET_ARM if (varTypeIsLong(castOp)) { assert(castOp->OperGet() == GT_LONG); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 5f2e384daa7ce7..a7b1747d54bc09 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -5135,28 +5135,52 @@ void Lowering::ContainCheckCast(GenTreeCast* node) srcType = varTypeToUnsigned(srcType); } - if (!node->gtOverflow() && (varTypeIsFloating(castToType) || varTypeIsFloating(srcType))) + if (!node->gtOverflow()) { + // Some casts will be able to use the source from memory. 
+ bool srcIsContainable = false; + + if (varTypeIsFloating(castToType) || varTypeIsFloating(srcType)) + { #ifdef DEBUG - // If converting to float/double, the operand must be 4 or 8 byte in size. - if (varTypeIsFloating(castToType)) + // If converting to float/double, the operand must be 4 or 8 byte in size. + if (varTypeIsFloating(castToType)) + { + unsigned opSize = genTypeSize(srcType); + assert(opSize == 4 || opSize == 8); + } +#endif // DEBUG + + // U8 -> R8 conversion requires that the operand be in a register. + if (srcType != TYP_ULONG) + { + if (castOp->IsCnsNonZeroFltOrDbl()) + { + MakeSrcContained(node, castOp); + } + else + { + srcIsContainable = true; + } + } + } + else if (comp->opts.OptimizationEnabled() && varTypeIsIntegral(castOp) && varTypeIsIntegral(castToType)) { - unsigned opSize = genTypeSize(srcType); - assert(opSize == 4 || opSize == 8); + // Most integral casts can be re-expressed as loads, except those that would be changing the sign. + if (!varTypeIsSmall(castOp) || (varTypeIsUnsigned(castOp) == node->IsZeroExtending())) + { + srcIsContainable = true; + } } -#endif // DEBUG - // U8 -> R8 conversion requires that the operand be in a register. - if (srcType != TYP_ULONG) + if (srcIsContainable && IsSafeToContainMem(node, castOp)) { - if ((IsContainableMemoryOp(castOp) && IsSafeToContainMem(node, castOp)) || castOp->IsCnsNonZeroFltOrDbl()) + if (IsContainableMemoryOp(castOp)) { MakeSrcContained(node, castOp); } else { - // Mark castOp as reg optional to indicate codegen - // can still generate code if it is on stack. castOp->SetRegOptional(); } }
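For readers of the diff, a few standalone C++ sketches follow. None of them are JIT code; every type, helper name, and register choice in them is an illustrative stand-in unless it is quoted from the patch above.

The shape of code this change targets is an indirection consumed directly by a widening or narrowing integer cast. Previously the cast's operand always had to live in a register, so codegen emitted a load followed by a separate extend; with the operand contained, a single extending load suffices. A minimal sketch of the pattern, with the expected (not captured) instruction shapes in comments:

    #include <cstdint>

    int64_t widenSigned(const int32_t* p)
    {
        // before: mov    eax, dword ptr [rcx]    after: movsxd rax, dword ptr [rcx]
        //         movsxd rax, eax                (ARM64: ldrsw x0, [x0] instead of ldr + sxtw)
        return static_cast<int64_t>(*p);
    }

    int32_t narrowToShort(const int32_t* p)
    {
        // before: mov   eax, dword ptr [rcx]     after: movsx eax, word ptr [rcx]
        //         movsx eax, ax                  (ARM64: ldrsh x0, [x0])
        return static_cast<int16_t>(*p);
    }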
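The new LOAD_* extend kinds in GenIntCastDesc boil down to picking one instruction from the contained load's type and the cast's target type. A standalone restatement under simplifying assumptions: the enum and helpers below are hypothetical stand-ins for var_types and the descriptor, the combinations lowering rejects (volatile or unaligned indirections, small sources whose signedness disagrees with the cast's extension) never reach this point, and unsigned 4/8-byte cast targets behave like their signed counterparts, with the cast's own "unsigned" flag deciding the int-to-long case.

    #include <algorithm>
    #include <cstdio>

    // Hypothetical stand-ins for the relevant var_types; small types carry signedness,
    // the 4/8-byte targets are represented by their register-sized forms.
    enum class Ty { I8, U8, I16, U16, I32, I64 };

    static bool     isSmall(Ty t)    { return t == Ty::I8 || t == Ty::U8 || t == Ty::I16 || t == Ty::U16; }
    static bool     isUnsigned(Ty t) { return t == Ty::U8 || t == Ty::U16; }
    static unsigned sizeOf(Ty t)
    {
        switch (t)
        {
            case Ty::I8:
            case Ty::U8:
                return 1;
            case Ty::I16:
            case Ty::U16:
                return 2;
            case Ty::I32:
                return 4;
            default:
                return 8;
        }
    }

    struct LoadIns
    {
        const char* name;    // x64 mnemonic; ARM64 uses the ldrb/ldrsb/ldrh/ldrsh/ldr/ldrsw family analogously
        unsigned    srcSize; // bytes read from memory
    };

    // castTo - the cast's target type, loadTy - the type the contained indirection reads,
    // castIsUnsigned - the cast's "unsigned" flag (relevant only for the int -> long case).
    static LoadIns PickLoad(Ty castTo, Ty loadTy, bool castIsUnsigned)
    {
        if (isSmall(castTo) || isSmall(loadTy))
        {
            // LOAD_{ZERO,SIGN}_EXTEND_SMALL_INT: read only the bytes both types share.
            unsigned size    = std::min(sizeOf(castTo), sizeOf(loadTy));
            bool     zeroExt = isSmall(castTo) ? isUnsigned(castTo) : isUnsigned(loadTy);
            return {zeroExt ? "movzx" : "movsx", size};
        }
        if ((castTo == Ty::I64) && (sizeOf(loadTy) == 4))
        {
            // LOAD_{ZERO,SIGN}_EXTEND_INT: a plain 32-bit mov already zero-extends on x64.
            return castIsUnsigned ? LoadIns{"mov", 4} : LoadIns{"movsxd", 4};
        }
        // LOAD_SOURCE: same-size copies and long -> int truncations just load the source as-is.
        return {"mov", sizeOf(loadTy)};
    }

    int main()
    {
        LoadIns a = PickLoad(Ty::I64, Ty::I32, /* castIsUnsigned */ false);
        LoadIns b = PickLoad(Ty::U16, Ty::I32, /* castIsUnsigned */ false);
        printf("(long)load<int>   -> %s, %u byte(s)\n", a.name, a.srcSize);
        printf("(ushort)load<int> -> %s, %u byte(s)\n", b.name, b.srcSize);
        return 0;
    }

The x64 mnemonics in the sketch map onto the ARM64 ones selected in genIntToIntCast: 1- and 2-byte movzx/movsx correspond to ldrb/ldrh and ldrsb/ldrsh, while the 4-byte mov and movsxd correspond to ldr and ldrsw.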
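Why lowering only contains a small-typed source whose signedness matches the extension the cast performs (the "varTypeIsUnsigned(castOp) == node->IsZeroExtending()" check on both targets): for a sign-changing pair, the existing load-then-extend sequence and a single extending load disagree on negative values, so the fold would change behavior. A small standalone demonstration:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        int8_t mem = -1; // the byte 0xFF sitting in memory, read as a signed byte

        // What CAST(ushort, IND(byte)) computes today: a sign-extending load,
        // then keep only the low 16 bits -> 0xFFFF.
        uint16_t twoSteps = static_cast<uint16_t>(static_cast<int32_t>(mem));

        // What a single zero-extending byte load (movzx / ldrb) would produce -> 0x00FF.
        uint16_t oneLoad = static_cast<uint16_t>(static_cast<uint8_t>(mem));

        printf("load+extend: 0x%04X, fused zero-extending load: 0x%04X\n", twoSteps, oneLoad);
        return 0;
    }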
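The ARM64-only part of ContainCheckCast accepts, besides an uncontained address, only a base-plus-offset LEA with no index register, and it validates the offset with emitter::emitIns_valid_imm_for_ldst_offset against the size of the load it will actually emit (the cast's small target type when there is one). A rough standalone approximation of that encodability check, covering just the two common immediate forms, to show why narrowing the access can make a previously fine offset unencodable:

    #include <cstdio>

    // Rough approximation of what emitter::emitIns_valid_imm_for_ldst_offset accepts:
    // an ARM64 [base + imm] load encodes the offset either as a signed 9-bit unscaled
    // immediate or as an unsigned 12-bit immediate scaled by the access size.
    // The real emitter handles more cases; this only illustrates the shape.
    static bool fitsLdStOffset(long long offset, unsigned accessSize /* 1, 2, 4 or 8 */)
    {
        // ldur/stur-style form: byte granularity, -256..255.
        if ((offset >= -256) && (offset <= 255))
        {
            return true;
        }
        // ldr/str unsigned-offset form: a non-negative multiple of the access size, at most 4095 units.
        return (offset >= 0) && (offset % accessSize == 0) && ((offset / accessSize) <= 4095);
    }

    int main()
    {
        // A 4-byte load reaches [base + 16380], but a 2-byte load does not.
        printf("4-byte: %d, 2-byte: %d\n", fitsLdStOffset(16380, 4), fitsLdStOffset(16380, 2));
        return 0;
    }

This is the hazard the commit message calls out: changing the type of the load after a LEA has already been contained under it could leave an offset the narrower access cannot encode, which is why the offset is checked up front against the final load size.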